1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX7 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
6 ; FIXME: Merge with regbankselect, which mostly overlaps when all types are supported.
8 ; Natural mapping
9 define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
10   ; GFX6-LABEL: name: s_buffer_load_i32
11   ; GFX6: bb.1 (%ir-block.0):
12   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
13   ; GFX6-NEXT: {{  $}}
14   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
15   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
16   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
17   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
18   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
19   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
20   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32))
21   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
22   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
23   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
24   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
25   ;
26   ; GFX7-LABEL: name: s_buffer_load_i32
27   ; GFX7: bb.1 (%ir-block.0):
28   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
29   ; GFX7-NEXT: {{  $}}
30   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
31   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
32   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
33   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
34   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
35   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
36   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32))
37   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
38   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
39   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
40   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
41   ;
42   ; GFX8-LABEL: name: s_buffer_load_i32
43   ; GFX8: bb.1 (%ir-block.0):
44   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
45   ; GFX8-NEXT: {{  $}}
46   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
47   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
48   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
49   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
50   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
51   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
52   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32))
53   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
54   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
55   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
56   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
57   %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
58   ret i32 %val
59 }
61 define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
62   ; GFX6-LABEL: name: s_buffer_load_i32_glc
63   ; GFX6: bb.1 (%ir-block.0):
64   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
65   ; GFX6-NEXT: {{  $}}
66   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
67   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
68   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
69   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
70   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
71   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
72   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32))
73   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
74   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
75   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
76   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
77   ;
78   ; GFX7-LABEL: name: s_buffer_load_i32_glc
79   ; GFX7: bb.1 (%ir-block.0):
80   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
81   ; GFX7-NEXT: {{  $}}
82   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
83   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
84   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
85   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
86   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
87   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
88   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32))
89   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
90   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
91   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
92   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
93   ;
94   ; GFX8-LABEL: name: s_buffer_load_i32_glc
95   ; GFX8: bb.1 (%ir-block.0):
96   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
97   ; GFX8-NEXT: {{  $}}
98   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
99   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
100   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
101   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
102   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
103   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
104   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32))
105   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
106   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
107   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
108   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
109   %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 1)
110   ret i32 %val
111 }
113 define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
114   ; GFX6-LABEL: name: s_buffer_load_v2i32
115   ; GFX6: bb.1 (%ir-block.0):
116   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
117   ; GFX6-NEXT: {{  $}}
118   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
119   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
120   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
121   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
122   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
123   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
124   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4)
125   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
126   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
127   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
128   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
129   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
130   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
131   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
132   ; GFX6-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
133   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
134   ;
135   ; GFX7-LABEL: name: s_buffer_load_v2i32
136   ; GFX7: bb.1 (%ir-block.0):
137   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
138   ; GFX7-NEXT: {{  $}}
139   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
140   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
141   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
142   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
143   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
144   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
145   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4)
146   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
147   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
148   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
149   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
150   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
151   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
152   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
153   ; GFX7-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
154   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
155   ;
156   ; GFX8-LABEL: name: s_buffer_load_v2i32
157   ; GFX8: bb.1 (%ir-block.0):
158   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
159   ; GFX8-NEXT: {{  $}}
160   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
161   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
162   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
163   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
164   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
165   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
166   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4)
167   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
168   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
169   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
170   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
171   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
172   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
173   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
174   ; GFX8-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
175   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
176   %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
177   ret <2 x i32> %val
178 }
180 define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
181   ; GFX6-LABEL: name: s_buffer_load_v3i32
182   ; GFX6: bb.1 (%ir-block.0):
183   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
184   ; GFX6-NEXT: {{  $}}
185   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
186   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
187   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
188   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
189   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
190   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
191   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4)
192   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0
193   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1
194   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2
195   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3
196   ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
197   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
198   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
199   ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
200   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
201   ; GFX6-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
202   ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
203   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
204   ; GFX6-NEXT:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
205   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
206   ;
207   ; GFX7-LABEL: name: s_buffer_load_v3i32
208   ; GFX7: bb.1 (%ir-block.0):
209   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
210   ; GFX7-NEXT: {{  $}}
211   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
212   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
213   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
214   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
215   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
216   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
217   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4)
218   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0
219   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1
220   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2
221   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3
222   ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
223   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
224   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
225   ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
226   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
227   ; GFX7-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
228   ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
229   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
230   ; GFX7-NEXT:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
231   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
232   ;
233   ; GFX8-LABEL: name: s_buffer_load_v3i32
234   ; GFX8: bb.1 (%ir-block.0):
235   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
236   ; GFX8-NEXT: {{  $}}
237   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
238   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
239   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
240   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
241   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
242   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
243   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4)
244   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0
245   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1
246   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2
247   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3
248   ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
249   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
250   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
251   ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
252   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
253   ; GFX8-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
254   ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
255   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
256   ; GFX8-NEXT:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
257   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
258   %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
259   ret <3 x i32> %val
260 }
262 define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
263   ; GFX6-LABEL: name: s_buffer_load_v8i32
264   ; GFX6: bb.1 (%ir-block.0):
265   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
266   ; GFX6-NEXT: {{  $}}
267   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
268   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
269   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
270   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
271   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
272   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
273   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4)
274   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
275   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
276   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
277   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
278   ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
279   ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
280   ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
281   ; GFX6-NEXT:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
282   ; GFX6-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
283   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
284   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
285   ; GFX6-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
286   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
287   ; GFX6-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
288   ; GFX6-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
289   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
290   ; GFX6-NEXT:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
291   ; GFX6-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
292   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
293   ; GFX6-NEXT:   $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
294   ; GFX6-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
295   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
296   ; GFX6-NEXT:   $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
297   ; GFX6-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
298   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
299   ; GFX6-NEXT:   $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
300   ; GFX6-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
301   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
302   ; GFX6-NEXT:   $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
303   ; GFX6-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
304   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
305   ; GFX6-NEXT:   $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
306   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
307   ;
308   ; GFX7-LABEL: name: s_buffer_load_v8i32
309   ; GFX7: bb.1 (%ir-block.0):
310   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
311   ; GFX7-NEXT: {{  $}}
312   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
313   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
314   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
315   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
316   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
317   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
318   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4)
319   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
320   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
321   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
322   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
323   ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
324   ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
325   ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
326   ; GFX7-NEXT:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
327   ; GFX7-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
328   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
329   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
330   ; GFX7-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
331   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
332   ; GFX7-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
333   ; GFX7-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
334   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
335   ; GFX7-NEXT:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
336   ; GFX7-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
337   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
338   ; GFX7-NEXT:   $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
339   ; GFX7-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
340   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
341   ; GFX7-NEXT:   $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
342   ; GFX7-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
343   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
344   ; GFX7-NEXT:   $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
345   ; GFX7-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
346   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
347   ; GFX7-NEXT:   $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
348   ; GFX7-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
349   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
350   ; GFX7-NEXT:   $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
351   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
352   ;
353   ; GFX8-LABEL: name: s_buffer_load_v8i32
354   ; GFX8: bb.1 (%ir-block.0):
355   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
356   ; GFX8-NEXT: {{  $}}
357   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
358   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
359   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
360   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
361   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
362   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
363   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4)
364   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
365   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
366   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
367   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
368   ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
369   ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
370   ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
371   ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
372   ; GFX8-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
373   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
374   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
375   ; GFX8-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
376   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
377   ; GFX8-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
378   ; GFX8-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
379   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
380   ; GFX8-NEXT:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
381   ; GFX8-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
382   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
383   ; GFX8-NEXT:   $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
384   ; GFX8-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
385   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
386   ; GFX8-NEXT:   $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
387   ; GFX8-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
388   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
389   ; GFX8-NEXT:   $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
390   ; GFX8-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
391   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
392   ; GFX8-NEXT:   $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
393   ; GFX8-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
394   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
395   ; GFX8-NEXT:   $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
396   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
397   %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
398   ret <8 x i32> %val
399 }
401 define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
402   ; GFX6-LABEL: name: s_buffer_load_v16i32
403   ; GFX6: bb.1 (%ir-block.0):
404   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
405   ; GFX6-NEXT: {{  $}}
406   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
407   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
408   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
409   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
410   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
411   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
412   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4)
413   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
414   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
415   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
416   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
417   ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
418   ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
419   ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
420   ; GFX6-NEXT:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
421   ; GFX6-NEXT:   [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
422   ; GFX6-NEXT:   [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
423   ; GFX6-NEXT:   [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
424   ; GFX6-NEXT:   [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
425   ; GFX6-NEXT:   [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
426   ; GFX6-NEXT:   [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
427   ; GFX6-NEXT:   [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
428   ; GFX6-NEXT:   [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
429   ; GFX6-NEXT:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
430   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
431   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
432   ; GFX6-NEXT:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
433   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
434   ; GFX6-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
435   ; GFX6-NEXT:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
436   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
437   ; GFX6-NEXT:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
438   ; GFX6-NEXT:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
439   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
440   ; GFX6-NEXT:   $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
441   ; GFX6-NEXT:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
442   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
443   ; GFX6-NEXT:   $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
444   ; GFX6-NEXT:   [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
445   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
446   ; GFX6-NEXT:   $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
447   ; GFX6-NEXT:   [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
448   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
449   ; GFX6-NEXT:   $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
450   ; GFX6-NEXT:   [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
451   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
452   ; GFX6-NEXT:   $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
453   ; GFX6-NEXT:   [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
454   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
455   ; GFX6-NEXT:   $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
456   ; GFX6-NEXT:   [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
457   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
458   ; GFX6-NEXT:   $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
459   ; GFX6-NEXT:   [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
460   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
461   ; GFX6-NEXT:   $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
462   ; GFX6-NEXT:   [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
463   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
464   ; GFX6-NEXT:   $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
465   ; GFX6-NEXT:   [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
466   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
467   ; GFX6-NEXT:   $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
468   ; GFX6-NEXT:   [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
469   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
470   ; GFX6-NEXT:   $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
471   ; GFX6-NEXT:   [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
472   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
473   ; GFX6-NEXT:   $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
474   ; GFX6-NEXT:   [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
475   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec
476   ; GFX6-NEXT:   $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]]
477   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
478   ;
479   ; GFX7-LABEL: name: s_buffer_load_v16i32
480   ; GFX7: bb.1 (%ir-block.0):
481   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
482   ; GFX7-NEXT: {{  $}}
483   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
484   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
485   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
486   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
487   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
488   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
489   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4)
490   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
491   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
492   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
493   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
494   ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
495   ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
496   ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
497   ; GFX7-NEXT:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
498   ; GFX7-NEXT:   [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
499   ; GFX7-NEXT:   [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
500   ; GFX7-NEXT:   [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
501   ; GFX7-NEXT:   [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
502   ; GFX7-NEXT:   [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
503   ; GFX7-NEXT:   [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
504   ; GFX7-NEXT:   [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
505   ; GFX7-NEXT:   [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
506   ; GFX7-NEXT:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
507   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
508   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
509   ; GFX7-NEXT:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
510   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
511   ; GFX7-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
512   ; GFX7-NEXT:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
513   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
514   ; GFX7-NEXT:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
515   ; GFX7-NEXT:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
516   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
517   ; GFX7-NEXT:   $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
518   ; GFX7-NEXT:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
519   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
520   ; GFX7-NEXT:   $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
521   ; GFX7-NEXT:   [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
522   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
523   ; GFX7-NEXT:   $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
524   ; GFX7-NEXT:   [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
525   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
526   ; GFX7-NEXT:   $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
527   ; GFX7-NEXT:   [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
528   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
529   ; GFX7-NEXT:   $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
530   ; GFX7-NEXT:   [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
531   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
532   ; GFX7-NEXT:   $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
533   ; GFX7-NEXT:   [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
534   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
535   ; GFX7-NEXT:   $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
536   ; GFX7-NEXT:   [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
537   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
538   ; GFX7-NEXT:   $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
539   ; GFX7-NEXT:   [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
540   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
541   ; GFX7-NEXT:   $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
542   ; GFX7-NEXT:   [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
543   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
544   ; GFX7-NEXT:   $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
545   ; GFX7-NEXT:   [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
546   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
547   ; GFX7-NEXT:   $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
548   ; GFX7-NEXT:   [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
549   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
550   ; GFX7-NEXT:   $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
551   ; GFX7-NEXT:   [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
552   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec
553   ; GFX7-NEXT:   $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]]
554   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
555   ;
556   ; GFX8-LABEL: name: s_buffer_load_v16i32
557   ; GFX8: bb.1 (%ir-block.0):
558   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
559   ; GFX8-NEXT: {{  $}}
560   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
561   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
562   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
563   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
564   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
565   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
566   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4)
567   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
568   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
569   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
570   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
571   ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
572   ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
573   ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
574   ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
575   ; GFX8-NEXT:   [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
576   ; GFX8-NEXT:   [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
577   ; GFX8-NEXT:   [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
578   ; GFX8-NEXT:   [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
579   ; GFX8-NEXT:   [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
580   ; GFX8-NEXT:   [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
581   ; GFX8-NEXT:   [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
582   ; GFX8-NEXT:   [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
583   ; GFX8-NEXT:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
584   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
585   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
586   ; GFX8-NEXT:   [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
587   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
588   ; GFX8-NEXT:   $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
589   ; GFX8-NEXT:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
590   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
591   ; GFX8-NEXT:   $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
592   ; GFX8-NEXT:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
593   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
594   ; GFX8-NEXT:   $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
595   ; GFX8-NEXT:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
596   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
597   ; GFX8-NEXT:   $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
598   ; GFX8-NEXT:   [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
599   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
600   ; GFX8-NEXT:   $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
601   ; GFX8-NEXT:   [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
602   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
603   ; GFX8-NEXT:   $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
604   ; GFX8-NEXT:   [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
605   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
606   ; GFX8-NEXT:   $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
607   ; GFX8-NEXT:   [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
608   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
609   ; GFX8-NEXT:   $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
610   ; GFX8-NEXT:   [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
611   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
612   ; GFX8-NEXT:   $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
613   ; GFX8-NEXT:   [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
614   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
615   ; GFX8-NEXT:   $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
616   ; GFX8-NEXT:   [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
617   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
618   ; GFX8-NEXT:   $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
619   ; GFX8-NEXT:   [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
620   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
621   ; GFX8-NEXT:   $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
622   ; GFX8-NEXT:   [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
623   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
624   ; GFX8-NEXT:   $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
625   ; GFX8-NEXT:   [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
626   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
627   ; GFX8-NEXT:   $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
628   ; GFX8-NEXT:   [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
629   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec
630   ; GFX8-NEXT:   $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]]
631   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
632   %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
633   ret <16 x i32> %val
634 }
636 define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) {
637   ; GFX6-LABEL: name: s_buffer_load_i32_offset_1
638   ; GFX6: bb.1 (%ir-block.0):
639   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
640   ; GFX6-NEXT: {{  $}}
641   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
642   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
643   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
644   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
645   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
646   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
647   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
648   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
649   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
650   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
651   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
652   ;
653   ; GFX7-LABEL: name: s_buffer_load_i32_offset_1
654   ; GFX7: bb.1 (%ir-block.0):
655   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
656   ; GFX7-NEXT: {{  $}}
657   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
658   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
659   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
660   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
661   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
662   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
663   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
664   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
665   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
666   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
667   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
668   ;
669   ; GFX8-LABEL: name: s_buffer_load_i32_offset_1
670   ; GFX8: bb.1 (%ir-block.0):
671   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
672   ; GFX8-NEXT: {{  $}}
673   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
674   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
675   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
676   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
677   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
678   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0 :: (dereferenceable invariant load (s32))
679   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
680   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
681   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
682   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
683   %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1, i32 0)
684   ret i32 %val
685 }
687 define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) {
688   ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_4
689   ; GFX6: bb.1 (%ir-block.0):
690   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
691   ; GFX6-NEXT: {{  $}}
692   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
693   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
694   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
695   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
696   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
697   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32))
698   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
699   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
700   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
701   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
702   ;
703   ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_4
704   ; GFX7: bb.1 (%ir-block.0):
705   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
706   ; GFX7-NEXT: {{  $}}
707   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
708   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
709   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
710   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
711   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
712   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32))
713   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
714   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
715   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
716   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
717   ;
718   ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_4
719   ; GFX8: bb.1 (%ir-block.0):
720   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
721   ; GFX8-NEXT: {{  $}}
722   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
723   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
724   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
725   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
726   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
727   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1 :: (dereferenceable invariant load (s32))
728   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
729   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
730   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
731   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
732   %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 4, i32 1)
733   ret i32 %val
734 }
736 define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) {
737   ; GFX6-LABEL: name: s_buffer_load_i32_offset_255
738   ; GFX6: bb.1 (%ir-block.0):
739   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
740   ; GFX6-NEXT: {{  $}}
741   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
742   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
743   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
744   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
745   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
746   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
747   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
748   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
749   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
750   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
751   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
752   ;
753   ; GFX7-LABEL: name: s_buffer_load_i32_offset_255
754   ; GFX7: bb.1 (%ir-block.0):
755   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
756   ; GFX7-NEXT: {{  $}}
757   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
758   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
759   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
760   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
761   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
762   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
763   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
764   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
765   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
766   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
767   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
768   ;
769   ; GFX8-LABEL: name: s_buffer_load_i32_offset_255
770   ; GFX8: bb.1 (%ir-block.0):
771   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
772   ; GFX8-NEXT: {{  $}}
773   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
774   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
775   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
776   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
777   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
778   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32))
779   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
780   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
781   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
782   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
783   %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 255, i32 0)
784   ret i32 %val
785 }
787 define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) {
788   ; GFX6-LABEL: name: s_buffer_load_i32_offset_256
789   ; GFX6: bb.1 (%ir-block.0):
790   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
791   ; GFX6-NEXT: {{  $}}
792   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
793   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
794   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
795   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
796   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
797   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32))
798   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
799   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
800   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
801   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
802   ;
803   ; GFX7-LABEL: name: s_buffer_load_i32_offset_256
804   ; GFX7: bb.1 (%ir-block.0):
805   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
806   ; GFX7-NEXT: {{  $}}
807   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
808   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
809   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
810   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
811   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
812   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32))
813   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
814   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
815   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
816   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
817   ;
818   ; GFX8-LABEL: name: s_buffer_load_i32_offset_256
819   ; GFX8: bb.1 (%ir-block.0):
820   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
821   ; GFX8-NEXT: {{  $}}
822   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
823   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
824   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
825   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
826   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
827   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32))
828   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
829   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
830   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
831   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
832   %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 256, i32 0)
833   ret i32 %val
834 }
836 define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) {
837   ; GFX6-LABEL: name: s_buffer_load_i32_offset_1020
838   ; GFX6: bb.1 (%ir-block.0):
839   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
840   ; GFX6-NEXT: {{  $}}
841   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
842   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
843   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
844   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
845   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
846   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32))
847   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
848   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
849   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
850   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
851   ;
852   ; GFX7-LABEL: name: s_buffer_load_i32_offset_1020
853   ; GFX7: bb.1 (%ir-block.0):
854   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
855   ; GFX7-NEXT: {{  $}}
856   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
857   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
858   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
859   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
860   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
861   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32))
862   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
863   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
864   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
865   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
866   ;
867   ; GFX8-LABEL: name: s_buffer_load_i32_offset_1020
868   ; GFX8: bb.1 (%ir-block.0):
869   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
870   ; GFX8-NEXT: {{  $}}
871   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
872   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
873   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
874   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
875   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
876   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0 :: (dereferenceable invariant load (s32))
877   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
878   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
879   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
880   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
881   %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1020, i32 0)
882   ret i32 %val
883 }
885 define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) {
886   ; GFX6-LABEL: name: s_buffer_load_i32_offset_1023
887   ; GFX6: bb.1 (%ir-block.0):
888   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
889   ; GFX6-NEXT: {{  $}}
890   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
891   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
892   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
893   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
894   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
895   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023
896   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
897   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
898   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
899   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
900   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
901   ;
902   ; GFX7-LABEL: name: s_buffer_load_i32_offset_1023
903   ; GFX7: bb.1 (%ir-block.0):
904   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
905   ; GFX7-NEXT: {{  $}}
906   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
907   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
908   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
909   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
910   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
911   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023
912   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
913   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
914   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
915   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
916   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
917   ;
918   ; GFX8-LABEL: name: s_buffer_load_i32_offset_1023
919   ; GFX8: bb.1 (%ir-block.0):
920   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
921   ; GFX8-NEXT: {{  $}}
922   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
923   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
924   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
925   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
926   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
927   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0 :: (dereferenceable invariant load (s32))
928   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
929   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
930   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
931   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
932   %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1023, i32 0)
933   ret i32 %val
934 }
936 define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) {
937   ; GFX6-LABEL: name: s_buffer_load_i32_offset_1024
938   ; GFX6: bb.1 (%ir-block.0):
939   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
940   ; GFX6-NEXT: {{  $}}
941   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
942   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
943   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
944   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
945   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
946   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
947   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
948   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
949   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
950   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
951   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
952   ;
953   ; GFX7-LABEL: name: s_buffer_load_i32_offset_1024
954   ; GFX7: bb.1 (%ir-block.0):
955   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
956   ; GFX7-NEXT: {{  $}}
957   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
958   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
959   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
960   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
961   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
962   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32))
963   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
964   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
965   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
966   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
967   ;
968   ; GFX8-LABEL: name: s_buffer_load_i32_offset_1024
969   ; GFX8: bb.1 (%ir-block.0):
970   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
971   ; GFX8-NEXT: {{  $}}
972   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
973   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
974   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
975   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
976   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
977   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0 :: (dereferenceable invariant load (s32))
978   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
979   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
980   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
981   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
982   %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1024, i32 0)
983   ret i32 %val
984 }
986 define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) {
987   ; GFX6-LABEL: name: s_buffer_load_i32_offset_1025
988   ; GFX6: bb.1 (%ir-block.0):
989   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
990   ; GFX6-NEXT: {{  $}}
991   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
992   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
993   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
994   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
995   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
996   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025
997   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
998   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
999   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1000   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1001   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1002   ;
1003   ; GFX7-LABEL: name: s_buffer_load_i32_offset_1025
1004   ; GFX7: bb.1 (%ir-block.0):
1005   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1006   ; GFX7-NEXT: {{  $}}
1007   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1008   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1009   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1010   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1011   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1012   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025
1013   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1014   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1015   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1016   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1017   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1018   ;
1019   ; GFX8-LABEL: name: s_buffer_load_i32_offset_1025
1020   ; GFX8: bb.1 (%ir-block.0):
1021   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1022   ; GFX8-NEXT: {{  $}}
1023   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1024   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1025   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1026   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1027   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1028   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0 :: (dereferenceable invariant load (s32))
1029   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
1030   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1031   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1032   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1033   %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1025, i32 0)
1034   ret i32 %val
1035 }
1037 define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) {
1038   ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg1
1039   ; GFX6: bb.1 (%ir-block.0):
1040   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1041   ; GFX6-NEXT: {{  $}}
1042   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1043   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1044   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1045   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1046   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1047   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
1048   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1049   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1050   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1051   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1052   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1053   ;
1054   ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg1
1055   ; GFX7: bb.1 (%ir-block.0):
1056   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1057   ; GFX7-NEXT: {{  $}}
1058   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1059   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1060   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1061   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1062   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1063   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
1064   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1065   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1066   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1067   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1068   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1069   ;
1070   ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg1
1071   ; GFX8: bb.1 (%ir-block.0):
1072   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1073   ; GFX8-NEXT: {{  $}}
1074   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1075   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1076   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1077   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1078   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1079   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
1080   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1081   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1082   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1083   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1084   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1085   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0)
1086   ret i32 %load
1087 }
1089 define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) {
1090   ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg4
1091   ; GFX6: bb.1 (%ir-block.0):
1092   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1093   ; GFX6-NEXT: {{  $}}
1094   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1095   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1096   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1097   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1098   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1099   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4
1100   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1101   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1102   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1103   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1104   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1105   ;
1106   ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg4
1107   ; GFX7: bb.1 (%ir-block.0):
1108   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1109   ; GFX7-NEXT: {{  $}}
1110   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1111   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1112   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1113   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1114   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1115   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0 :: (dereferenceable invariant load (s32))
1116   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1117   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1118   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1119   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1120   ;
1121   ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg4
1122   ; GFX8: bb.1 (%ir-block.0):
1123   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1124   ; GFX8-NEXT: {{  $}}
1125   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1126   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1127   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1128   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1129   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1130   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4
1131   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1132   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1133   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1134   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1135   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1136   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0)
1137   ret i32 %load
1138 }
1140 define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) {
1141   ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg8
1142   ; GFX6: bb.1 (%ir-block.0):
1143   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1144   ; GFX6-NEXT: {{  $}}
1145   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1146   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1147   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1148   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1149   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1150   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8
1151   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1152   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1153   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1154   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1155   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1156   ;
1157   ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg8
1158   ; GFX7: bb.1 (%ir-block.0):
1159   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1160   ; GFX7-NEXT: {{  $}}
1161   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1162   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1163   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1164   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1165   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1166   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0 :: (dereferenceable invariant load (s32))
1167   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1168   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1169   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1170   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1171   ;
1172   ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg8
1173   ; GFX8: bb.1 (%ir-block.0):
1174   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1175   ; GFX8-NEXT: {{  $}}
1176   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1177   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1178   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1179   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1180   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1181   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8
1182   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1183   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1184   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1185   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1186   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1187   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0)
1188   ret i32 %load
1189 }
1191 define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) {
1192   ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit31
1193   ; GFX6: bb.1 (%ir-block.0):
1194   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1195   ; GFX6-NEXT: {{  $}}
1196   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1197   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1198   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1199   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1200   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1201   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
1202   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1203   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1204   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1205   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1206   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1207   ;
1208   ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit31
1209   ; GFX7: bb.1 (%ir-block.0):
1210   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1211   ; GFX7-NEXT: {{  $}}
1212   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1213   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1214   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1215   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1216   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1217   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0 :: (dereferenceable invariant load (s32))
1218   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1219   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1220   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1221   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1222   ;
1223   ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit31
1224   ; GFX8: bb.1 (%ir-block.0):
1225   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1226   ; GFX8-NEXT: {{  $}}
1227   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1228   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1229   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1230   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1231   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1232   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
1233   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1234   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1235   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1236   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1237   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1238   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0)
1239   ret i32 %load
1240 }
1242 define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) {
1243   ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_bit30
1244   ; GFX6: bb.1 (%ir-block.0):
1245   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1246   ; GFX6-NEXT: {{  $}}
1247   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1248   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1249   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1250   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1251   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1252   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824
1253   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32))
1254   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1255   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1256   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1257   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1258   ;
1259   ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_bit30
1260   ; GFX7: bb.1 (%ir-block.0):
1261   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1262   ; GFX7-NEXT: {{  $}}
1263   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1264   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1265   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1266   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1267   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1268   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1 :: (dereferenceable invariant load (s32))
1269   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1270   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1271   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1272   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1273   ;
1274   ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_bit30
1275   ; GFX8: bb.1 (%ir-block.0):
1276   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1277   ; GFX8-NEXT: {{  $}}
1278   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1279   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1280   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1281   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1282   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1283   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824
1284   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32))
1285   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1286   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1287   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1288   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1289   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 1)
1290   ret i32 %load
1291 }
1293 define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) {
1294   ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit29
1295   ; GFX6: bb.1 (%ir-block.0):
1296   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1297   ; GFX6-NEXT: {{  $}}
1298   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1299   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1300   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1301   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1302   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1303   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912
1304   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1305   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1306   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1307   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1308   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1309   ;
1310   ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit29
1311   ; GFX7: bb.1 (%ir-block.0):
1312   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1313   ; GFX7-NEXT: {{  $}}
1314   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1315   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1316   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1317   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1318   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1319   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0 :: (dereferenceable invariant load (s32))
1320   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1321   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1322   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1323   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1324   ;
1325   ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit29
1326   ; GFX8: bb.1 (%ir-block.0):
1327   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1328   ; GFX8-NEXT: {{  $}}
1329   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1330   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1331   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1332   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1333   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1334   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912
1335   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1336   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1337   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1338   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1339   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1340   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0)
1341   ret i32 %load
1342 }
1344 define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) {
1345   ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit21
1346   ; GFX6: bb.1 (%ir-block.0):
1347   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1348   ; GFX6-NEXT: {{  $}}
1349   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1350   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1351   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1352   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1353   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1354   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152
1355   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1356   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1357   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1358   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1359   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1360   ;
1361   ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit21
1362   ; GFX7: bb.1 (%ir-block.0):
1363   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1364   ; GFX7-NEXT: {{  $}}
1365   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1366   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1367   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1368   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1369   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1370   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32))
1371   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1372   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1373   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1374   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1375   ;
1376   ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit21
1377   ; GFX8: bb.1 (%ir-block.0):
1378   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1379   ; GFX8-NEXT: {{  $}}
1380   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1381   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1382   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1383   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1384   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1385   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152
1386   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1387   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1388   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1389   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1390   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1391   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0)
1392   ret i32 %load
1393 }
1395 define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) {
1396   ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit20
1397   ; GFX6: bb.1 (%ir-block.0):
1398   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1399   ; GFX6-NEXT: {{  $}}
1400   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1401   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1402   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1403   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1404   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1405   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
1406   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1407   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1408   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1409   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1410   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1411   ;
1412   ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit20
1413   ; GFX7: bb.1 (%ir-block.0):
1414   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1415   ; GFX7-NEXT: {{  $}}
1416   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1417   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1418   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1419   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1420   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1421   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0 :: (dereferenceable invariant load (s32))
1422   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1423   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1424   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1425   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1426   ;
1427   ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit20
1428   ; GFX8: bb.1 (%ir-block.0):
1429   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1430   ; GFX8-NEXT: {{  $}}
1431   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1432   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1433   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1434   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1435   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1436   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
1437   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1438   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1439   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1440   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1441   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1442   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0)
1443   ret i32 %load
1444 }
1446 define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) {
1447   ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit20
1448   ; GFX6: bb.1 (%ir-block.0):
1449   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1450   ; GFX6-NEXT: {{  $}}
1451   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1452   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1453   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1454   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1455   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1456   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576
1457   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1458   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1459   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1460   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1461   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1462   ;
1463   ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit20
1464   ; GFX7: bb.1 (%ir-block.0):
1465   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1466   ; GFX7-NEXT: {{  $}}
1467   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1468   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1469   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1470   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1471   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1472   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0 :: (dereferenceable invariant load (s32))
1473   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1474   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1475   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1476   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1477   ;
1478   ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit20
1479   ; GFX8: bb.1 (%ir-block.0):
1480   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1481   ; GFX8-NEXT: {{  $}}
1482   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1483   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1484   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1485   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1486   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1487   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576
1488   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1489   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1490   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1491   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1492   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1493   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0)
1494   ret i32 %load
1495 }
1497 define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) {
1498   ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit19
1499   ; GFX6: bb.1 (%ir-block.0):
1500   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1501   ; GFX6-NEXT: {{  $}}
1502   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1503   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1504   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1505   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1506   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1507   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288
1508   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1509   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1510   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1511   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1512   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1513   ;
1514   ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit19
1515   ; GFX7: bb.1 (%ir-block.0):
1516   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1517   ; GFX7-NEXT: {{  $}}
1518   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1519   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1520   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1521   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1522   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1523   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0 :: (dereferenceable invariant load (s32))
1524   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1525   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1526   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1527   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1528   ;
1529   ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit19
1530   ; GFX8: bb.1 (%ir-block.0):
1531   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1532   ; GFX8-NEXT: {{  $}}
1533   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1534   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1535   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1536   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1537   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1538   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32))
1539   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
1540   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1541   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1542   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1543   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0)
1544   ret i32 %load
1545 }
1547 define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) {
1548   ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit19
1549   ; GFX6: bb.1 (%ir-block.0):
1550   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1551   ; GFX6-NEXT: {{  $}}
1552   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1553   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1554   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1555   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1556   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1557   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
1558   ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1559   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1560   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1561   ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1562   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1563   ;
1564   ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit19
1565   ; GFX7: bb.1 (%ir-block.0):
1566   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1567   ; GFX7-NEXT: {{  $}}
1568   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1569   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1570   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1571   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1572   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1573   ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0 :: (dereferenceable invariant load (s32))
1574   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
1575   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1576   ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1577   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1578   ;
1579   ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit19
1580   ; GFX8: bb.1 (%ir-block.0):
1581   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
1582   ; GFX8-NEXT: {{  $}}
1583   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1584   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1585   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1586   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1587   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1588   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
1589   ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
1590   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
1591   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
1592   ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
1593   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
1594   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0)
1595   ret i32 %load
1596 }
1598 ; Check cases that need to be converted to MUBUF due to the offset being a VGPR.
1599 define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
1600   ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset
1601   ; GFX6: bb.1 (%ir-block.0):
1602   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1603   ; GFX6-NEXT: {{  $}}
1604   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1605   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1606   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1607   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1608   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1609   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1610   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1611   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
1612   ; GFX6-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
1613   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
1614   ;
1615   ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset
1616   ; GFX7: bb.1 (%ir-block.0):
1617   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1618   ; GFX7-NEXT: {{  $}}
1619   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1620   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1621   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1622   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1623   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1624   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1625   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1626   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
1627   ; GFX7-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
1628   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
1629   ;
1630   ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset
1631   ; GFX8: bb.1 (%ir-block.0):
1632   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1633   ; GFX8-NEXT: {{  $}}
1634   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1635   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1636   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1637   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1638   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1639   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1640   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1641   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
1642   ; GFX8-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
1643   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
1644   %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
1645   ret float %val
1646 }
1648 define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
1649   ; GFX6-LABEL: name: s_buffer_load_v2f32_vgpr_offset
1650   ; GFX6: bb.1 (%ir-block.0):
1651   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1652   ; GFX6-NEXT: {{  $}}
1653   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1654   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1655   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1656   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1657   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1658   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1659   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1660   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4)
1661   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
1662   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
1663   ; GFX6-NEXT:   $vgpr0 = COPY [[COPY5]]
1664   ; GFX6-NEXT:   $vgpr1 = COPY [[COPY6]]
1665   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1666   ;
1667   ; GFX7-LABEL: name: s_buffer_load_v2f32_vgpr_offset
1668   ; GFX7: bb.1 (%ir-block.0):
1669   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1670   ; GFX7-NEXT: {{  $}}
1671   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1672   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1673   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1674   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1675   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1676   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1677   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1678   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4)
1679   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
1680   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
1681   ; GFX7-NEXT:   $vgpr0 = COPY [[COPY5]]
1682   ; GFX7-NEXT:   $vgpr1 = COPY [[COPY6]]
1683   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1684   ;
1685   ; GFX8-LABEL: name: s_buffer_load_v2f32_vgpr_offset
1686   ; GFX8: bb.1 (%ir-block.0):
1687   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1688   ; GFX8-NEXT: {{  $}}
1689   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1690   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1691   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1692   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1693   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1694   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1695   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1696   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4)
1697   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
1698   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
1699   ; GFX8-NEXT:   $vgpr0 = COPY [[COPY5]]
1700   ; GFX8-NEXT:   $vgpr1 = COPY [[COPY6]]
1701   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1702   %val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
1703   ret <2 x float> %val
1704 }
1706 define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
1707   ; GFX6-LABEL: name: s_buffer_load_v3f32_vgpr_offset
1708   ; GFX6: bb.1 (%ir-block.0):
1709   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1710   ; GFX6-NEXT: {{  $}}
1711   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1712   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1713   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1714   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1715   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1716   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1717   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1718   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1719   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
1720   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
1721   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
1722   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
1723   ; GFX6-NEXT:   $vgpr0 = COPY [[COPY5]]
1724   ; GFX6-NEXT:   $vgpr1 = COPY [[COPY6]]
1725   ; GFX6-NEXT:   $vgpr2 = COPY [[COPY7]]
1726   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
1727   ;
1728   ; GFX7-LABEL: name: s_buffer_load_v3f32_vgpr_offset
1729   ; GFX7: bb.1 (%ir-block.0):
1730   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1731   ; GFX7-NEXT: {{  $}}
1732   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1733   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1734   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1735   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1736   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1737   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1738   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1739   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1740   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
1741   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
1742   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
1743   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
1744   ; GFX7-NEXT:   $vgpr0 = COPY [[COPY5]]
1745   ; GFX7-NEXT:   $vgpr1 = COPY [[COPY6]]
1746   ; GFX7-NEXT:   $vgpr2 = COPY [[COPY7]]
1747   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
1748   ;
1749   ; GFX8-LABEL: name: s_buffer_load_v3f32_vgpr_offset
1750   ; GFX8: bb.1 (%ir-block.0):
1751   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1752   ; GFX8-NEXT: {{  $}}
1753   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1754   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1755   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1756   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1757   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1758   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1759   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1760   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1761   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
1762   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
1763   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
1764   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
1765   ; GFX8-NEXT:   $vgpr0 = COPY [[COPY5]]
1766   ; GFX8-NEXT:   $vgpr1 = COPY [[COPY6]]
1767   ; GFX8-NEXT:   $vgpr2 = COPY [[COPY7]]
1768   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
1769   %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
1770   ret <3 x float> %val
1771 }
1773 define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
1774   ; GFX6-LABEL: name: s_buffer_load_v4f32_vgpr_offset
1775   ; GFX6: bb.1 (%ir-block.0):
1776   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1777   ; GFX6-NEXT: {{  $}}
1778   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1779   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1780   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1781   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1782   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1783   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1784   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1785   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1786   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
1787   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
1788   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
1789   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
1790   ; GFX6-NEXT:   $vgpr0 = COPY [[COPY5]]
1791   ; GFX6-NEXT:   $vgpr1 = COPY [[COPY6]]
1792   ; GFX6-NEXT:   $vgpr2 = COPY [[COPY7]]
1793   ; GFX6-NEXT:   $vgpr3 = COPY [[COPY8]]
1794   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
1795   ;
1796   ; GFX7-LABEL: name: s_buffer_load_v4f32_vgpr_offset
1797   ; GFX7: bb.1 (%ir-block.0):
1798   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1799   ; GFX7-NEXT: {{  $}}
1800   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1801   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1802   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1803   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1804   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1805   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1806   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1807   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1808   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
1809   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
1810   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
1811   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
1812   ; GFX7-NEXT:   $vgpr0 = COPY [[COPY5]]
1813   ; GFX7-NEXT:   $vgpr1 = COPY [[COPY6]]
1814   ; GFX7-NEXT:   $vgpr2 = COPY [[COPY7]]
1815   ; GFX7-NEXT:   $vgpr3 = COPY [[COPY8]]
1816   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
1817   ;
1818   ; GFX8-LABEL: name: s_buffer_load_v4f32_vgpr_offset
1819   ; GFX8: bb.1 (%ir-block.0):
1820   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1821   ; GFX8-NEXT: {{  $}}
1822   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1823   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1824   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1825   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1826   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1827   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1828   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1829   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1830   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
1831   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
1832   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
1833   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
1834   ; GFX8-NEXT:   $vgpr0 = COPY [[COPY5]]
1835   ; GFX8-NEXT:   $vgpr1 = COPY [[COPY6]]
1836   ; GFX8-NEXT:   $vgpr2 = COPY [[COPY7]]
1837   ; GFX8-NEXT:   $vgpr3 = COPY [[COPY8]]
1838   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
1839   %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
1840   ret <4 x float> %val
1841 }
1843 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
1844   ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset
1845   ; GFX6: bb.1 (%ir-block.0):
1846   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1847   ; GFX6-NEXT: {{  $}}
1848   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1849   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1850   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1851   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1852   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1853   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1854   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1855   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1856   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1857   ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
1858   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
1859   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
1860   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
1861   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
1862   ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
1863   ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
1864   ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
1865   ; GFX6-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
1866   ; GFX6-NEXT:   $vgpr0 = COPY [[COPY5]]
1867   ; GFX6-NEXT:   $vgpr1 = COPY [[COPY6]]
1868   ; GFX6-NEXT:   $vgpr2 = COPY [[COPY7]]
1869   ; GFX6-NEXT:   $vgpr3 = COPY [[COPY8]]
1870   ; GFX6-NEXT:   $vgpr4 = COPY [[COPY9]]
1871   ; GFX6-NEXT:   $vgpr5 = COPY [[COPY10]]
1872   ; GFX6-NEXT:   $vgpr6 = COPY [[COPY11]]
1873   ; GFX6-NEXT:   $vgpr7 = COPY [[COPY12]]
1874   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
1875   ;
1876   ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset
1877   ; GFX7: bb.1 (%ir-block.0):
1878   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1879   ; GFX7-NEXT: {{  $}}
1880   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1881   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1882   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1883   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1884   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1885   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1886   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1887   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1888   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1889   ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
1890   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
1891   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
1892   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
1893   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
1894   ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
1895   ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
1896   ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
1897   ; GFX7-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
1898   ; GFX7-NEXT:   $vgpr0 = COPY [[COPY5]]
1899   ; GFX7-NEXT:   $vgpr1 = COPY [[COPY6]]
1900   ; GFX7-NEXT:   $vgpr2 = COPY [[COPY7]]
1901   ; GFX7-NEXT:   $vgpr3 = COPY [[COPY8]]
1902   ; GFX7-NEXT:   $vgpr4 = COPY [[COPY9]]
1903   ; GFX7-NEXT:   $vgpr5 = COPY [[COPY10]]
1904   ; GFX7-NEXT:   $vgpr6 = COPY [[COPY11]]
1905   ; GFX7-NEXT:   $vgpr7 = COPY [[COPY12]]
1906   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
1907   ;
1908   ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset
1909   ; GFX8: bb.1 (%ir-block.0):
1910   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1911   ; GFX8-NEXT: {{  $}}
1912   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1913   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1914   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1915   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1916   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1917   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1918   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1919   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1920   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1921   ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
1922   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
1923   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
1924   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
1925   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
1926   ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
1927   ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
1928   ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
1929   ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
1930   ; GFX8-NEXT:   $vgpr0 = COPY [[COPY5]]
1931   ; GFX8-NEXT:   $vgpr1 = COPY [[COPY6]]
1932   ; GFX8-NEXT:   $vgpr2 = COPY [[COPY7]]
1933   ; GFX8-NEXT:   $vgpr3 = COPY [[COPY8]]
1934   ; GFX8-NEXT:   $vgpr4 = COPY [[COPY9]]
1935   ; GFX8-NEXT:   $vgpr5 = COPY [[COPY10]]
1936   ; GFX8-NEXT:   $vgpr6 = COPY [[COPY11]]
1937   ; GFX8-NEXT:   $vgpr7 = COPY [[COPY12]]
1938   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
1939   %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
1940   ret <8 x float> %val
1941 }
1943 define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
1944   ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset
1945   ; GFX6: bb.1 (%ir-block.0):
1946   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1947   ; GFX6-NEXT: {{  $}}
1948   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1949   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
1950   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
1951   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
1952   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
1953   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1954   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1955   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1956   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
1957   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
1958   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
1959   ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
1960   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
1961   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
1962   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
1963   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
1964   ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
1965   ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
1966   ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
1967   ; GFX6-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
1968   ; GFX6-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
1969   ; GFX6-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
1970   ; GFX6-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
1971   ; GFX6-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
1972   ; GFX6-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
1973   ; GFX6-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
1974   ; GFX6-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
1975   ; GFX6-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
1976   ; GFX6-NEXT:   $vgpr0 = COPY [[COPY5]]
1977   ; GFX6-NEXT:   $vgpr1 = COPY [[COPY6]]
1978   ; GFX6-NEXT:   $vgpr2 = COPY [[COPY7]]
1979   ; GFX6-NEXT:   $vgpr3 = COPY [[COPY8]]
1980   ; GFX6-NEXT:   $vgpr4 = COPY [[COPY9]]
1981   ; GFX6-NEXT:   $vgpr5 = COPY [[COPY10]]
1982   ; GFX6-NEXT:   $vgpr6 = COPY [[COPY11]]
1983   ; GFX6-NEXT:   $vgpr7 = COPY [[COPY12]]
1984   ; GFX6-NEXT:   $vgpr8 = COPY [[COPY13]]
1985   ; GFX6-NEXT:   $vgpr9 = COPY [[COPY14]]
1986   ; GFX6-NEXT:   $vgpr10 = COPY [[COPY15]]
1987   ; GFX6-NEXT:   $vgpr11 = COPY [[COPY16]]
1988   ; GFX6-NEXT:   $vgpr12 = COPY [[COPY17]]
1989   ; GFX6-NEXT:   $vgpr13 = COPY [[COPY18]]
1990   ; GFX6-NEXT:   $vgpr14 = COPY [[COPY19]]
1991   ; GFX6-NEXT:   $vgpr15 = COPY [[COPY20]]
1992   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
1993   ;
1994   ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset
1995   ; GFX7: bb.1 (%ir-block.0):
1996   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
1997   ; GFX7-NEXT: {{  $}}
1998   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
1999   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2000   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2001   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2002   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2003   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2004   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2005   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2006   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2007   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
2008   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
2009   ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2010   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2011   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2012   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2013   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2014   ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2015   ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2016   ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2017   ; GFX7-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2018   ; GFX7-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2019   ; GFX7-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2020   ; GFX7-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2021   ; GFX7-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2022   ; GFX7-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2023   ; GFX7-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2024   ; GFX7-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2025   ; GFX7-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2026   ; GFX7-NEXT:   $vgpr0 = COPY [[COPY5]]
2027   ; GFX7-NEXT:   $vgpr1 = COPY [[COPY6]]
2028   ; GFX7-NEXT:   $vgpr2 = COPY [[COPY7]]
2029   ; GFX7-NEXT:   $vgpr3 = COPY [[COPY8]]
2030   ; GFX7-NEXT:   $vgpr4 = COPY [[COPY9]]
2031   ; GFX7-NEXT:   $vgpr5 = COPY [[COPY10]]
2032   ; GFX7-NEXT:   $vgpr6 = COPY [[COPY11]]
2033   ; GFX7-NEXT:   $vgpr7 = COPY [[COPY12]]
2034   ; GFX7-NEXT:   $vgpr8 = COPY [[COPY13]]
2035   ; GFX7-NEXT:   $vgpr9 = COPY [[COPY14]]
2036   ; GFX7-NEXT:   $vgpr10 = COPY [[COPY15]]
2037   ; GFX7-NEXT:   $vgpr11 = COPY [[COPY16]]
2038   ; GFX7-NEXT:   $vgpr12 = COPY [[COPY17]]
2039   ; GFX7-NEXT:   $vgpr13 = COPY [[COPY18]]
2040   ; GFX7-NEXT:   $vgpr14 = COPY [[COPY19]]
2041   ; GFX7-NEXT:   $vgpr15 = COPY [[COPY20]]
2042   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2043   ;
2044   ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset
2045   ; GFX8: bb.1 (%ir-block.0):
2046   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2047   ; GFX8-NEXT: {{  $}}
2048   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2049   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2050   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2051   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2052   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2053   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2054   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2055   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2056   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2057   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
2058   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
2059   ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2060   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2061   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2062   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2063   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2064   ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2065   ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2066   ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2067   ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2068   ; GFX8-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2069   ; GFX8-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2070   ; GFX8-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2071   ; GFX8-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2072   ; GFX8-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2073   ; GFX8-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2074   ; GFX8-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2075   ; GFX8-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2076   ; GFX8-NEXT:   $vgpr0 = COPY [[COPY5]]
2077   ; GFX8-NEXT:   $vgpr1 = COPY [[COPY6]]
2078   ; GFX8-NEXT:   $vgpr2 = COPY [[COPY7]]
2079   ; GFX8-NEXT:   $vgpr3 = COPY [[COPY8]]
2080   ; GFX8-NEXT:   $vgpr4 = COPY [[COPY9]]
2081   ; GFX8-NEXT:   $vgpr5 = COPY [[COPY10]]
2082   ; GFX8-NEXT:   $vgpr6 = COPY [[COPY11]]
2083   ; GFX8-NEXT:   $vgpr7 = COPY [[COPY12]]
2084   ; GFX8-NEXT:   $vgpr8 = COPY [[COPY13]]
2085   ; GFX8-NEXT:   $vgpr9 = COPY [[COPY14]]
2086   ; GFX8-NEXT:   $vgpr10 = COPY [[COPY15]]
2087   ; GFX8-NEXT:   $vgpr11 = COPY [[COPY16]]
2088   ; GFX8-NEXT:   $vgpr12 = COPY [[COPY17]]
2089   ; GFX8-NEXT:   $vgpr13 = COPY [[COPY18]]
2090   ; GFX8-NEXT:   $vgpr14 = COPY [[COPY19]]
2091   ; GFX8-NEXT:   $vgpr15 = COPY [[COPY20]]
2092   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2093   %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2094   ret <16 x float> %val
2095 }
2097 define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg %rsrc, i32 %soffset.base) {
2098   ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
2099   ; GFX6: bb.1 (%ir-block.0):
2100   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2101   ; GFX6-NEXT: {{  $}}
2102   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2103   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2104   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2105   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2106   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2107   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2108   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2109   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2110   ; GFX6-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2111   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
2112   ;
2113   ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
2114   ; GFX7: bb.1 (%ir-block.0):
2115   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2116   ; GFX7-NEXT: {{  $}}
2117   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2118   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2119   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2120   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2121   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2122   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2123   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2124   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2125   ; GFX7-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2126   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
2127   ;
2128   ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
2129   ; GFX8: bb.1 (%ir-block.0):
2130   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2131   ; GFX8-NEXT: {{  $}}
2132   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2133   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2134   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2135   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2136   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2137   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2138   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2139   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2140   ; GFX8-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2141   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
2142   %soffset = add i32 %soffset.base, 4092
2143   %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2144   ret float %val
2145 }
2147 define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg %rsrc, i32 %soffset.base) {
2148   ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
2149   ; GFX6: bb.1 (%ir-block.0):
2150   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2151   ; GFX6-NEXT: {{  $}}
2152   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2153   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2154   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2155   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2156   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2157   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2158   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2159   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2160   ; GFX6-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2161   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
2162   ;
2163   ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
2164   ; GFX7: bb.1 (%ir-block.0):
2165   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2166   ; GFX7-NEXT: {{  $}}
2167   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2168   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2169   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2170   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2171   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2172   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2173   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2174   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2175   ; GFX7-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2176   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
2177   ;
2178   ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
2179   ; GFX8: bb.1 (%ir-block.0):
2180   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2181   ; GFX8-NEXT: {{  $}}
2182   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2183   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2184   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2185   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2186   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2187   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2188   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2189   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2190   ; GFX8-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2191   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
2192   %soffset = add i32 %soffset.base, 4095
2193   %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2194   ret float %val
2195 }
2197 define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg %rsrc, i32 %soffset.base) {
2198   ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
2199   ; GFX6: bb.1 (%ir-block.0):
2200   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2201   ; GFX6-NEXT: {{  $}}
2202   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2203   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2204   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2205   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2206   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2207   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2208   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
2209   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2210   ; GFX6-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2211   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
2212   ;
2213   ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
2214   ; GFX7: bb.1 (%ir-block.0):
2215   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2216   ; GFX7-NEXT: {{  $}}
2217   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2218   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2219   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2220   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2221   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2222   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2223   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
2224   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2225   ; GFX7-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2226   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
2227   ;
2228   ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
2229   ; GFX8: bb.1 (%ir-block.0):
2230   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2231   ; GFX8-NEXT: {{  $}}
2232   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2233   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2234   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2235   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2236   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2237   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2238   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
2239   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2240   ; GFX8-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2241   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
2242   %soffset = add i32 %soffset.base, 4096
2243   %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2244   ret float %val
2245 }
2247 ; Make sure the base offset is added to each split load.
2248 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> inreg %rsrc, i32 %soffset.base) {
2249   ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064
2250   ; GFX6: bb.1 (%ir-block.0):
2251   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2252   ; GFX6-NEXT: {{  $}}
2253   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2254   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2255   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2256   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2257   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2258   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2259   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2260   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2261   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2262   ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
2263   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2264   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2265   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2266   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2267   ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2268   ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2269   ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2270   ; GFX6-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2271   ; GFX6-NEXT:   $vgpr0 = COPY [[COPY5]]
2272   ; GFX6-NEXT:   $vgpr1 = COPY [[COPY6]]
2273   ; GFX6-NEXT:   $vgpr2 = COPY [[COPY7]]
2274   ; GFX6-NEXT:   $vgpr3 = COPY [[COPY8]]
2275   ; GFX6-NEXT:   $vgpr4 = COPY [[COPY9]]
2276   ; GFX6-NEXT:   $vgpr5 = COPY [[COPY10]]
2277   ; GFX6-NEXT:   $vgpr6 = COPY [[COPY11]]
2278   ; GFX6-NEXT:   $vgpr7 = COPY [[COPY12]]
2279   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
2280   ;
2281   ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064
2282   ; GFX7: bb.1 (%ir-block.0):
2283   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2284   ; GFX7-NEXT: {{  $}}
2285   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2286   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2287   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2288   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2289   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2290   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2291   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2292   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2293   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2294   ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
2295   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2296   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2297   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2298   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2299   ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2300   ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2301   ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2302   ; GFX7-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2303   ; GFX7-NEXT:   $vgpr0 = COPY [[COPY5]]
2304   ; GFX7-NEXT:   $vgpr1 = COPY [[COPY6]]
2305   ; GFX7-NEXT:   $vgpr2 = COPY [[COPY7]]
2306   ; GFX7-NEXT:   $vgpr3 = COPY [[COPY8]]
2307   ; GFX7-NEXT:   $vgpr4 = COPY [[COPY9]]
2308   ; GFX7-NEXT:   $vgpr5 = COPY [[COPY10]]
2309   ; GFX7-NEXT:   $vgpr6 = COPY [[COPY11]]
2310   ; GFX7-NEXT:   $vgpr7 = COPY [[COPY12]]
2311   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
2312   ;
2313   ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064
2314   ; GFX8: bb.1 (%ir-block.0):
2315   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2316   ; GFX8-NEXT: {{  $}}
2317   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2318   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2319   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2320   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2321   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2322   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2323   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2324   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2325   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2326   ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
2327   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2328   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2329   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2330   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2331   ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2332   ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2333   ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2334   ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2335   ; GFX8-NEXT:   $vgpr0 = COPY [[COPY5]]
2336   ; GFX8-NEXT:   $vgpr1 = COPY [[COPY6]]
2337   ; GFX8-NEXT:   $vgpr2 = COPY [[COPY7]]
2338   ; GFX8-NEXT:   $vgpr3 = COPY [[COPY8]]
2339   ; GFX8-NEXT:   $vgpr4 = COPY [[COPY9]]
2340   ; GFX8-NEXT:   $vgpr5 = COPY [[COPY10]]
2341   ; GFX8-NEXT:   $vgpr6 = COPY [[COPY11]]
2342   ; GFX8-NEXT:   $vgpr7 = COPY [[COPY12]]
2343   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
2344   %soffset = add i32 %soffset.base, 4064
2345   %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2346   ret <8 x float> %val
2347 }
2348
2349 ; Make sure the maximum offset isn't exceeded when splitting this
2350 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> inreg %rsrc, i32 %soffset.base) {
2351   ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068
2352   ; GFX6: bb.1 (%ir-block.0):
2353   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2354   ; GFX6-NEXT: {{  $}}
2355   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2356   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2357   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2358   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2359   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2360   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2361   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
2362   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2363   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2364   ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
2365   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2366   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2367   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2368   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2369   ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2370   ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2371   ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2372   ; GFX6-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2373   ; GFX6-NEXT:   $vgpr0 = COPY [[COPY5]]
2374   ; GFX6-NEXT:   $vgpr1 = COPY [[COPY6]]
2375   ; GFX6-NEXT:   $vgpr2 = COPY [[COPY7]]
2376   ; GFX6-NEXT:   $vgpr3 = COPY [[COPY8]]
2377   ; GFX6-NEXT:   $vgpr4 = COPY [[COPY9]]
2378   ; GFX6-NEXT:   $vgpr5 = COPY [[COPY10]]
2379   ; GFX6-NEXT:   $vgpr6 = COPY [[COPY11]]
2380   ; GFX6-NEXT:   $vgpr7 = COPY [[COPY12]]
2381   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
2382   ;
2383   ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068
2384   ; GFX7: bb.1 (%ir-block.0):
2385   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2386   ; GFX7-NEXT: {{  $}}
2387   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2388   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2389   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2390   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2391   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2392   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2393   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
2394   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2395   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2396   ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
2397   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2398   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2399   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2400   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2401   ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2402   ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2403   ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2404   ; GFX7-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2405   ; GFX7-NEXT:   $vgpr0 = COPY [[COPY5]]
2406   ; GFX7-NEXT:   $vgpr1 = COPY [[COPY6]]
2407   ; GFX7-NEXT:   $vgpr2 = COPY [[COPY7]]
2408   ; GFX7-NEXT:   $vgpr3 = COPY [[COPY8]]
2409   ; GFX7-NEXT:   $vgpr4 = COPY [[COPY9]]
2410   ; GFX7-NEXT:   $vgpr5 = COPY [[COPY10]]
2411   ; GFX7-NEXT:   $vgpr6 = COPY [[COPY11]]
2412   ; GFX7-NEXT:   $vgpr7 = COPY [[COPY12]]
2413   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
2414   ;
2415   ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068
2416   ; GFX8: bb.1 (%ir-block.0):
2417   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2418   ; GFX8-NEXT: {{  $}}
2419   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2420   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2421   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2422   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2423   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2424   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2425   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
2426   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2427   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2428   ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
2429   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2430   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2431   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2432   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2433   ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2434   ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2435   ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2436   ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2437   ; GFX8-NEXT:   $vgpr0 = COPY [[COPY5]]
2438   ; GFX8-NEXT:   $vgpr1 = COPY [[COPY6]]
2439   ; GFX8-NEXT:   $vgpr2 = COPY [[COPY7]]
2440   ; GFX8-NEXT:   $vgpr3 = COPY [[COPY8]]
2441   ; GFX8-NEXT:   $vgpr4 = COPY [[COPY9]]
2442   ; GFX8-NEXT:   $vgpr5 = COPY [[COPY10]]
2443   ; GFX8-NEXT:   $vgpr6 = COPY [[COPY11]]
2444   ; GFX8-NEXT:   $vgpr7 = COPY [[COPY12]]
2445   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
2446   %soffset = add i32 %soffset.base, 4068
2447   %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2448   ret <8 x float> %val
2449 }
2450
2451 define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i32> inreg %rsrc, i32 %soffset.base) {
2452   ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
2453   ; GFX6: bb.1 (%ir-block.0):
2454   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2455   ; GFX6-NEXT: {{  $}}
2456   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2457   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2458   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2459   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2460   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2461   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2462   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2463   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2464   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2465   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
2466   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
2467   ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2468   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2469   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2470   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2471   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2472   ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2473   ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2474   ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2475   ; GFX6-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2476   ; GFX6-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2477   ; GFX6-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2478   ; GFX6-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2479   ; GFX6-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2480   ; GFX6-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2481   ; GFX6-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2482   ; GFX6-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2483   ; GFX6-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2484   ; GFX6-NEXT:   $vgpr0 = COPY [[COPY5]]
2485   ; GFX6-NEXT:   $vgpr1 = COPY [[COPY6]]
2486   ; GFX6-NEXT:   $vgpr2 = COPY [[COPY7]]
2487   ; GFX6-NEXT:   $vgpr3 = COPY [[COPY8]]
2488   ; GFX6-NEXT:   $vgpr4 = COPY [[COPY9]]
2489   ; GFX6-NEXT:   $vgpr5 = COPY [[COPY10]]
2490   ; GFX6-NEXT:   $vgpr6 = COPY [[COPY11]]
2491   ; GFX6-NEXT:   $vgpr7 = COPY [[COPY12]]
2492   ; GFX6-NEXT:   $vgpr8 = COPY [[COPY13]]
2493   ; GFX6-NEXT:   $vgpr9 = COPY [[COPY14]]
2494   ; GFX6-NEXT:   $vgpr10 = COPY [[COPY15]]
2495   ; GFX6-NEXT:   $vgpr11 = COPY [[COPY16]]
2496   ; GFX6-NEXT:   $vgpr12 = COPY [[COPY17]]
2497   ; GFX6-NEXT:   $vgpr13 = COPY [[COPY18]]
2498   ; GFX6-NEXT:   $vgpr14 = COPY [[COPY19]]
2499   ; GFX6-NEXT:   $vgpr15 = COPY [[COPY20]]
2500   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2501   ;
2502   ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
2503   ; GFX7: bb.1 (%ir-block.0):
2504   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2505   ; GFX7-NEXT: {{  $}}
2506   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2507   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2508   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2509   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2510   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2511   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2512   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2513   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2514   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2515   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
2516   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
2517   ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2518   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2519   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2520   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2521   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2522   ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2523   ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2524   ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2525   ; GFX7-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2526   ; GFX7-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2527   ; GFX7-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2528   ; GFX7-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2529   ; GFX7-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2530   ; GFX7-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2531   ; GFX7-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2532   ; GFX7-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2533   ; GFX7-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2534   ; GFX7-NEXT:   $vgpr0 = COPY [[COPY5]]
2535   ; GFX7-NEXT:   $vgpr1 = COPY [[COPY6]]
2536   ; GFX7-NEXT:   $vgpr2 = COPY [[COPY7]]
2537   ; GFX7-NEXT:   $vgpr3 = COPY [[COPY8]]
2538   ; GFX7-NEXT:   $vgpr4 = COPY [[COPY9]]
2539   ; GFX7-NEXT:   $vgpr5 = COPY [[COPY10]]
2540   ; GFX7-NEXT:   $vgpr6 = COPY [[COPY11]]
2541   ; GFX7-NEXT:   $vgpr7 = COPY [[COPY12]]
2542   ; GFX7-NEXT:   $vgpr8 = COPY [[COPY13]]
2543   ; GFX7-NEXT:   $vgpr9 = COPY [[COPY14]]
2544   ; GFX7-NEXT:   $vgpr10 = COPY [[COPY15]]
2545   ; GFX7-NEXT:   $vgpr11 = COPY [[COPY16]]
2546   ; GFX7-NEXT:   $vgpr12 = COPY [[COPY17]]
2547   ; GFX7-NEXT:   $vgpr13 = COPY [[COPY18]]
2548   ; GFX7-NEXT:   $vgpr14 = COPY [[COPY19]]
2549   ; GFX7-NEXT:   $vgpr15 = COPY [[COPY20]]
2550   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2551   ;
2552   ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
2553   ; GFX8: bb.1 (%ir-block.0):
2554   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2555   ; GFX8-NEXT: {{  $}}
2556   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2557   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2558   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2559   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2560   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2561   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2562   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2563   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2564   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2565   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
2566   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
2567   ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2568   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2569   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2570   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2571   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2572   ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2573   ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2574   ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2575   ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2576   ; GFX8-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2577   ; GFX8-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2578   ; GFX8-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2579   ; GFX8-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2580   ; GFX8-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2581   ; GFX8-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2582   ; GFX8-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2583   ; GFX8-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2584   ; GFX8-NEXT:   $vgpr0 = COPY [[COPY5]]
2585   ; GFX8-NEXT:   $vgpr1 = COPY [[COPY6]]
2586   ; GFX8-NEXT:   $vgpr2 = COPY [[COPY7]]
2587   ; GFX8-NEXT:   $vgpr3 = COPY [[COPY8]]
2588   ; GFX8-NEXT:   $vgpr4 = COPY [[COPY9]]
2589   ; GFX8-NEXT:   $vgpr5 = COPY [[COPY10]]
2590   ; GFX8-NEXT:   $vgpr6 = COPY [[COPY11]]
2591   ; GFX8-NEXT:   $vgpr7 = COPY [[COPY12]]
2592   ; GFX8-NEXT:   $vgpr8 = COPY [[COPY13]]
2593   ; GFX8-NEXT:   $vgpr9 = COPY [[COPY14]]
2594   ; GFX8-NEXT:   $vgpr10 = COPY [[COPY15]]
2595   ; GFX8-NEXT:   $vgpr11 = COPY [[COPY16]]
2596   ; GFX8-NEXT:   $vgpr12 = COPY [[COPY17]]
2597   ; GFX8-NEXT:   $vgpr13 = COPY [[COPY18]]
2598   ; GFX8-NEXT:   $vgpr14 = COPY [[COPY19]]
2599   ; GFX8-NEXT:   $vgpr15 = COPY [[COPY20]]
2600   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2601   %soffset = add i32 %soffset.base, 4032
2602   %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2603   ret <16 x float> %val
2604 }
2605
2606 define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i32> inreg %rsrc, i32 %soffset.base) {
2607   ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
2608   ; GFX6: bb.1 (%ir-block.0):
2609   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2610   ; GFX6-NEXT: {{  $}}
2611   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2612   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2613   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2614   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2615   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2616   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2617   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
2618   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2619   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2620   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
2621   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
2622   ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2623   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2624   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2625   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2626   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2627   ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2628   ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2629   ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2630   ; GFX6-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2631   ; GFX6-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2632   ; GFX6-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2633   ; GFX6-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2634   ; GFX6-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2635   ; GFX6-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2636   ; GFX6-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2637   ; GFX6-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2638   ; GFX6-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2639   ; GFX6-NEXT:   $vgpr0 = COPY [[COPY5]]
2640   ; GFX6-NEXT:   $vgpr1 = COPY [[COPY6]]
2641   ; GFX6-NEXT:   $vgpr2 = COPY [[COPY7]]
2642   ; GFX6-NEXT:   $vgpr3 = COPY [[COPY8]]
2643   ; GFX6-NEXT:   $vgpr4 = COPY [[COPY9]]
2644   ; GFX6-NEXT:   $vgpr5 = COPY [[COPY10]]
2645   ; GFX6-NEXT:   $vgpr6 = COPY [[COPY11]]
2646   ; GFX6-NEXT:   $vgpr7 = COPY [[COPY12]]
2647   ; GFX6-NEXT:   $vgpr8 = COPY [[COPY13]]
2648   ; GFX6-NEXT:   $vgpr9 = COPY [[COPY14]]
2649   ; GFX6-NEXT:   $vgpr10 = COPY [[COPY15]]
2650   ; GFX6-NEXT:   $vgpr11 = COPY [[COPY16]]
2651   ; GFX6-NEXT:   $vgpr12 = COPY [[COPY17]]
2652   ; GFX6-NEXT:   $vgpr13 = COPY [[COPY18]]
2653   ; GFX6-NEXT:   $vgpr14 = COPY [[COPY19]]
2654   ; GFX6-NEXT:   $vgpr15 = COPY [[COPY20]]
2655   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2656   ;
2657   ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
2658   ; GFX7: bb.1 (%ir-block.0):
2659   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2660   ; GFX7-NEXT: {{  $}}
2661   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2662   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2663   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2664   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2665   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2666   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2667   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
2668   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2669   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2670   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
2671   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
2672   ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2673   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2674   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2675   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2676   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2677   ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2678   ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2679   ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2680   ; GFX7-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2681   ; GFX7-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2682   ; GFX7-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2683   ; GFX7-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2684   ; GFX7-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2685   ; GFX7-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2686   ; GFX7-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2687   ; GFX7-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2688   ; GFX7-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2689   ; GFX7-NEXT:   $vgpr0 = COPY [[COPY5]]
2690   ; GFX7-NEXT:   $vgpr1 = COPY [[COPY6]]
2691   ; GFX7-NEXT:   $vgpr2 = COPY [[COPY7]]
2692   ; GFX7-NEXT:   $vgpr3 = COPY [[COPY8]]
2693   ; GFX7-NEXT:   $vgpr4 = COPY [[COPY9]]
2694   ; GFX7-NEXT:   $vgpr5 = COPY [[COPY10]]
2695   ; GFX7-NEXT:   $vgpr6 = COPY [[COPY11]]
2696   ; GFX7-NEXT:   $vgpr7 = COPY [[COPY12]]
2697   ; GFX7-NEXT:   $vgpr8 = COPY [[COPY13]]
2698   ; GFX7-NEXT:   $vgpr9 = COPY [[COPY14]]
2699   ; GFX7-NEXT:   $vgpr10 = COPY [[COPY15]]
2700   ; GFX7-NEXT:   $vgpr11 = COPY [[COPY16]]
2701   ; GFX7-NEXT:   $vgpr12 = COPY [[COPY17]]
2702   ; GFX7-NEXT:   $vgpr13 = COPY [[COPY18]]
2703   ; GFX7-NEXT:   $vgpr14 = COPY [[COPY19]]
2704   ; GFX7-NEXT:   $vgpr15 = COPY [[COPY20]]
2705   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2706   ;
2707   ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
2708   ; GFX8: bb.1 (%ir-block.0):
2709   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
2710   ; GFX8-NEXT: {{  $}}
2711   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
2712   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
2713   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
2714   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
2715   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2716   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2717   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
2718   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2719   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
2720   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
2721   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
2722   ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
2723   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
2724   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
2725   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
2726   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
2727   ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
2728   ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
2729   ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
2730   ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
2731   ; GFX8-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
2732   ; GFX8-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
2733   ; GFX8-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
2734   ; GFX8-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
2735   ; GFX8-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
2736   ; GFX8-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
2737   ; GFX8-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
2738   ; GFX8-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
2739   ; GFX8-NEXT:   $vgpr0 = COPY [[COPY5]]
2740   ; GFX8-NEXT:   $vgpr1 = COPY [[COPY6]]
2741   ; GFX8-NEXT:   $vgpr2 = COPY [[COPY7]]
2742   ; GFX8-NEXT:   $vgpr3 = COPY [[COPY8]]
2743   ; GFX8-NEXT:   $vgpr4 = COPY [[COPY9]]
2744   ; GFX8-NEXT:   $vgpr5 = COPY [[COPY10]]
2745   ; GFX8-NEXT:   $vgpr6 = COPY [[COPY11]]
2746   ; GFX8-NEXT:   $vgpr7 = COPY [[COPY12]]
2747   ; GFX8-NEXT:   $vgpr8 = COPY [[COPY13]]
2748   ; GFX8-NEXT:   $vgpr9 = COPY [[COPY14]]
2749   ; GFX8-NEXT:   $vgpr10 = COPY [[COPY15]]
2750   ; GFX8-NEXT:   $vgpr11 = COPY [[COPY16]]
2751   ; GFX8-NEXT:   $vgpr12 = COPY [[COPY17]]
2752   ; GFX8-NEXT:   $vgpr13 = COPY [[COPY18]]
2753   ; GFX8-NEXT:   $vgpr14 = COPY [[COPY19]]
2754   ; GFX8-NEXT:   $vgpr15 = COPY [[COPY20]]
2755   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
2756   %soffset = add i32 %soffset.base, 4036
2757   %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2758   ret <16 x float> %val
2759 }
2760
2761 ; Waterfall loop due to resource being VGPR
2762 define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %soffset) {
2763   ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc
2764   ; GFX6: bb.1 (%ir-block.0):
2765   ; GFX6-NEXT:   successors: %bb.2(0x80000000)
2766   ; GFX6-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2767   ; GFX6-NEXT: {{  $}}
2768   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2769   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2770   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2771   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2772   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2773   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2774   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
2775   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2776   ; GFX6-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
2777   ; GFX6-NEXT: {{  $}}
2778   ; GFX6-NEXT: bb.2:
2779   ; GFX6-NEXT:   successors: %bb.3(0x80000000)
2780   ; GFX6-NEXT: {{  $}}
2781   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
2782   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
2783   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
2784   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
2785   ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2786   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2787   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2788   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
2789   ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
2790   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
2791   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
2792   ; GFX6-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
2793   ; GFX6-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2794   ; GFX6-NEXT: {{  $}}
2795   ; GFX6-NEXT: bb.3:
2796   ; GFX6-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
2797   ; GFX6-NEXT: {{  $}}
2798   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2799   ; GFX6-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2800   ; GFX6-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
2801   ; GFX6-NEXT: {{  $}}
2802   ; GFX6-NEXT: bb.4:
2803   ; GFX6-NEXT:   successors: %bb.5(0x80000000)
2804   ; GFX6-NEXT: {{  $}}
2805   ; GFX6-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
2806   ; GFX6-NEXT: {{  $}}
2807   ; GFX6-NEXT: bb.5:
2808   ; GFX6-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2809   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
2810   ;
2811   ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc
2812   ; GFX7: bb.1 (%ir-block.0):
2813   ; GFX7-NEXT:   successors: %bb.2(0x80000000)
2814   ; GFX7-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2815   ; GFX7-NEXT: {{  $}}
2816   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2817   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2818   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2819   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2820   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2821   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2822   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
2823   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2824   ; GFX7-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
2825   ; GFX7-NEXT: {{  $}}
2826   ; GFX7-NEXT: bb.2:
2827   ; GFX7-NEXT:   successors: %bb.3(0x80000000)
2828   ; GFX7-NEXT: {{  $}}
2829   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
2830   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
2831   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
2832   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
2833   ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2834   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2835   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2836   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
2837   ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
2838   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
2839   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
2840   ; GFX7-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
2841   ; GFX7-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2842   ; GFX7-NEXT: {{  $}}
2843   ; GFX7-NEXT: bb.3:
2844   ; GFX7-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
2845   ; GFX7-NEXT: {{  $}}
2846   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2847   ; GFX7-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2848   ; GFX7-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
2849   ; GFX7-NEXT: {{  $}}
2850   ; GFX7-NEXT: bb.4:
2851   ; GFX7-NEXT:   successors: %bb.5(0x80000000)
2852   ; GFX7-NEXT: {{  $}}
2853   ; GFX7-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
2854   ; GFX7-NEXT: {{  $}}
2855   ; GFX7-NEXT: bb.5:
2856   ; GFX7-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2857   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
2858   ;
2859   ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc
2860   ; GFX8: bb.1 (%ir-block.0):
2861   ; GFX8-NEXT:   successors: %bb.2(0x80000000)
2862   ; GFX8-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2863   ; GFX8-NEXT: {{  $}}
2864   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2865   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2866   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2867   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2868   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2869   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2870   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
2871   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
2872   ; GFX8-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
2873   ; GFX8-NEXT: {{  $}}
2874   ; GFX8-NEXT: bb.2:
2875   ; GFX8-NEXT:   successors: %bb.3(0x80000000)
2876   ; GFX8-NEXT: {{  $}}
2877   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
2878   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
2879   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
2880   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
2881   ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2882   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2883   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2884   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
2885   ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
2886   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
2887   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
2888   ; GFX8-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
2889   ; GFX8-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2890   ; GFX8-NEXT: {{  $}}
2891   ; GFX8-NEXT: bb.3:
2892   ; GFX8-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
2893   ; GFX8-NEXT: {{  $}}
2894   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2895   ; GFX8-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2896   ; GFX8-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
2897   ; GFX8-NEXT: {{  $}}
2898   ; GFX8-NEXT: bb.4:
2899   ; GFX8-NEXT:   successors: %bb.5(0x80000000)
2900   ; GFX8-NEXT: {{  $}}
2901   ; GFX8-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
2902   ; GFX8-NEXT: {{  $}}
2903   ; GFX8-NEXT: bb.5:
2904   ; GFX8-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
2905   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
2906   %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
2907   ret float %val
2908 }
2909
2910 ; Use the offset inside the waterfall loop
2911 define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %rsrc, i32 inreg %soffset.base) {
2912   ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
2913   ; GFX6: bb.1 (%ir-block.0):
2914   ; GFX6-NEXT:   successors: %bb.2(0x80000000)
2915   ; GFX6-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2916   ; GFX6-NEXT: {{  $}}
2917   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2918   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2919   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2920   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2921   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2922   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2923   ; GFX6-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
2924   ; GFX6-NEXT: {{  $}}
2925   ; GFX6-NEXT: bb.2:
2926   ; GFX6-NEXT:   successors: %bb.3(0x80000000)
2927   ; GFX6-NEXT: {{  $}}
2928   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
2929   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
2930   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
2931   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
2932   ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2933   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2934   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2935   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
2936   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
2937   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
2938   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
2939   ; GFX6-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
2940   ; GFX6-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2941   ; GFX6-NEXT: {{  $}}
2942   ; GFX6-NEXT: bb.3:
2943   ; GFX6-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
2944   ; GFX6-NEXT: {{  $}}
2945   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2946   ; GFX6-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2947   ; GFX6-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
2948   ; GFX6-NEXT: {{  $}}
2949   ; GFX6-NEXT: bb.4:
2950   ; GFX6-NEXT:   successors: %bb.5(0x80000000)
2951   ; GFX6-NEXT: {{  $}}
2952   ; GFX6-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
2953   ; GFX6-NEXT: {{  $}}
2954   ; GFX6-NEXT: bb.5:
2955   ; GFX6-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
2956   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
2957   ;
2958   ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
2959   ; GFX7: bb.1 (%ir-block.0):
2960   ; GFX7-NEXT:   successors: %bb.2(0x80000000)
2961   ; GFX7-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
2962   ; GFX7-NEXT: {{  $}}
2963   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2964   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2965   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
2966   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
2967   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
2968   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
2969   ; GFX7-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
2970   ; GFX7-NEXT: {{  $}}
2971   ; GFX7-NEXT: bb.2:
2972   ; GFX7-NEXT:   successors: %bb.3(0x80000000)
2973   ; GFX7-NEXT: {{  $}}
2974   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
2975   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
2976   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
2977   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
2978   ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
2979   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
2980   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
2981   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
2982   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
2983   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
2984   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
2985   ; GFX7-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
2986   ; GFX7-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
2987   ; GFX7-NEXT: {{  $}}
2988   ; GFX7-NEXT: bb.3:
2989   ; GFX7-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
2990   ; GFX7-NEXT: {{  $}}
2991   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
2992   ; GFX7-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
2993   ; GFX7-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
2994   ; GFX7-NEXT: {{  $}}
2995   ; GFX7-NEXT: bb.4:
2996   ; GFX7-NEXT:   successors: %bb.5(0x80000000)
2997   ; GFX7-NEXT: {{  $}}
2998   ; GFX7-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
2999   ; GFX7-NEXT: {{  $}}
3000   ; GFX7-NEXT: bb.5:
3001   ; GFX7-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
3002   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
3003   ;
3004   ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
3005   ; GFX8: bb.1 (%ir-block.0):
3006   ; GFX8-NEXT:   successors: %bb.2(0x80000000)
3007   ; GFX8-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3008   ; GFX8-NEXT: {{  $}}
3009   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3010   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3011   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3012   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3013   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3014   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3015   ; GFX8-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3016   ; GFX8-NEXT: {{  $}}
3017   ; GFX8-NEXT: bb.2:
3018   ; GFX8-NEXT:   successors: %bb.3(0x80000000)
3019   ; GFX8-NEXT: {{  $}}
3020   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3021   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3022   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3023   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3024   ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3025   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3026   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3027   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3028   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3029   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3030   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3031   ; GFX8-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3032   ; GFX8-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3033   ; GFX8-NEXT: {{  $}}
3034   ; GFX8-NEXT: bb.3:
3035   ; GFX8-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
3036   ; GFX8-NEXT: {{  $}}
3037   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
3038   ; GFX8-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3039   ; GFX8-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
3040   ; GFX8-NEXT: {{  $}}
3041   ; GFX8-NEXT: bb.4:
3042   ; GFX8-NEXT:   successors: %bb.5(0x80000000)
3043   ; GFX8-NEXT: {{  $}}
3044   ; GFX8-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
3045   ; GFX8-NEXT: {{  $}}
3046   ; GFX8-NEXT: bb.5:
3047   ; GFX8-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
3048   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
3049   %soffset = add i32 %soffset.base, 4092
3050   %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
3051   ret float %val
3052 }
3054 ; Scalar offset exceeds the MUBUF immediate limit, so keep the add out of the loop
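; 4096 does not fit the 12-bit MUBUF immediate offset field (max 4095), so the
; checks below expect the add to be materialized with S_ADD_I32 before the
; waterfall loop and the sum to reach BUFFER_LOAD_DWORD_OFFEN via a VGPR copy.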
3055 define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) {
3056   ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
3057   ; GFX6: bb.1 (%ir-block.0):
3058   ; GFX6-NEXT:   successors: %bb.2(0x80000000)
3059   ; GFX6-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3060   ; GFX6-NEXT: {{  $}}
3061   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3062   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3063   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3064   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3065   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3066   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3067   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
3068   ; GFX6-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
3069   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
3070   ; GFX6-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3071   ; GFX6-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3072   ; GFX6-NEXT: {{  $}}
3073   ; GFX6-NEXT: bb.2:
3074   ; GFX6-NEXT:   successors: %bb.3(0x80000000)
3075   ; GFX6-NEXT: {{  $}}
3076   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3077   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3078   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3079   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3080   ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3081   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3082   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3083   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3084   ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3085   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3086   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
3087   ; GFX6-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3088   ; GFX6-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3089   ; GFX6-NEXT: {{  $}}
3090   ; GFX6-NEXT: bb.3:
3091   ; GFX6-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
3092   ; GFX6-NEXT: {{  $}}
3093   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
3094   ; GFX6-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3095   ; GFX6-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
3096   ; GFX6-NEXT: {{  $}}
3097   ; GFX6-NEXT: bb.4:
3098   ; GFX6-NEXT:   successors: %bb.5(0x80000000)
3099   ; GFX6-NEXT: {{  $}}
3100   ; GFX6-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
3101   ; GFX6-NEXT: {{  $}}
3102   ; GFX6-NEXT: bb.5:
3103   ; GFX6-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
3104   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
3105   ;
3106   ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
3107   ; GFX7: bb.1 (%ir-block.0):
3108   ; GFX7-NEXT:   successors: %bb.2(0x80000000)
3109   ; GFX7-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3110   ; GFX7-NEXT: {{  $}}
3111   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3112   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3113   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3114   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3115   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3116   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3117   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
3118   ; GFX7-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
3119   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
3120   ; GFX7-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3121   ; GFX7-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3122   ; GFX7-NEXT: {{  $}}
3123   ; GFX7-NEXT: bb.2:
3124   ; GFX7-NEXT:   successors: %bb.3(0x80000000)
3125   ; GFX7-NEXT: {{  $}}
3126   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3127   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3128   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3129   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3130   ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3131   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3132   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3133   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3134   ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3135   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3136   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
3137   ; GFX7-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3138   ; GFX7-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3139   ; GFX7-NEXT: {{  $}}
3140   ; GFX7-NEXT: bb.3:
3141   ; GFX7-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
3142   ; GFX7-NEXT: {{  $}}
3143   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
3144   ; GFX7-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3145   ; GFX7-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
3146   ; GFX7-NEXT: {{  $}}
3147   ; GFX7-NEXT: bb.4:
3148   ; GFX7-NEXT:   successors: %bb.5(0x80000000)
3149   ; GFX7-NEXT: {{  $}}
3150   ; GFX7-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
3151   ; GFX7-NEXT: {{  $}}
3152   ; GFX7-NEXT: bb.5:
3153   ; GFX7-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
3154   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
3155   ;
3156   ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
3157   ; GFX8: bb.1 (%ir-block.0):
3158   ; GFX8-NEXT:   successors: %bb.2(0x80000000)
3159   ; GFX8-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3160   ; GFX8-NEXT: {{  $}}
3161   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3162   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3163   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3164   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3165   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3166   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3167   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
3168   ; GFX8-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
3169   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
3170   ; GFX8-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3171   ; GFX8-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3172   ; GFX8-NEXT: {{  $}}
3173   ; GFX8-NEXT: bb.2:
3174   ; GFX8-NEXT:   successors: %bb.3(0x80000000)
3175   ; GFX8-NEXT: {{  $}}
3176   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3177   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3178   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3179   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3180   ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3181   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3182   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3183   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3184   ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3185   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3186   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
3187   ; GFX8-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3188   ; GFX8-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3189   ; GFX8-NEXT: {{  $}}
3190   ; GFX8-NEXT: bb.3:
3191   ; GFX8-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
3192   ; GFX8-NEXT: {{  $}}
3193   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
3194   ; GFX8-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3195   ; GFX8-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
3196   ; GFX8-NEXT: {{  $}}
3197   ; GFX8-NEXT: bb.4:
3198   ; GFX8-NEXT:   successors: %bb.5(0x80000000)
3199   ; GFX8-NEXT: {{  $}}
3200   ; GFX8-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
3201   ; GFX8-NEXT: {{  $}}
3202   ; GFX8-NEXT: bb.5:
3203   ; GFX8-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
3204   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
3205   %soffset = add i32 %soffset.base, 4096
3206   %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
3207   ret float %val
3208 }
3210 ; Waterfall loop, but constant offset
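; The rsrc is in VGPRs, so a readfirstlane waterfall loop is still required even
; though the constant 4095 fits the MUBUF immediate and folds directly into
; BUFFER_LOAD_DWORD_OFFSET.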
3211 define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) {
3212   ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095
3213   ; GFX6: bb.1 (%ir-block.0):
3214   ; GFX6-NEXT:   successors: %bb.2(0x80000000)
3215   ; GFX6-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
3216   ; GFX6-NEXT: {{  $}}
3217   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3218   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3219   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3220   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3221   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3222   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3223   ; GFX6-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3224   ; GFX6-NEXT: {{  $}}
3225   ; GFX6-NEXT: bb.2:
3226   ; GFX6-NEXT:   successors: %bb.3(0x80000000)
3227   ; GFX6-NEXT: {{  $}}
3228   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3229   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3230   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3231   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3232   ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3233   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3234   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3235   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3236   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3237   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
3238   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3239   ; GFX6-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3240   ; GFX6-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3241   ; GFX6-NEXT: {{  $}}
3242   ; GFX6-NEXT: bb.3:
3243   ; GFX6-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
3244   ; GFX6-NEXT: {{  $}}
3245   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
3246   ; GFX6-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3247   ; GFX6-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
3248   ; GFX6-NEXT: {{  $}}
3249   ; GFX6-NEXT: bb.4:
3250   ; GFX6-NEXT:   successors: %bb.5(0x80000000)
3251   ; GFX6-NEXT: {{  $}}
3252   ; GFX6-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
3253   ; GFX6-NEXT: {{  $}}
3254   ; GFX6-NEXT: bb.5:
3255   ; GFX6-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
3256   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
3257   ;
3258   ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095
3259   ; GFX7: bb.1 (%ir-block.0):
3260   ; GFX7-NEXT:   successors: %bb.2(0x80000000)
3261   ; GFX7-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
3262   ; GFX7-NEXT: {{  $}}
3263   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3264   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3265   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3266   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3267   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3268   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3269   ; GFX7-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3270   ; GFX7-NEXT: {{  $}}
3271   ; GFX7-NEXT: bb.2:
3272   ; GFX7-NEXT:   successors: %bb.3(0x80000000)
3273   ; GFX7-NEXT: {{  $}}
3274   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3275   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3276   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3277   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3278   ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3279   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3280   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3281   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3282   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3283   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
3284   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3285   ; GFX7-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3286   ; GFX7-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3287   ; GFX7-NEXT: {{  $}}
3288   ; GFX7-NEXT: bb.3:
3289   ; GFX7-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
3290   ; GFX7-NEXT: {{  $}}
3291   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
3292   ; GFX7-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3293   ; GFX7-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
3294   ; GFX7-NEXT: {{  $}}
3295   ; GFX7-NEXT: bb.4:
3296   ; GFX7-NEXT:   successors: %bb.5(0x80000000)
3297   ; GFX7-NEXT: {{  $}}
3298   ; GFX7-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
3299   ; GFX7-NEXT: {{  $}}
3300   ; GFX7-NEXT: bb.5:
3301   ; GFX7-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
3302   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
3303   ;
3304   ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095
3305   ; GFX8: bb.1 (%ir-block.0):
3306   ; GFX8-NEXT:   successors: %bb.2(0x80000000)
3307   ; GFX8-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
3308   ; GFX8-NEXT: {{  $}}
3309   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3310   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3311   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3312   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3313   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3314   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3315   ; GFX8-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3316   ; GFX8-NEXT: {{  $}}
3317   ; GFX8-NEXT: bb.2:
3318   ; GFX8-NEXT:   successors: %bb.3(0x80000000)
3319   ; GFX8-NEXT: {{  $}}
3320   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3321   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3322   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3323   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3324   ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3325   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3326   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3327   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3328   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3329   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
3330   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3331   ; GFX8-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3332   ; GFX8-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3333   ; GFX8-NEXT: {{  $}}
3334   ; GFX8-NEXT: bb.3:
3335   ; GFX8-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
3336   ; GFX8-NEXT: {{  $}}
3337   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
3338   ; GFX8-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3339   ; GFX8-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
3340   ; GFX8-NEXT: {{  $}}
3341   ; GFX8-NEXT: bb.4:
3342   ; GFX8-NEXT:   successors: %bb.5(0x80000000)
3343   ; GFX8-NEXT: {{  $}}
3344   ; GFX8-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
3345   ; GFX8-NEXT: {{  $}}
3346   ; GFX8-NEXT: bb.5:
3347   ; GFX8-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
3348   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
3349   %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0)
3350   ret float %val
3351 }
3353 ; Waterfall loop, but constant offset
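; 4096 is one past the immediate limit: per the checks, GFX6/7 materialize it in
; a VGPR and use the OFFEN form, while GFX8 encodes it as soffset 1 plus an
; immediate of 4095 (see the S_MOV_B32 1 in the GFX8 checks).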
3354 define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) {
3355   ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096
3356   ; GFX6: bb.1 (%ir-block.0):
3357   ; GFX6-NEXT:   successors: %bb.2(0x80000000)
3358   ; GFX6-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
3359   ; GFX6-NEXT: {{  $}}
3360   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3361   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3362   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3363   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3364   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3365   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
3366   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
3367   ; GFX6-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3368   ; GFX6-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3369   ; GFX6-NEXT: {{  $}}
3370   ; GFX6-NEXT: bb.2:
3371   ; GFX6-NEXT:   successors: %bb.3(0x80000000)
3372   ; GFX6-NEXT: {{  $}}
3373   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3374   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3375   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3376   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3377   ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3378   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3379   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3380   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3381   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3382   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3383   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3384   ; GFX6-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3385   ; GFX6-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3386   ; GFX6-NEXT: {{  $}}
3387   ; GFX6-NEXT: bb.3:
3388   ; GFX6-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
3389   ; GFX6-NEXT: {{  $}}
3390   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
3391   ; GFX6-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3392   ; GFX6-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
3393   ; GFX6-NEXT: {{  $}}
3394   ; GFX6-NEXT: bb.4:
3395   ; GFX6-NEXT:   successors: %bb.5(0x80000000)
3396   ; GFX6-NEXT: {{  $}}
3397   ; GFX6-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
3398   ; GFX6-NEXT: {{  $}}
3399   ; GFX6-NEXT: bb.5:
3400   ; GFX6-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
3401   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
3402   ;
3403   ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096
3404   ; GFX7: bb.1 (%ir-block.0):
3405   ; GFX7-NEXT:   successors: %bb.2(0x80000000)
3406   ; GFX7-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
3407   ; GFX7-NEXT: {{  $}}
3408   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3409   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3410   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3411   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3412   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3413   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
3414   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
3415   ; GFX7-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3416   ; GFX7-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3417   ; GFX7-NEXT: {{  $}}
3418   ; GFX7-NEXT: bb.2:
3419   ; GFX7-NEXT:   successors: %bb.3(0x80000000)
3420   ; GFX7-NEXT: {{  $}}
3421   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3422   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3423   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3424   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3425   ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3426   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3427   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3428   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3429   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3430   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3431   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3432   ; GFX7-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3433   ; GFX7-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3434   ; GFX7-NEXT: {{  $}}
3435   ; GFX7-NEXT: bb.3:
3436   ; GFX7-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
3437   ; GFX7-NEXT: {{  $}}
3438   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
3439   ; GFX7-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3440   ; GFX7-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
3441   ; GFX7-NEXT: {{  $}}
3442   ; GFX7-NEXT: bb.4:
3443   ; GFX7-NEXT:   successors: %bb.5(0x80000000)
3444   ; GFX7-NEXT: {{  $}}
3445   ; GFX7-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
3446   ; GFX7-NEXT: {{  $}}
3447   ; GFX7-NEXT: bb.5:
3448   ; GFX7-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
3449   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
3450   ;
3451   ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096
3452   ; GFX8: bb.1 (%ir-block.0):
3453   ; GFX8-NEXT:   successors: %bb.2(0x80000000)
3454   ; GFX8-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
3455   ; GFX8-NEXT: {{  $}}
3456   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3457   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3458   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3459   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3460   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3461   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
3462   ; GFX8-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3463   ; GFX8-NEXT: {{  $}}
3464   ; GFX8-NEXT: bb.2:
3465   ; GFX8-NEXT:   successors: %bb.3(0x80000000)
3466   ; GFX8-NEXT: {{  $}}
3467   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3468   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3469   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3470   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3471   ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3472   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3473   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3474   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3475   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3476   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
3477   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3478   ; GFX8-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3479   ; GFX8-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3480   ; GFX8-NEXT: {{  $}}
3481   ; GFX8-NEXT: bb.3:
3482   ; GFX8-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
3483   ; GFX8-NEXT: {{  $}}
3484   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4095, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096)
3485   ; GFX8-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3486   ; GFX8-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
3487   ; GFX8-NEXT: {{  $}}
3488   ; GFX8-NEXT: bb.4:
3489   ; GFX8-NEXT:   successors: %bb.5(0x80000000)
3490   ; GFX8-NEXT: {{  $}}
3491   ; GFX8-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
3492   ; GFX8-NEXT: {{  $}}
3493   ; GFX8-NEXT: bb.5:
3494   ; GFX8-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
3495   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
3496   %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0)
3497   ret float %val
3498 }
3500 ; Need a waterfall loop, but the offset is scalar.
3501 ; Make sure the base offset is added to each split load.
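; The 256-bit result is split into two BUFFER_LOAD_DWORDX4_OFFSET halves; with a
; base of 4064 both immediates (4064 and 4080) stay under 4095, so %soffset.base
; is expected to remain in the soffset register operand of each half.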
3502 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %rsrc, i32 inreg %soffset.base) {
3503   ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064
3504   ; GFX6: bb.1 (%ir-block.0):
3505   ; GFX6-NEXT:   successors: %bb.2(0x80000000)
3506   ; GFX6-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3507   ; GFX6-NEXT: {{  $}}
3508   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3509   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3510   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3511   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3512   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3513   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3514   ; GFX6-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3515   ; GFX6-NEXT: {{  $}}
3516   ; GFX6-NEXT: bb.2:
3517   ; GFX6-NEXT:   successors: %bb.3(0x80000000)
3518   ; GFX6-NEXT: {{  $}}
3519   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3520   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3521   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3522   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3523   ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3524   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3525   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3526   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3527   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3528   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3529   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3530   ; GFX6-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3531   ; GFX6-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3532   ; GFX6-NEXT: {{  $}}
3533   ; GFX6-NEXT: bb.3:
3534   ; GFX6-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
3535   ; GFX6-NEXT: {{  $}}
3536   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3537   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3538   ; GFX6-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3539   ; GFX6-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
3540   ; GFX6-NEXT: {{  $}}
3541   ; GFX6-NEXT: bb.4:
3542   ; GFX6-NEXT:   successors: %bb.5(0x80000000)
3543   ; GFX6-NEXT: {{  $}}
3544   ; GFX6-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
3545   ; GFX6-NEXT: {{  $}}
3546   ; GFX6-NEXT: bb.5:
3547   ; GFX6-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
3548   ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
3549   ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
3550   ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
3551   ; GFX6-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
3552   ; GFX6-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
3553   ; GFX6-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
3554   ; GFX6-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
3555   ; GFX6-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
3556   ; GFX6-NEXT:   $vgpr0 = COPY [[COPY9]]
3557   ; GFX6-NEXT:   $vgpr1 = COPY [[COPY10]]
3558   ; GFX6-NEXT:   $vgpr2 = COPY [[COPY11]]
3559   ; GFX6-NEXT:   $vgpr3 = COPY [[COPY12]]
3560   ; GFX6-NEXT:   $vgpr4 = COPY [[COPY13]]
3561   ; GFX6-NEXT:   $vgpr5 = COPY [[COPY14]]
3562   ; GFX6-NEXT:   $vgpr6 = COPY [[COPY15]]
3563   ; GFX6-NEXT:   $vgpr7 = COPY [[COPY16]]
3564   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3565   ;
3566   ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064
3567   ; GFX7: bb.1 (%ir-block.0):
3568   ; GFX7-NEXT:   successors: %bb.2(0x80000000)
3569   ; GFX7-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3570   ; GFX7-NEXT: {{  $}}
3571   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3572   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3573   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3574   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3575   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3576   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3577   ; GFX7-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3578   ; GFX7-NEXT: {{  $}}
3579   ; GFX7-NEXT: bb.2:
3580   ; GFX7-NEXT:   successors: %bb.3(0x80000000)
3581   ; GFX7-NEXT: {{  $}}
3582   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3583   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3584   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3585   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3586   ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3587   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3588   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3589   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3590   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3591   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3592   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3593   ; GFX7-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3594   ; GFX7-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3595   ; GFX7-NEXT: {{  $}}
3596   ; GFX7-NEXT: bb.3:
3597   ; GFX7-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
3598   ; GFX7-NEXT: {{  $}}
3599   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3600   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3601   ; GFX7-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3602   ; GFX7-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
3603   ; GFX7-NEXT: {{  $}}
3604   ; GFX7-NEXT: bb.4:
3605   ; GFX7-NEXT:   successors: %bb.5(0x80000000)
3606   ; GFX7-NEXT: {{  $}}
3607   ; GFX7-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
3608   ; GFX7-NEXT: {{  $}}
3609   ; GFX7-NEXT: bb.5:
3610   ; GFX7-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
3611   ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
3612   ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
3613   ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
3614   ; GFX7-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
3615   ; GFX7-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
3616   ; GFX7-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
3617   ; GFX7-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
3618   ; GFX7-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
3619   ; GFX7-NEXT:   $vgpr0 = COPY [[COPY9]]
3620   ; GFX7-NEXT:   $vgpr1 = COPY [[COPY10]]
3621   ; GFX7-NEXT:   $vgpr2 = COPY [[COPY11]]
3622   ; GFX7-NEXT:   $vgpr3 = COPY [[COPY12]]
3623   ; GFX7-NEXT:   $vgpr4 = COPY [[COPY13]]
3624   ; GFX7-NEXT:   $vgpr5 = COPY [[COPY14]]
3625   ; GFX7-NEXT:   $vgpr6 = COPY [[COPY15]]
3626   ; GFX7-NEXT:   $vgpr7 = COPY [[COPY16]]
3627   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3628   ;
3629   ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064
3630   ; GFX8: bb.1 (%ir-block.0):
3631   ; GFX8-NEXT:   successors: %bb.2(0x80000000)
3632   ; GFX8-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3633   ; GFX8-NEXT: {{  $}}
3634   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3635   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3636   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3637   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3638   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3639   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3640   ; GFX8-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3641   ; GFX8-NEXT: {{  $}}
3642   ; GFX8-NEXT: bb.2:
3643   ; GFX8-NEXT:   successors: %bb.3(0x80000000)
3644   ; GFX8-NEXT: {{  $}}
3645   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3646   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3647   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3648   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3649   ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3650   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3651   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3652   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3653   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3654   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
3655   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3656   ; GFX8-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3657   ; GFX8-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3658   ; GFX8-NEXT: {{  $}}
3659   ; GFX8-NEXT: bb.3:
3660   ; GFX8-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
3661   ; GFX8-NEXT: {{  $}}
3662   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3663   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3664   ; GFX8-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3665   ; GFX8-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
3666   ; GFX8-NEXT: {{  $}}
3667   ; GFX8-NEXT: bb.4:
3668   ; GFX8-NEXT:   successors: %bb.5(0x80000000)
3669   ; GFX8-NEXT: {{  $}}
3670   ; GFX8-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
3671   ; GFX8-NEXT: {{  $}}
3672   ; GFX8-NEXT: bb.5:
3673   ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
3674   ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
3675   ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
3676   ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
3677   ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
3678   ; GFX8-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
3679   ; GFX8-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
3680   ; GFX8-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
3681   ; GFX8-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
3682   ; GFX8-NEXT:   $vgpr0 = COPY [[COPY9]]
3683   ; GFX8-NEXT:   $vgpr1 = COPY [[COPY10]]
3684   ; GFX8-NEXT:   $vgpr2 = COPY [[COPY11]]
3685   ; GFX8-NEXT:   $vgpr3 = COPY [[COPY12]]
3686   ; GFX8-NEXT:   $vgpr4 = COPY [[COPY13]]
3687   ; GFX8-NEXT:   $vgpr5 = COPY [[COPY14]]
3688   ; GFX8-NEXT:   $vgpr6 = COPY [[COPY15]]
3689   ; GFX8-NEXT:   $vgpr7 = COPY [[COPY16]]
3690   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3691   %soffset = add i32 %soffset.base, 4064
3692   %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
3693   ret <8 x float> %val
3694 }
3696 ; Need a waterfall loop, but the offset is scalar.
3697 ; Make sure the maximum offset isn't exceeded when splitting this load.
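; Here the checks expect the S_ADD_I32 of the 4068 base to stay outside the
; waterfall loop, with the two BUFFER_LOAD_DWORDX4_OFFEN halves using relative
; offsets 0 and 16, so neither split load carries a large immediate.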
3698 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %rsrc, i32 inreg %soffset.base) {
3699   ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
3700   ; GFX6: bb.1 (%ir-block.0):
3701   ; GFX6-NEXT:   successors: %bb.2(0x80000000)
3702   ; GFX6-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3703   ; GFX6-NEXT: {{  $}}
3704   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3705   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3706   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3707   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3708   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3709   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3710   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
3711   ; GFX6-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
3712   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
3713   ; GFX6-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3714   ; GFX6-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3715   ; GFX6-NEXT: {{  $}}
3716   ; GFX6-NEXT: bb.2:
3717   ; GFX6-NEXT:   successors: %bb.3(0x80000000)
3718   ; GFX6-NEXT: {{  $}}
3719   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3720   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3721   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3722   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3723   ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3724   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3725   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3726   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3727   ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3728   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3729   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
3730   ; GFX6-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3731   ; GFX6-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3732   ; GFX6-NEXT: {{  $}}
3733   ; GFX6-NEXT: bb.3:
3734   ; GFX6-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
3735   ; GFX6-NEXT: {{  $}}
3736   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3737   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3738   ; GFX6-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3739   ; GFX6-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
3740   ; GFX6-NEXT: {{  $}}
3741   ; GFX6-NEXT: bb.4:
3742   ; GFX6-NEXT:   successors: %bb.5(0x80000000)
3743   ; GFX6-NEXT: {{  $}}
3744   ; GFX6-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
3745   ; GFX6-NEXT: {{  $}}
3746   ; GFX6-NEXT: bb.5:
3747   ; GFX6-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
3748   ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
3749   ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
3750   ; GFX6-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
3751   ; GFX6-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
3752   ; GFX6-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
3753   ; GFX6-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
3754   ; GFX6-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
3755   ; GFX6-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
3756   ; GFX6-NEXT:   $vgpr0 = COPY [[COPY10]]
3757   ; GFX6-NEXT:   $vgpr1 = COPY [[COPY11]]
3758   ; GFX6-NEXT:   $vgpr2 = COPY [[COPY12]]
3759   ; GFX6-NEXT:   $vgpr3 = COPY [[COPY13]]
3760   ; GFX6-NEXT:   $vgpr4 = COPY [[COPY14]]
3761   ; GFX6-NEXT:   $vgpr5 = COPY [[COPY15]]
3762   ; GFX6-NEXT:   $vgpr6 = COPY [[COPY16]]
3763   ; GFX6-NEXT:   $vgpr7 = COPY [[COPY17]]
3764   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3765   ;
3766   ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
3767   ; GFX7: bb.1 (%ir-block.0):
3768   ; GFX7-NEXT:   successors: %bb.2(0x80000000)
3769   ; GFX7-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3770   ; GFX7-NEXT: {{  $}}
3771   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3772   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3773   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3774   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3775   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3776   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3777   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
3778   ; GFX7-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
3779   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
3780   ; GFX7-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3781   ; GFX7-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3782   ; GFX7-NEXT: {{  $}}
3783   ; GFX7-NEXT: bb.2:
3784   ; GFX7-NEXT:   successors: %bb.3(0x80000000)
3785   ; GFX7-NEXT: {{  $}}
3786   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3787   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3788   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3789   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3790   ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3791   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3792   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3793   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3794   ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3795   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3796   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
3797   ; GFX7-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3798   ; GFX7-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3799   ; GFX7-NEXT: {{  $}}
3800   ; GFX7-NEXT: bb.3:
3801   ; GFX7-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
3802   ; GFX7-NEXT: {{  $}}
3803   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3804   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3805   ; GFX7-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3806   ; GFX7-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
3807   ; GFX7-NEXT: {{  $}}
3808   ; GFX7-NEXT: bb.4:
3809   ; GFX7-NEXT:   successors: %bb.5(0x80000000)
3810   ; GFX7-NEXT: {{  $}}
3811   ; GFX7-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
3812   ; GFX7-NEXT: {{  $}}
3813   ; GFX7-NEXT: bb.5:
3814   ; GFX7-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
3815   ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
3816   ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
3817   ; GFX7-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
3818   ; GFX7-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
3819   ; GFX7-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
3820   ; GFX7-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
3821   ; GFX7-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
3822   ; GFX7-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
3823   ; GFX7-NEXT:   $vgpr0 = COPY [[COPY10]]
3824   ; GFX7-NEXT:   $vgpr1 = COPY [[COPY11]]
3825   ; GFX7-NEXT:   $vgpr2 = COPY [[COPY12]]
3826   ; GFX7-NEXT:   $vgpr3 = COPY [[COPY13]]
3827   ; GFX7-NEXT:   $vgpr4 = COPY [[COPY14]]
3828   ; GFX7-NEXT:   $vgpr5 = COPY [[COPY15]]
3829   ; GFX7-NEXT:   $vgpr6 = COPY [[COPY16]]
3830   ; GFX7-NEXT:   $vgpr7 = COPY [[COPY17]]
3831   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3832   ;
3833   ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
3834   ; GFX8: bb.1 (%ir-block.0):
3835   ; GFX8-NEXT:   successors: %bb.2(0x80000000)
3836   ; GFX8-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3837   ; GFX8-NEXT: {{  $}}
3838   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3839   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3840   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3841   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3842   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3843   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3844   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
3845   ; GFX8-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
3846   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
3847   ; GFX8-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3848   ; GFX8-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3849   ; GFX8-NEXT: {{  $}}
3850   ; GFX8-NEXT: bb.2:
3851   ; GFX8-NEXT:   successors: %bb.3(0x80000000)
3852   ; GFX8-NEXT: {{  $}}
3853   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3854   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3855   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3856   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3857   ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3858   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3859   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3860   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3861   ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3862   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3863   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
3864   ; GFX8-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3865   ; GFX8-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3866   ; GFX8-NEXT: {{  $}}
3867   ; GFX8-NEXT: bb.3:
3868   ; GFX8-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
3869   ; GFX8-NEXT: {{  $}}
3870   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3871   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3872   ; GFX8-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3873   ; GFX8-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
3874   ; GFX8-NEXT: {{  $}}
3875   ; GFX8-NEXT: bb.4:
3876   ; GFX8-NEXT:   successors: %bb.5(0x80000000)
3877   ; GFX8-NEXT: {{  $}}
3878   ; GFX8-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
3879   ; GFX8-NEXT: {{  $}}
3880   ; GFX8-NEXT: bb.5:
3881   ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
3882   ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
3883   ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
3884   ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
3885   ; GFX8-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
3886   ; GFX8-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
3887   ; GFX8-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
3888   ; GFX8-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
3889   ; GFX8-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
3890   ; GFX8-NEXT:   $vgpr0 = COPY [[COPY10]]
3891   ; GFX8-NEXT:   $vgpr1 = COPY [[COPY11]]
3892   ; GFX8-NEXT:   $vgpr2 = COPY [[COPY12]]
3893   ; GFX8-NEXT:   $vgpr3 = COPY [[COPY13]]
3894   ; GFX8-NEXT:   $vgpr4 = COPY [[COPY14]]
3895   ; GFX8-NEXT:   $vgpr5 = COPY [[COPY15]]
3896   ; GFX8-NEXT:   $vgpr6 = COPY [[COPY16]]
3897   ; GFX8-NEXT:   $vgpr7 = COPY [[COPY17]]
3898   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3899   %soffset = add i32 %soffset.base, 4068
3900   %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
3901   ret <8 x float> %val
3902 }
3904 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) {
3905   ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
3906   ; GFX6: bb.1 (%ir-block.0):
3907   ; GFX6-NEXT:   successors: %bb.2(0x80000000)
3908   ; GFX6-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3909   ; GFX6-NEXT: {{  $}}
3910   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3911   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3912   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3913   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3914   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3915   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3916   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
3917   ; GFX6-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
3918   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
3919   ; GFX6-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3920   ; GFX6-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3921   ; GFX6-NEXT: {{  $}}
3922   ; GFX6-NEXT: bb.2:
3923   ; GFX6-NEXT:   successors: %bb.3(0x80000000)
3924   ; GFX6-NEXT: {{  $}}
3925   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3926   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3927   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3928   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3929   ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3930   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3931   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3932   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
3933   ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
3934   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
3935   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
3936   ; GFX6-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
3937   ; GFX6-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3938   ; GFX6-NEXT: {{  $}}
3939   ; GFX6-NEXT: bb.3:
3940   ; GFX6-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
3941   ; GFX6-NEXT: {{  $}}
3942   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3943   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
3944   ; GFX6-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
3945   ; GFX6-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
3946   ; GFX6-NEXT: {{  $}}
3947   ; GFX6-NEXT: bb.4:
3948   ; GFX6-NEXT:   successors: %bb.5(0x80000000)
3949   ; GFX6-NEXT: {{  $}}
3950   ; GFX6-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
3951   ; GFX6-NEXT: {{  $}}
3952   ; GFX6-NEXT: bb.5:
3953   ; GFX6-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
3954   ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
3955   ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
3956   ; GFX6-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
3957   ; GFX6-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
3958   ; GFX6-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
3959   ; GFX6-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
3960   ; GFX6-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
3961   ; GFX6-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
3962   ; GFX6-NEXT:   $vgpr0 = COPY [[COPY10]]
3963   ; GFX6-NEXT:   $vgpr1 = COPY [[COPY11]]
3964   ; GFX6-NEXT:   $vgpr2 = COPY [[COPY12]]
3965   ; GFX6-NEXT:   $vgpr3 = COPY [[COPY13]]
3966   ; GFX6-NEXT:   $vgpr4 = COPY [[COPY14]]
3967   ; GFX6-NEXT:   $vgpr5 = COPY [[COPY15]]
3968   ; GFX6-NEXT:   $vgpr6 = COPY [[COPY16]]
3969   ; GFX6-NEXT:   $vgpr7 = COPY [[COPY17]]
3970   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
3971   ;
3972   ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
3973   ; GFX7: bb.1 (%ir-block.0):
3974   ; GFX7-NEXT:   successors: %bb.2(0x80000000)
3975   ; GFX7-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
3976   ; GFX7-NEXT: {{  $}}
3977   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3978   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3979   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3980   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
3981   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
3982   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
3983   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
3984   ; GFX7-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
3985   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
3986   ; GFX7-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
3987   ; GFX7-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
3988   ; GFX7-NEXT: {{  $}}
3989   ; GFX7-NEXT: bb.2:
3990   ; GFX7-NEXT:   successors: %bb.3(0x80000000)
3991   ; GFX7-NEXT: {{  $}}
3992   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
3993   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
3994   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3995   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
3996   ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
3997   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
3998   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
3999   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4000   ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4001   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4002   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
4003   ; GFX7-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4004   ; GFX7-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4005   ; GFX7-NEXT: {{  $}}
4006   ; GFX7-NEXT: bb.3:
4007   ; GFX7-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
4008   ; GFX7-NEXT: {{  $}}
4009   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4010   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4011   ; GFX7-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4012   ; GFX7-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
4013   ; GFX7-NEXT: {{  $}}
4014   ; GFX7-NEXT: bb.4:
4015   ; GFX7-NEXT:   successors: %bb.5(0x80000000)
4016   ; GFX7-NEXT: {{  $}}
4017   ; GFX7-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
4018   ; GFX7-NEXT: {{  $}}
4019   ; GFX7-NEXT: bb.5:
4020   ; GFX7-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4021   ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4022   ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4023   ; GFX7-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4024   ; GFX7-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4025   ; GFX7-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4026   ; GFX7-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4027   ; GFX7-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4028   ; GFX7-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4029   ; GFX7-NEXT:   $vgpr0 = COPY [[COPY10]]
4030   ; GFX7-NEXT:   $vgpr1 = COPY [[COPY11]]
4031   ; GFX7-NEXT:   $vgpr2 = COPY [[COPY12]]
4032   ; GFX7-NEXT:   $vgpr3 = COPY [[COPY13]]
4033   ; GFX7-NEXT:   $vgpr4 = COPY [[COPY14]]
4034   ; GFX7-NEXT:   $vgpr5 = COPY [[COPY15]]
4035   ; GFX7-NEXT:   $vgpr6 = COPY [[COPY16]]
4036   ; GFX7-NEXT:   $vgpr7 = COPY [[COPY17]]
4037   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4038   ;
4039   ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
4040   ; GFX8: bb.1 (%ir-block.0):
4041   ; GFX8-NEXT:   successors: %bb.2(0x80000000)
4042   ; GFX8-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
4043   ; GFX8-NEXT: {{  $}}
4044   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4045   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4046   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4047   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4048   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4049   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
4050   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
4051   ; GFX8-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
4052   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
4053   ; GFX8-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4054   ; GFX8-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4055   ; GFX8-NEXT: {{  $}}
4056   ; GFX8-NEXT: bb.2:
4057   ; GFX8-NEXT:   successors: %bb.3(0x80000000)
4058   ; GFX8-NEXT: {{  $}}
4059   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4060   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4061   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4062   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4063   ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4064   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4065   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4066   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4067   ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4068   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4069   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
4070   ; GFX8-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4071   ; GFX8-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4072   ; GFX8-NEXT: {{  $}}
4073   ; GFX8-NEXT: bb.3:
4074   ; GFX8-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
4075   ; GFX8-NEXT: {{  $}}
4076   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4077   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4078   ; GFX8-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4079   ; GFX8-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
4080   ; GFX8-NEXT: {{  $}}
4081   ; GFX8-NEXT: bb.4:
4082   ; GFX8-NEXT:   successors: %bb.5(0x80000000)
4083   ; GFX8-NEXT: {{  $}}
4084   ; GFX8-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
4085   ; GFX8-NEXT: {{  $}}
4086   ; GFX8-NEXT: bb.5:
4087   ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4088   ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4089   ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4090   ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4091   ; GFX8-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4092   ; GFX8-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4093   ; GFX8-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4094   ; GFX8-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4095   ; GFX8-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4096   ; GFX8-NEXT:   $vgpr0 = COPY [[COPY10]]
4097   ; GFX8-NEXT:   $vgpr1 = COPY [[COPY11]]
4098   ; GFX8-NEXT:   $vgpr2 = COPY [[COPY12]]
4099   ; GFX8-NEXT:   $vgpr3 = COPY [[COPY13]]
4100   ; GFX8-NEXT:   $vgpr4 = COPY [[COPY14]]
4101   ; GFX8-NEXT:   $vgpr5 = COPY [[COPY15]]
4102   ; GFX8-NEXT:   $vgpr6 = COPY [[COPY16]]
4103   ; GFX8-NEXT:   $vgpr7 = COPY [[COPY17]]
4104   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4105   %soffset = add i32 %soffset.base, 4096
4106   %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
4107   ret <8 x float> %val
4108 }
4110 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000(<4 x i32> %rsrc, i32 %offset.base) {
4111   ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
4112   ; GFX6: bb.1 (%ir-block.0):
4113   ; GFX6-NEXT:   successors: %bb.2(0x80000000)
4114   ; GFX6-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4115   ; GFX6-NEXT: {{  $}}
4116   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4117   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4118   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4119   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4120   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4121   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4122   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
4123   ; GFX6-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4124   ; GFX6-NEXT: {{  $}}
4125   ; GFX6-NEXT: bb.2:
4126   ; GFX6-NEXT:   successors: %bb.3(0x80000000)
4127   ; GFX6-NEXT: {{  $}}
4128   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4129   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4130   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4131   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4132   ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4133   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4134   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4135   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4136   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4137   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4138   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4139   ; GFX6-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4140   ; GFX6-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4141   ; GFX6-NEXT: {{  $}}
4142   ; GFX6-NEXT: bb.3:
4143   ; GFX6-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
4144   ; GFX6-NEXT: {{  $}}
4145   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4146   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4147   ; GFX6-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4148   ; GFX6-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
4149   ; GFX6-NEXT: {{  $}}
4150   ; GFX6-NEXT: bb.4:
4151   ; GFX6-NEXT:   successors: %bb.5(0x80000000)
4152   ; GFX6-NEXT: {{  $}}
4153   ; GFX6-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
4154   ; GFX6-NEXT: {{  $}}
4155   ; GFX6-NEXT: bb.5:
4156   ; GFX6-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4157   ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4158   ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4159   ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4160   ; GFX6-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4161   ; GFX6-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4162   ; GFX6-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4163   ; GFX6-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4164   ; GFX6-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4165   ; GFX6-NEXT:   $vgpr0 = COPY [[COPY9]]
4166   ; GFX6-NEXT:   $vgpr1 = COPY [[COPY10]]
4167   ; GFX6-NEXT:   $vgpr2 = COPY [[COPY11]]
4168   ; GFX6-NEXT:   $vgpr3 = COPY [[COPY12]]
4169   ; GFX6-NEXT:   $vgpr4 = COPY [[COPY13]]
4170   ; GFX6-NEXT:   $vgpr5 = COPY [[COPY14]]
4171   ; GFX6-NEXT:   $vgpr6 = COPY [[COPY15]]
4172   ; GFX6-NEXT:   $vgpr7 = COPY [[COPY16]]
4173   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4174   ;
4175   ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
4176   ; GFX7: bb.1 (%ir-block.0):
4177   ; GFX7-NEXT:   successors: %bb.2(0x80000000)
4178   ; GFX7-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4179   ; GFX7-NEXT: {{  $}}
4180   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4181   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4182   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4183   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4184   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4185   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4186   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
4187   ; GFX7-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4188   ; GFX7-NEXT: {{  $}}
4189   ; GFX7-NEXT: bb.2:
4190   ; GFX7-NEXT:   successors: %bb.3(0x80000000)
4191   ; GFX7-NEXT: {{  $}}
4192   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4193   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4194   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4195   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4196   ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4197   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4198   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4199   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4200   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4201   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4202   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4203   ; GFX7-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4204   ; GFX7-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4205   ; GFX7-NEXT: {{  $}}
4206   ; GFX7-NEXT: bb.3:
4207   ; GFX7-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
4208   ; GFX7-NEXT: {{  $}}
4209   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4210   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4211   ; GFX7-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4212   ; GFX7-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
4213   ; GFX7-NEXT: {{  $}}
4214   ; GFX7-NEXT: bb.4:
4215   ; GFX7-NEXT:   successors: %bb.5(0x80000000)
4216   ; GFX7-NEXT: {{  $}}
4217   ; GFX7-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
4218   ; GFX7-NEXT: {{  $}}
4219   ; GFX7-NEXT: bb.5:
4220   ; GFX7-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4221   ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4222   ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4223   ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4224   ; GFX7-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4225   ; GFX7-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4226   ; GFX7-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4227   ; GFX7-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4228   ; GFX7-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4229   ; GFX7-NEXT:   $vgpr0 = COPY [[COPY9]]
4230   ; GFX7-NEXT:   $vgpr1 = COPY [[COPY10]]
4231   ; GFX7-NEXT:   $vgpr2 = COPY [[COPY11]]
4232   ; GFX7-NEXT:   $vgpr3 = COPY [[COPY12]]
4233   ; GFX7-NEXT:   $vgpr4 = COPY [[COPY13]]
4234   ; GFX7-NEXT:   $vgpr5 = COPY [[COPY14]]
4235   ; GFX7-NEXT:   $vgpr6 = COPY [[COPY15]]
4236   ; GFX7-NEXT:   $vgpr7 = COPY [[COPY16]]
4237   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4238   ;
4239   ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
4240   ; GFX8: bb.1 (%ir-block.0):
4241   ; GFX8-NEXT:   successors: %bb.2(0x80000000)
4242   ; GFX8-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4243   ; GFX8-NEXT: {{  $}}
4244   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4245   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4246   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4247   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4248   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4249   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4250   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064
4251   ; GFX8-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4252   ; GFX8-NEXT: {{  $}}
4253   ; GFX8-NEXT: bb.2:
4254   ; GFX8-NEXT:   successors: %bb.3(0x80000000)
4255   ; GFX8-NEXT: {{  $}}
4256   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4257   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4258   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4259   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4260   ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4261   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4262   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4263   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4264   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4265   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4266   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4267   ; GFX8-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4268   ; GFX8-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4269   ; GFX8-NEXT: {{  $}}
4270   ; GFX8-NEXT: bb.3:
4271   ; GFX8-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
4272   ; GFX8-NEXT: {{  $}}
4273   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 936, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4274   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 952, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4275   ; GFX8-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4276   ; GFX8-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
4277   ; GFX8-NEXT: {{  $}}
4278   ; GFX8-NEXT: bb.4:
4279   ; GFX8-NEXT:   successors: %bb.5(0x80000000)
4280   ; GFX8-NEXT: {{  $}}
4281   ; GFX8-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
4282   ; GFX8-NEXT: {{  $}}
4283   ; GFX8-NEXT: bb.5:
4284   ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4285   ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4286   ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4287   ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4288   ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4289   ; GFX8-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4290   ; GFX8-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4291   ; GFX8-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4292   ; GFX8-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4293   ; GFX8-NEXT:   $vgpr0 = COPY [[COPY9]]
4294   ; GFX8-NEXT:   $vgpr1 = COPY [[COPY10]]
4295   ; GFX8-NEXT:   $vgpr2 = COPY [[COPY11]]
4296   ; GFX8-NEXT:   $vgpr3 = COPY [[COPY12]]
4297   ; GFX8-NEXT:   $vgpr4 = COPY [[COPY13]]
4298   ; GFX8-NEXT:   $vgpr5 = COPY [[COPY14]]
4299   ; GFX8-NEXT:   $vgpr6 = COPY [[COPY15]]
4300   ; GFX8-NEXT:   $vgpr7 = COPY [[COPY16]]
4301   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4302   %soffset = add i32 %offset.base, 5000
4303   %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
4304   ret <8 x float> %val
4305 }
4307 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076(<4 x i32> %rsrc, i32 %offset.base) {
4308   ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
4309   ; GFX6: bb.1 (%ir-block.0):
4310   ; GFX6-NEXT:   successors: %bb.2(0x80000000)
4311   ; GFX6-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4312   ; GFX6-NEXT: {{  $}}
4313   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4314   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4315   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4316   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4317   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4318   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4319   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076
4320   ; GFX6-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4321   ; GFX6-NEXT: {{  $}}
4322   ; GFX6-NEXT: bb.2:
4323   ; GFX6-NEXT:   successors: %bb.3(0x80000000)
4324   ; GFX6-NEXT: {{  $}}
4325   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4326   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4327   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4328   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4329   ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4330   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4331   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4332   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4333   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4334   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4335   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4336   ; GFX6-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4337   ; GFX6-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4338   ; GFX6-NEXT: {{  $}}
4339   ; GFX6-NEXT: bb.3:
4340   ; GFX6-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
4341   ; GFX6-NEXT: {{  $}}
4342   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4343   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4344   ; GFX6-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4345   ; GFX6-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
4346   ; GFX6-NEXT: {{  $}}
4347   ; GFX6-NEXT: bb.4:
4348   ; GFX6-NEXT:   successors: %bb.5(0x80000000)
4349   ; GFX6-NEXT: {{  $}}
4350   ; GFX6-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
4351   ; GFX6-NEXT: {{  $}}
4352   ; GFX6-NEXT: bb.5:
4353   ; GFX6-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4354   ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4355   ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4356   ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4357   ; GFX6-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4358   ; GFX6-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4359   ; GFX6-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4360   ; GFX6-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4361   ; GFX6-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4362   ; GFX6-NEXT:   $vgpr0 = COPY [[COPY9]]
4363   ; GFX6-NEXT:   $vgpr1 = COPY [[COPY10]]
4364   ; GFX6-NEXT:   $vgpr2 = COPY [[COPY11]]
4365   ; GFX6-NEXT:   $vgpr3 = COPY [[COPY12]]
4366   ; GFX6-NEXT:   $vgpr4 = COPY [[COPY13]]
4367   ; GFX6-NEXT:   $vgpr5 = COPY [[COPY14]]
4368   ; GFX6-NEXT:   $vgpr6 = COPY [[COPY15]]
4369   ; GFX6-NEXT:   $vgpr7 = COPY [[COPY16]]
4370   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4371   ;
4372   ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
4373   ; GFX7: bb.1 (%ir-block.0):
4374   ; GFX7-NEXT:   successors: %bb.2(0x80000000)
4375   ; GFX7-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4376   ; GFX7-NEXT: {{  $}}
4377   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4378   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4379   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4380   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4381   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4382   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4383   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076
4384   ; GFX7-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4385   ; GFX7-NEXT: {{  $}}
4386   ; GFX7-NEXT: bb.2:
4387   ; GFX7-NEXT:   successors: %bb.3(0x80000000)
4388   ; GFX7-NEXT: {{  $}}
4389   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4390   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4391   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4392   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4393   ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4394   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4395   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4396   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4397   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4398   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4399   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4400   ; GFX7-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4401   ; GFX7-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4402   ; GFX7-NEXT: {{  $}}
4403   ; GFX7-NEXT: bb.3:
4404   ; GFX7-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
4405   ; GFX7-NEXT: {{  $}}
4406   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4407   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4408   ; GFX7-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4409   ; GFX7-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
4410   ; GFX7-NEXT: {{  $}}
4411   ; GFX7-NEXT: bb.4:
4412   ; GFX7-NEXT:   successors: %bb.5(0x80000000)
4413   ; GFX7-NEXT: {{  $}}
4414   ; GFX7-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
4415   ; GFX7-NEXT: {{  $}}
4416   ; GFX7-NEXT: bb.5:
4417   ; GFX7-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4418   ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4419   ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4420   ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4421   ; GFX7-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4422   ; GFX7-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4423   ; GFX7-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4424   ; GFX7-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4425   ; GFX7-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4426   ; GFX7-NEXT:   $vgpr0 = COPY [[COPY9]]
4427   ; GFX7-NEXT:   $vgpr1 = COPY [[COPY10]]
4428   ; GFX7-NEXT:   $vgpr2 = COPY [[COPY11]]
4429   ; GFX7-NEXT:   $vgpr3 = COPY [[COPY12]]
4430   ; GFX7-NEXT:   $vgpr4 = COPY [[COPY13]]
4431   ; GFX7-NEXT:   $vgpr5 = COPY [[COPY14]]
4432   ; GFX7-NEXT:   $vgpr6 = COPY [[COPY15]]
4433   ; GFX7-NEXT:   $vgpr7 = COPY [[COPY16]]
4434   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4435   ;
4436   ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
4437   ; GFX8: bb.1 (%ir-block.0):
4438   ; GFX8-NEXT:   successors: %bb.2(0x80000000)
4439   ; GFX8-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4440   ; GFX8-NEXT: {{  $}}
4441   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4442   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4443   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4444   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4445   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4446   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4447   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12
4448   ; GFX8-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4449   ; GFX8-NEXT: {{  $}}
4450   ; GFX8-NEXT: bb.2:
4451   ; GFX8-NEXT:   successors: %bb.3(0x80000000)
4452   ; GFX8-NEXT: {{  $}}
4453   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4454   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4455   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4456   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4457   ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4458   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4459   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4460   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4461   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4462   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4463   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4464   ; GFX8-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4465   ; GFX8-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4466   ; GFX8-NEXT: {{  $}}
4467   ; GFX8-NEXT: bb.3:
4468   ; GFX8-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
4469   ; GFX8-NEXT: {{  $}}
4470   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4471   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4472   ; GFX8-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4473   ; GFX8-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
4474   ; GFX8-NEXT: {{  $}}
4475   ; GFX8-NEXT: bb.4:
4476   ; GFX8-NEXT:   successors: %bb.5(0x80000000)
4477   ; GFX8-NEXT: {{  $}}
4478   ; GFX8-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
4479   ; GFX8-NEXT: {{  $}}
4480   ; GFX8-NEXT: bb.5:
4481   ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4482   ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4483   ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4484   ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4485   ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4486   ; GFX8-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4487   ; GFX8-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4488   ; GFX8-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4489   ; GFX8-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4490   ; GFX8-NEXT:   $vgpr0 = COPY [[COPY9]]
4491   ; GFX8-NEXT:   $vgpr1 = COPY [[COPY10]]
4492   ; GFX8-NEXT:   $vgpr2 = COPY [[COPY11]]
4493   ; GFX8-NEXT:   $vgpr3 = COPY [[COPY12]]
4494   ; GFX8-NEXT:   $vgpr4 = COPY [[COPY13]]
4495   ; GFX8-NEXT:   $vgpr5 = COPY [[COPY14]]
4496   ; GFX8-NEXT:   $vgpr6 = COPY [[COPY15]]
4497   ; GFX8-NEXT:   $vgpr7 = COPY [[COPY16]]
4498   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4499   %soffset = add i32 %offset.base, 4076
4500   %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
4501   ret <8 x float> %val
4502 }
4504 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080(<4 x i32> %rsrc, i32 %offset.base) {
4505   ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
4506   ; GFX6: bb.1 (%ir-block.0):
4507   ; GFX6-NEXT:   successors: %bb.2(0x80000000)
4508   ; GFX6-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4509   ; GFX6-NEXT: {{  $}}
4510   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4511   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4512   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4513   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4514   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4515   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4516   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080
4517   ; GFX6-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4518   ; GFX6-NEXT: {{  $}}
4519   ; GFX6-NEXT: bb.2:
4520   ; GFX6-NEXT:   successors: %bb.3(0x80000000)
4521   ; GFX6-NEXT: {{  $}}
4522   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4523   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4524   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4525   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4526   ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4527   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4528   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4529   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4530   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4531   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4532   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4533   ; GFX6-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4534   ; GFX6-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4535   ; GFX6-NEXT: {{  $}}
4536   ; GFX6-NEXT: bb.3:
4537   ; GFX6-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
4538   ; GFX6-NEXT: {{  $}}
4539   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4540   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4541   ; GFX6-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4542   ; GFX6-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
4543   ; GFX6-NEXT: {{  $}}
4544   ; GFX6-NEXT: bb.4:
4545   ; GFX6-NEXT:   successors: %bb.5(0x80000000)
4546   ; GFX6-NEXT: {{  $}}
4547   ; GFX6-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
4548   ; GFX6-NEXT: {{  $}}
4549   ; GFX6-NEXT: bb.5:
4550   ; GFX6-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4551   ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4552   ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4553   ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4554   ; GFX6-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4555   ; GFX6-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4556   ; GFX6-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4557   ; GFX6-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4558   ; GFX6-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4559   ; GFX6-NEXT:   $vgpr0 = COPY [[COPY9]]
4560   ; GFX6-NEXT:   $vgpr1 = COPY [[COPY10]]
4561   ; GFX6-NEXT:   $vgpr2 = COPY [[COPY11]]
4562   ; GFX6-NEXT:   $vgpr3 = COPY [[COPY12]]
4563   ; GFX6-NEXT:   $vgpr4 = COPY [[COPY13]]
4564   ; GFX6-NEXT:   $vgpr5 = COPY [[COPY14]]
4565   ; GFX6-NEXT:   $vgpr6 = COPY [[COPY15]]
4566   ; GFX6-NEXT:   $vgpr7 = COPY [[COPY16]]
4567   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4568   ;
4569   ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
4570   ; GFX7: bb.1 (%ir-block.0):
4571   ; GFX7-NEXT:   successors: %bb.2(0x80000000)
4572   ; GFX7-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4573   ; GFX7-NEXT: {{  $}}
4574   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4575   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4576   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4577   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4578   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4579   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4580   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080
4581   ; GFX7-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4582   ; GFX7-NEXT: {{  $}}
4583   ; GFX7-NEXT: bb.2:
4584   ; GFX7-NEXT:   successors: %bb.3(0x80000000)
4585   ; GFX7-NEXT: {{  $}}
4586   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4587   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4588   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4589   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4590   ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4591   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4592   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4593   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4594   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4595   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4596   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4597   ; GFX7-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4598   ; GFX7-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4599   ; GFX7-NEXT: {{  $}}
4600   ; GFX7-NEXT: bb.3:
4601   ; GFX7-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
4602   ; GFX7-NEXT: {{  $}}
4603   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4604   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4605   ; GFX7-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4606   ; GFX7-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
4607   ; GFX7-NEXT: {{  $}}
4608   ; GFX7-NEXT: bb.4:
4609   ; GFX7-NEXT:   successors: %bb.5(0x80000000)
4610   ; GFX7-NEXT: {{  $}}
4611   ; GFX7-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
4612   ; GFX7-NEXT: {{  $}}
4613   ; GFX7-NEXT: bb.5:
4614   ; GFX7-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4615   ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4616   ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4617   ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4618   ; GFX7-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4619   ; GFX7-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4620   ; GFX7-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4621   ; GFX7-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4622   ; GFX7-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4623   ; GFX7-NEXT:   $vgpr0 = COPY [[COPY9]]
4624   ; GFX7-NEXT:   $vgpr1 = COPY [[COPY10]]
4625   ; GFX7-NEXT:   $vgpr2 = COPY [[COPY11]]
4626   ; GFX7-NEXT:   $vgpr3 = COPY [[COPY12]]
4627   ; GFX7-NEXT:   $vgpr4 = COPY [[COPY13]]
4628   ; GFX7-NEXT:   $vgpr5 = COPY [[COPY14]]
4629   ; GFX7-NEXT:   $vgpr6 = COPY [[COPY15]]
4630   ; GFX7-NEXT:   $vgpr7 = COPY [[COPY16]]
4631   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4632   ;
4633   ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
4634   ; GFX8: bb.1 (%ir-block.0):
4635   ; GFX8-NEXT:   successors: %bb.2(0x80000000)
4636   ; GFX8-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
4637   ; GFX8-NEXT: {{  $}}
4638   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4639   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4640   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4641   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4642   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4643   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
4644   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
4645   ; GFX8-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4646   ; GFX8-NEXT: {{  $}}
4647   ; GFX8-NEXT: bb.2:
4648   ; GFX8-NEXT:   successors: %bb.3(0x80000000)
4649   ; GFX8-NEXT: {{  $}}
4650   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4651   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4652   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4653   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4654   ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4655   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4656   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4657   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4658   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4659   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4660   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
4661   ; GFX8-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4662   ; GFX8-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4663   ; GFX8-NEXT: {{  $}}
4664   ; GFX8-NEXT: bb.3:
4665   ; GFX8-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
4666   ; GFX8-NEXT: {{  $}}
4667   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4668   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
4669   ; GFX8-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4670   ; GFX8-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
4671   ; GFX8-NEXT: {{  $}}
4672   ; GFX8-NEXT: bb.4:
4673   ; GFX8-NEXT:   successors: %bb.5(0x80000000)
4674   ; GFX8-NEXT: {{  $}}
4675   ; GFX8-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
4676   ; GFX8-NEXT: {{  $}}
4677   ; GFX8-NEXT: bb.5:
4678   ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
4679   ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4680   ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4681   ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4682   ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4683   ; GFX8-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4684   ; GFX8-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4685   ; GFX8-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4686   ; GFX8-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4687   ; GFX8-NEXT:   $vgpr0 = COPY [[COPY9]]
4688   ; GFX8-NEXT:   $vgpr1 = COPY [[COPY10]]
4689   ; GFX8-NEXT:   $vgpr2 = COPY [[COPY11]]
4690   ; GFX8-NEXT:   $vgpr3 = COPY [[COPY12]]
4691   ; GFX8-NEXT:   $vgpr4 = COPY [[COPY13]]
4692   ; GFX8-NEXT:   $vgpr5 = COPY [[COPY14]]
4693   ; GFX8-NEXT:   $vgpr6 = COPY [[COPY15]]
4694   ; GFX8-NEXT:   $vgpr7 = COPY [[COPY16]]
4695   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4696   %soffset = add i32 %offset.base, 4080
4697   %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
4698   ret <8 x float> %val
4699 }
4701 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064(<4 x i32> %rsrc, i32 %offset.base) {
4702   ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
4703   ; GFX6: bb.1 (%ir-block.0):
4704   ; GFX6-NEXT:   successors: %bb.2(0x80000000)
4705   ; GFX6-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
4706   ; GFX6-NEXT: {{  $}}
4707   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4708   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4709   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4710   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4711   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4712   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4713   ; GFX6-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4714   ; GFX6-NEXT: {{  $}}
4715   ; GFX6-NEXT: bb.2:
4716   ; GFX6-NEXT:   successors: %bb.3(0x80000000)
4717   ; GFX6-NEXT: {{  $}}
4718   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4719   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4720   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4721   ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4722   ; GFX6-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4723   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4724   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4725   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4726   ; GFX6-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4727   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
4728   ; GFX6-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4729   ; GFX6-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4730   ; GFX6-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4731   ; GFX6-NEXT: {{  $}}
4732   ; GFX6-NEXT: bb.3:
4733   ; GFX6-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
4734   ; GFX6-NEXT: {{  $}}
4735   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
4736   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
4737   ; GFX6-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4738   ; GFX6-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
4739   ; GFX6-NEXT: {{  $}}
4740   ; GFX6-NEXT: bb.4:
4741   ; GFX6-NEXT:   successors: %bb.5(0x80000000)
4742   ; GFX6-NEXT: {{  $}}
4743   ; GFX6-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
4744   ; GFX6-NEXT: {{  $}}
4745   ; GFX6-NEXT: bb.5:
4746   ; GFX6-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
4747   ; GFX6-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4748   ; GFX6-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4749   ; GFX6-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4750   ; GFX6-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4751   ; GFX6-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4752   ; GFX6-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4753   ; GFX6-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4754   ; GFX6-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4755   ; GFX6-NEXT:   $vgpr0 = COPY [[COPY8]]
4756   ; GFX6-NEXT:   $vgpr1 = COPY [[COPY9]]
4757   ; GFX6-NEXT:   $vgpr2 = COPY [[COPY10]]
4758   ; GFX6-NEXT:   $vgpr3 = COPY [[COPY11]]
4759   ; GFX6-NEXT:   $vgpr4 = COPY [[COPY12]]
4760   ; GFX6-NEXT:   $vgpr5 = COPY [[COPY13]]
4761   ; GFX6-NEXT:   $vgpr6 = COPY [[COPY14]]
4762   ; GFX6-NEXT:   $vgpr7 = COPY [[COPY15]]
4763   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4764   ;
4765   ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
4766   ; GFX7: bb.1 (%ir-block.0):
4767   ; GFX7-NEXT:   successors: %bb.2(0x80000000)
4768   ; GFX7-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
4769   ; GFX7-NEXT: {{  $}}
4770   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4771   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4772   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4773   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4774   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4775   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4776   ; GFX7-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4777   ; GFX7-NEXT: {{  $}}
4778   ; GFX7-NEXT: bb.2:
4779   ; GFX7-NEXT:   successors: %bb.3(0x80000000)
4780   ; GFX7-NEXT: {{  $}}
4781   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4782   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4783   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4784   ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4785   ; GFX7-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4786   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4787   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4788   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4789   ; GFX7-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4790   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
4791   ; GFX7-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4792   ; GFX7-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4793   ; GFX7-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4794   ; GFX7-NEXT: {{  $}}
4795   ; GFX7-NEXT: bb.3:
4796   ; GFX7-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
4797   ; GFX7-NEXT: {{  $}}
4798   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
4799   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
4800   ; GFX7-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4801   ; GFX7-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
4802   ; GFX7-NEXT: {{  $}}
4803   ; GFX7-NEXT: bb.4:
4804   ; GFX7-NEXT:   successors: %bb.5(0x80000000)
4805   ; GFX7-NEXT: {{  $}}
4806   ; GFX7-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
4807   ; GFX7-NEXT: {{  $}}
4808   ; GFX7-NEXT: bb.5:
4809   ; GFX7-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
4810   ; GFX7-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4811   ; GFX7-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4812   ; GFX7-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4813   ; GFX7-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4814   ; GFX7-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4815   ; GFX7-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4816   ; GFX7-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4817   ; GFX7-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4818   ; GFX7-NEXT:   $vgpr0 = COPY [[COPY8]]
4819   ; GFX7-NEXT:   $vgpr1 = COPY [[COPY9]]
4820   ; GFX7-NEXT:   $vgpr2 = COPY [[COPY10]]
4821   ; GFX7-NEXT:   $vgpr3 = COPY [[COPY11]]
4822   ; GFX7-NEXT:   $vgpr4 = COPY [[COPY12]]
4823   ; GFX7-NEXT:   $vgpr5 = COPY [[COPY13]]
4824   ; GFX7-NEXT:   $vgpr6 = COPY [[COPY14]]
4825   ; GFX7-NEXT:   $vgpr7 = COPY [[COPY15]]
4826   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4827   ;
4828   ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
4829   ; GFX8: bb.1 (%ir-block.0):
4830   ; GFX8-NEXT:   successors: %bb.2(0x80000000)
4831   ; GFX8-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
4832   ; GFX8-NEXT: {{  $}}
4833   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4834   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4835   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
4836   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
4837   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4838   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4839   ; GFX8-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
4840   ; GFX8-NEXT: {{  $}}
4841   ; GFX8-NEXT: bb.2:
4842   ; GFX8-NEXT:   successors: %bb.3(0x80000000)
4843   ; GFX8-NEXT: {{  $}}
4844   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
4845   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
4846   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
4847   ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
4848   ; GFX8-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
4849   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
4850   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
4851   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
4852   ; GFX8-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
4853   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
4854   ; GFX8-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
4855   ; GFX8-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
4856   ; GFX8-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
4857   ; GFX8-NEXT: {{  $}}
4858   ; GFX8-NEXT: bb.3:
4859   ; GFX8-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
4860   ; GFX8-NEXT: {{  $}}
4861   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
4862   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
4863   ; GFX8-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
4864   ; GFX8-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
4865   ; GFX8-NEXT: {{  $}}
4866   ; GFX8-NEXT: bb.4:
4867   ; GFX8-NEXT:   successors: %bb.5(0x80000000)
4868   ; GFX8-NEXT: {{  $}}
4869   ; GFX8-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
4870   ; GFX8-NEXT: {{  $}}
4871   ; GFX8-NEXT: bb.5:
4872   ; GFX8-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
4873   ; GFX8-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
4874   ; GFX8-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
4875   ; GFX8-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
4876   ; GFX8-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3
4877   ; GFX8-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4
4878   ; GFX8-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5
4879   ; GFX8-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6
4880   ; GFX8-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7
4881   ; GFX8-NEXT:   $vgpr0 = COPY [[COPY8]]
4882   ; GFX8-NEXT:   $vgpr1 = COPY [[COPY9]]
4883   ; GFX8-NEXT:   $vgpr2 = COPY [[COPY10]]
4884   ; GFX8-NEXT:   $vgpr3 = COPY [[COPY11]]
4885   ; GFX8-NEXT:   $vgpr4 = COPY [[COPY12]]
4886   ; GFX8-NEXT:   $vgpr5 = COPY [[COPY13]]
4887   ; GFX8-NEXT:   $vgpr6 = COPY [[COPY14]]
4888   ; GFX8-NEXT:   $vgpr7 = COPY [[COPY15]]
4889   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
4890   %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0)
4891   ret <8 x float> %val
4892 }
4894 define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
4895   ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
4896   ; GFX6: bb.1 (%ir-block.0):
4897   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
4898   ; GFX6-NEXT: {{  $}}
4899   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
4900   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
4901   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
4902   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
4903   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4904   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4905   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
4906   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
4907   ; GFX6-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
4908   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
4909   ;
4910   ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
4911   ; GFX7: bb.1 (%ir-block.0):
4912   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
4913   ; GFX7-NEXT: {{  $}}
4914   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
4915   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
4916   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
4917   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
4918   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4919   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4920   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
4921   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
4922   ; GFX7-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
4923   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
4924   ;
4925   ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
4926   ; GFX8: bb.1 (%ir-block.0):
4927   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
4928   ; GFX8-NEXT: {{  $}}
4929   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
4930   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
4931   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
4932   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
4933   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4934   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4935   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
4936   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
4937   ; GFX8-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
4938   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
4939   %offset = add i32 %offset.v, %offset.s
4940   %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
4941   ret float %val
4942 }
4944 define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
4945   ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
4946   ; GFX6: bb.1 (%ir-block.0):
4947   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
4948   ; GFX6-NEXT: {{  $}}
4949   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
4950   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
4951   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
4952   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
4953   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4954   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4955   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
4956   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
4957   ; GFX6-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
4958   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
4959   ;
4960   ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
4961   ; GFX7: bb.1 (%ir-block.0):
4962   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
4963   ; GFX7-NEXT: {{  $}}
4964   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
4965   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
4966   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
4967   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
4968   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4969   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4970   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
4971   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
4972   ; GFX7-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
4973   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
4974   ;
4975   ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
4976   ; GFX8: bb.1 (%ir-block.0):
4977   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
4978   ; GFX8-NEXT: {{  $}}
4979   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
4980   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
4981   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
4982   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
4983   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
4984   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4985   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
4986   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
4987   ; GFX8-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
4988   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
4989   %offset = add i32 %offset.s, %offset.v
4990   %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
4991   ret float %val
4992 }
4994 define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
4995   ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
4996   ; GFX6: bb.1 (%ir-block.0):
4997   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
4998   ; GFX6-NEXT: {{  $}}
4999   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
5000   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
5001   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
5002   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
5003   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
5004   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
5005   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
5006   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
5007   ; GFX6-NEXT:   [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
5008   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
5009   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
5010   ; GFX6-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
5011   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
5012   ;
5013   ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
5014   ; GFX7: bb.1 (%ir-block.0):
5015   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
5016   ; GFX7-NEXT: {{  $}}
5017   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
5018   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
5019   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
5020   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
5021   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
5022   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
5023   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
5024   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
5025   ; GFX7-NEXT:   [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
5026   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
5027   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
5028   ; GFX7-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
5029   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
5030   ;
5031   ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
5032   ; GFX8: bb.1 (%ir-block.0):
5033   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
5034   ; GFX8-NEXT: {{  $}}
5035   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
5036   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
5037   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
5038   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
5039   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
5040   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
5041   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
5042   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
5043   ; GFX8-NEXT:   [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
5044   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
5045   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
5046   ; GFX8-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
5047   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
5048   %offset.base = add i32 %offset.v, %offset.s
5049   %offset = add i32 %offset.base, 1024
5050   %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
5051   ret float %val
5052 }
5054 define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
5055   ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
5056   ; GFX6: bb.1 (%ir-block.0):
5057   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
5058   ; GFX6-NEXT: {{  $}}
5059   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
5060   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
5061   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
5062   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
5063   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
5064   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
5065   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
5066   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
5067   ; GFX6-NEXT:   [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
5068   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
5069   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
5070   ; GFX6-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
5071   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
5072   ;
5073   ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
5074   ; GFX7: bb.1 (%ir-block.0):
5075   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
5076   ; GFX7-NEXT: {{  $}}
5077   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
5078   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
5079   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
5080   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
5081   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
5082   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
5083   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
5084   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
5085   ; GFX7-NEXT:   [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
5086   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
5087   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
5088   ; GFX7-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
5089   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
5090   ;
5091   ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
5092   ; GFX8: bb.1 (%ir-block.0):
5093   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
5094   ; GFX8-NEXT: {{  $}}
5095   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
5096   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
5097   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
5098   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
5099   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
5100   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
5101   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
5102   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
5103   ; GFX8-NEXT:   [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
5104   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
5105   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
5106   ; GFX8-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
5107   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
5108   %offset.base = add i32 %offset.s, %offset.v
5109   %offset = add i32 %offset.base, 1024
5110   %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
5111   ret float %val
5112 }
5114 ; TODO: Ideally this would be reassociated to fold.
5115 define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
5116   ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
5117   ; GFX6: bb.1 (%ir-block.0):
5118   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
5119   ; GFX6-NEXT: {{  $}}
5120   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
5121   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
5122   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
5123   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
5124   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
5125   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
5126   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
5127   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
5128   ; GFX6-NEXT:   [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
5129   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
5130   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
5131   ; GFX6-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
5132   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
5133   ;
5134   ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
5135   ; GFX7: bb.1 (%ir-block.0):
5136   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
5137   ; GFX7-NEXT: {{  $}}
5138   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
5139   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
5140   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
5141   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
5142   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
5143   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
5144   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
5145   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
5146   ; GFX7-NEXT:   [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
5147   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
5148   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
5149   ; GFX7-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
5150   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
5151   ;
5152   ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
5153   ; GFX8: bb.1 (%ir-block.0):
5154   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
5155   ; GFX8-NEXT: {{  $}}
5156   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
5157   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
5158   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
5159   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
5160   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
5161   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
5162   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
5163   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
5164   ; GFX8-NEXT:   [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
5165   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
5166   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
5167   ; GFX8-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
5168   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
5169   %offset.base = add i32 %offset.s, 1024
5170   %offset = add i32 %offset.base, %offset.v
5171   %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
5172   ret float %val
5173 }
5175 define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
5176   ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
5177   ; GFX6: bb.1 (%ir-block.0):
5178   ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
5179   ; GFX6-NEXT: {{  $}}
5180   ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
5181   ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
5182   ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
5183   ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
5184   ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
5185   ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
5186   ; GFX6-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
5187   ; GFX6-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
5188   ; GFX6-NEXT:   [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
5189   ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
5190   ; GFX6-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
5191   ; GFX6-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
5192   ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
5193   ;
5194   ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
5195   ; GFX7: bb.1 (%ir-block.0):
5196   ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
5197   ; GFX7-NEXT: {{  $}}
5198   ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
5199   ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
5200   ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
5201   ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
5202   ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
5203   ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
5204   ; GFX7-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
5205   ; GFX7-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
5206   ; GFX7-NEXT:   [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
5207   ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
5208   ; GFX7-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
5209   ; GFX7-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
5210   ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
5211   ;
5212   ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
5213   ; GFX8: bb.1 (%ir-block.0):
5214   ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
5215   ; GFX8-NEXT: {{  $}}
5216   ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
5217   ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
5218   ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
5219   ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
5220   ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
5221   ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
5222   ; GFX8-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
5223   ; GFX8-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
5224   ; GFX8-NEXT:   [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
5225   ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
5226   ; GFX8-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
5227   ; GFX8-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
5228   ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
5229   %offset.base = add i32 %offset.v, 1024
5230   %offset = add i32 %offset.base, %offset.s
5231   %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
5232   ret float %val
5233 }
5235 declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
5236 declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg)
5237 declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg)
5238 declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32 immarg)
5239 declare <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32>, i32, i32 immarg)
5240 declare <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32>, i32, i32 immarg)
5242 declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg)
5243 declare <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32>, i32, i32 immarg)
5244 declare <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32>, i32, i32 immarg)
5245 declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32 immarg)
5246 declare <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32>, i32, i32 immarg)
5247 declare <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32>, i32, i32 immarg)
5249 declare i96 @llvm.amdgcn.s.buffer.load.i96(<4 x i32>, i32, i32 immarg)
5250 declare i256 @llvm.amdgcn.s.buffer.load.i256(<4 x i32>, i32, i32 immarg)
5251 declare i512 @llvm.amdgcn.s.buffer.load.i512(<4 x i32>, i32, i32 immarg)
5253 declare <16 x i16> @llvm.amdgcn.s.buffer.load.v16i16(<4 x i32>, i32, i32 immarg)
5254 declare <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32>, i32, i32 immarg)
5256 declare <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32>, i32, i32 immarg)
5257 declare <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32>, i32, i32 immarg)
5259 declare <4 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v4p1(<4 x i32>, i32, i32 immarg)
5260 declare <8 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v8p1(<4 x i32>, i32, i32 immarg)