1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -march=amdgcn -mcpu=gfx908 -stop-after=si-fix-sgpr-copies -verify-machineinstrs < %s | FileCheck -check-prefix=GFX908 %s
4 ;; Older buffer intrinsics that take a <4 x i32> operand (poison in the calls below)
; Calls llvm.amdgcn.raw.buffer.load.f32 with a poison <4 x i32> operand.
; The checks expect the copy of $vgpr1 (presumably %soffset) to pass through
; V_READFIRSTLANE_B32 and feed BUFFER_LOAD_DWORD_OFFEN, followed by
; SI_WATERFALL_LOOP; the loaded value is returned in $vgpr0.
6 define float @llvm_amdgcn_raw_buffer_load_f32(i32 %voffset, i32 %soffset) {
7 ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_load_f32
8 ; GFX908: bb.0 (%ir-block.0):
9 ; GFX908-NEXT: successors: %bb.1(0x80000000)
10 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1
12 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
13 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
14 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
15 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
16 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
19 ; GFX908-NEXT: successors: %bb.2(0x80000000)
21 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
22 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
23 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
26 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
28 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
29 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
30 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
33 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
34 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
35 ; GFX908-NEXT: SI_RETURN implicit $vgpr0
36 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0)
; Calls llvm.amdgcn.raw.tbuffer.load.f32 with a poison <4 x i32> operand.
; The checks expect the copy of $vgpr1 (presumably %soffset) to pass through
; V_READFIRSTLANE_B32 and feed TBUFFER_LOAD_FORMAT_X_OFFEN, followed by
; SI_WATERFALL_LOOP; the loaded value is returned in $vgpr0.
40 define float @llvm_amdgcn_raw_tbuffer_load_f32(i32 %voffset, i32 %soffset) {
41 ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_load_f32
42 ; GFX908: bb.0 (%ir-block.0):
43 ; GFX908-NEXT: successors: %bb.1(0x80000000)
44 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1
46 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
47 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
48 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
49 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
50 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
53 ; GFX908-NEXT: successors: %bb.2(0x80000000)
55 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
56 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
57 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
60 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
62 ; GFX908-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
63 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
64 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
67 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
68 ; GFX908-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
69 ; GFX908-NEXT: SI_RETURN implicit $vgpr0
70 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
; Calls llvm.amdgcn.raw.buffer.load.v2f32 with a poison <4 x i32> operand.
; The checks expect the copy of $vgpr1 (presumably %soffset) to pass through
; V_READFIRSTLANE_B32 and feed BUFFER_LOAD_DWORDX2_OFFEN, followed by
; SI_WATERFALL_LOOP; sub0/sub1 of the result are returned in $vgpr0/$vgpr1.
74 define <2 x float> @llvm_amdgcn_raw_buffer_load_v2f32(i32 %voffset, i32 %soffset) {
75 ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_load_v2f32
76 ; GFX908: bb.0 (%ir-block.0):
77 ; GFX908-NEXT: successors: %bb.1(0x80000000)
78 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1
80 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
81 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
82 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
83 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
84 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
87 ; GFX908-NEXT: successors: %bb.2(0x80000000)
89 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
90 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
91 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
94 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
96 ; GFX908-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 8)
97 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
98 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
101 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
102 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
103 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
104 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]]
105 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]]
106 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
107 %val = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0)
; Calls llvm.amdgcn.raw.tbuffer.load.v2f32 with a poison <4 x i32> operand.
; The checks expect the copy of $vgpr1 (presumably %soffset) to pass through
; V_READFIRSTLANE_B32 and feed TBUFFER_LOAD_FORMAT_XY_OFFEN, followed by
; SI_WATERFALL_LOOP; sub0/sub1 of the result are returned in $vgpr0/$vgpr1.
111 define <2 x float> @llvm_amdgcn_raw_tbuffer_load_v2f32(i32 %voffset, i32 %soffset) {
112 ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_load_v2f32
113 ; GFX908: bb.0 (%ir-block.0):
114 ; GFX908-NEXT: successors: %bb.1(0x80000000)
115 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1
116 ; GFX908-NEXT: {{ $}}
117 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
118 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
119 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
120 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
121 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
122 ; GFX908-NEXT: {{ $}}
124 ; GFX908-NEXT: successors: %bb.2(0x80000000)
125 ; GFX908-NEXT: {{ $}}
126 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
127 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
128 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
129 ; GFX908-NEXT: {{ $}}
131 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
132 ; GFX908-NEXT: {{ $}}
133 ; GFX908-NEXT: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 8)
134 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
135 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
136 ; GFX908-NEXT: {{ $}}
138 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
139 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub0
140 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub1
141 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]]
142 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]]
143 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
144 %val = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
; Calls llvm.amdgcn.raw.buffer.load.v3f32 with a poison <4 x i32> operand.
; The checks expect the copy of $vgpr1 (presumably %soffset) to pass through
; V_READFIRSTLANE_B32 and feed BUFFER_LOAD_DWORDX3_OFFEN, followed by
; SI_WATERFALL_LOOP; sub0..sub2 of the result are returned in $vgpr0..$vgpr2.
148 define <3 x float> @llvm_amdgcn_raw_buffer_load_v3f32(i32 %voffset, i32 %soffset) {
149 ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_load_v3f32
150 ; GFX908: bb.0 (%ir-block.0):
151 ; GFX908-NEXT: successors: %bb.1(0x80000000)
152 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1
153 ; GFX908-NEXT: {{ $}}
154 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
155 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
156 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
157 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
158 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
159 ; GFX908-NEXT: {{ $}}
161 ; GFX908-NEXT: successors: %bb.2(0x80000000)
162 ; GFX908-NEXT: {{ $}}
163 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
164 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
165 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
166 ; GFX908-NEXT: {{ $}}
168 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
169 ; GFX908-NEXT: {{ $}}
170 ; GFX908-NEXT: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 8)
171 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
172 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
173 ; GFX908-NEXT: {{ $}}
175 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
176 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0
177 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1
178 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub2
179 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]]
180 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]]
181 ; GFX908-NEXT: $vgpr2 = COPY [[COPY4]]
182 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
183 %val = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0)
; Calls llvm.amdgcn.raw.tbuffer.load.v3f32 with a poison <4 x i32> operand.
; The checks expect the copy of $vgpr1 (presumably %soffset) to pass through
; V_READFIRSTLANE_B32 and feed TBUFFER_LOAD_FORMAT_XYZ_OFFEN, followed by
; SI_WATERFALL_LOOP; sub0..sub2 of the result are returned in $vgpr0..$vgpr2.
187 define <3 x float> @llvm_amdgcn_raw_tbuffer_load_v3f32(i32 %voffset, i32 %soffset) {
188 ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_load_v3f32
189 ; GFX908: bb.0 (%ir-block.0):
190 ; GFX908-NEXT: successors: %bb.1(0x80000000)
191 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1
192 ; GFX908-NEXT: {{ $}}
193 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
194 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
195 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
196 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
197 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
198 ; GFX908-NEXT: {{ $}}
200 ; GFX908-NEXT: successors: %bb.2(0x80000000)
201 ; GFX908-NEXT: {{ $}}
202 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
203 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
204 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
205 ; GFX908-NEXT: {{ $}}
207 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
208 ; GFX908-NEXT: {{ $}}
209 ; GFX908-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 8)
210 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
211 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
212 ; GFX908-NEXT: {{ $}}
214 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
215 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0
216 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1
217 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2
218 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]]
219 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]]
220 ; GFX908-NEXT: $vgpr2 = COPY [[COPY4]]
221 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
222 %val = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
; Calls llvm.amdgcn.raw.buffer.load.v4f32 with a poison <4 x i32> operand.
; The checks expect the copy of $vgpr1 (presumably %soffset) to pass through
; V_READFIRSTLANE_B32 and feed BUFFER_LOAD_DWORDX4_OFFEN, followed by
; SI_WATERFALL_LOOP; sub0..sub3 of the result are returned in $vgpr0..$vgpr3.
226 define <4 x float> @llvm_amdgcn_raw_buffer_load_v4f32(i32 %voffset, i32 %soffset) {
227 ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_load_v4f32
228 ; GFX908: bb.0 (%ir-block.0):
229 ; GFX908-NEXT: successors: %bb.1(0x80000000)
230 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1
231 ; GFX908-NEXT: {{ $}}
232 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
233 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
234 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
235 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
236 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
237 ; GFX908-NEXT: {{ $}}
239 ; GFX908-NEXT: successors: %bb.2(0x80000000)
240 ; GFX908-NEXT: {{ $}}
241 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
242 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
243 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
244 ; GFX908-NEXT: {{ $}}
246 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
247 ; GFX908-NEXT: {{ $}}
248 ; GFX908-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8)
249 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
250 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
251 ; GFX908-NEXT: {{ $}}
253 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
254 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
255 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
256 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
257 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
258 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]]
259 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]]
260 ; GFX908-NEXT: $vgpr2 = COPY [[COPY4]]
261 ; GFX908-NEXT: $vgpr3 = COPY [[COPY5]]
262 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
263 %val = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0)
; Calls llvm.amdgcn.raw.tbuffer.load.v4f32 with a poison <4 x i32> operand.
; The checks expect the copy of $vgpr1 (presumably %soffset) to pass through
; V_READFIRSTLANE_B32 and feed TBUFFER_LOAD_FORMAT_XYZW_OFFEN, followed by
; SI_WATERFALL_LOOP; sub0..sub3 of the result are returned in $vgpr0..$vgpr3.
267 define <4 x float> @llvm_amdgcn_raw_tbuffer_load_v4f32(i32 %voffset, i32 %soffset) {
268 ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_load_v4f32
269 ; GFX908: bb.0 (%ir-block.0):
270 ; GFX908-NEXT: successors: %bb.1(0x80000000)
271 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1
272 ; GFX908-NEXT: {{ $}}
273 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
274 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
275 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
276 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
277 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
278 ; GFX908-NEXT: {{ $}}
280 ; GFX908-NEXT: successors: %bb.2(0x80000000)
281 ; GFX908-NEXT: {{ $}}
282 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
283 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
284 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
285 ; GFX908-NEXT: {{ $}}
287 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
288 ; GFX908-NEXT: {{ $}}
289 ; GFX908-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8)
290 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
291 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
292 ; GFX908-NEXT: {{ $}}
294 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
295 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0
296 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1
297 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2
298 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub3
299 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]]
300 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]]
301 ; GFX908-NEXT: $vgpr2 = COPY [[COPY4]]
302 ; GFX908-NEXT: $vgpr3 = COPY [[COPY5]]
303 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
304 %val = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
; Calls llvm.amdgcn.raw.buffer.store.f32 with a poison <4 x i32> operand.
; The checks expect the copy of $vgpr2 (presumably %soffset) to pass through
; V_READFIRSTLANE_B32 and feed BUFFER_STORE_DWORD_OFFEN_exact, whose data
; operand is the copy of $vgpr0, followed by SI_WATERFALL_LOOP.
308 define void @llvm_amdgcn_raw_buffer_store_f32(float %val, i32 %voffset, i32 %soffset) {
309 ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_store_f32
310 ; GFX908: bb.0 (%ir-block.0):
311 ; GFX908-NEXT: successors: %bb.1(0x80000000)
312 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
313 ; GFX908-NEXT: {{ $}}
314 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
315 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
316 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
317 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
318 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
319 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
320 ; GFX908-NEXT: {{ $}}
322 ; GFX908-NEXT: successors: %bb.2(0x80000000)
323 ; GFX908-NEXT: {{ $}}
324 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
325 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
326 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
327 ; GFX908-NEXT: {{ $}}
329 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
330 ; GFX908-NEXT: {{ $}}
331 ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY2]], [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
332 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
333 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
334 ; GFX908-NEXT: {{ $}}
336 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
337 ; GFX908-NEXT: SI_RETURN
338 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0)
; Calls llvm.amdgcn.raw.tbuffer.store.f32 with a poison <4 x i32> operand.
; The checks expect the copy of $vgpr2 (presumably %soffset) to pass through
; V_READFIRSTLANE_B32 and feed TBUFFER_STORE_FORMAT_X_OFFEN_exact, whose data
; operand is the copy of $vgpr0, followed by SI_WATERFALL_LOOP.
342 define void @llvm_amdgcn_raw_tbuffer_store_f32(float %val, i32 %voffset, i32 %soffset) {
343 ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_store_f32
344 ; GFX908: bb.0 (%ir-block.0):
345 ; GFX908-NEXT: successors: %bb.1(0x80000000)
346 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
347 ; GFX908-NEXT: {{ $}}
348 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
349 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
350 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
351 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
352 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
353 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
354 ; GFX908-NEXT: {{ $}}
356 ; GFX908-NEXT: successors: %bb.2(0x80000000)
357 ; GFX908-NEXT: {{ $}}
358 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
359 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
360 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
361 ; GFX908-NEXT: {{ $}}
363 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
364 ; GFX908-NEXT: {{ $}}
365 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY2]], [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
366 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
367 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
368 ; GFX908-NEXT: {{ $}}
370 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
371 ; GFX908-NEXT: SI_RETURN
372 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
; Calls llvm.amdgcn.raw.buffer.store.v2f32 with a poison <4 x i32> operand.
; The checks expect the copies of $vgpr0/$vgpr1 to be combined by REG_SEQUENCE
; into a vreg_64 data operand, and the copy of $vgpr3 (presumably %soffset) to
; pass through V_READFIRSTLANE_B32 and feed BUFFER_STORE_DWORDX2_OFFEN_exact,
; followed by SI_WATERFALL_LOOP.
376 define void @llvm_amdgcn_raw_buffer_store_v2f32(<2 x float> %val, i32 %voffset, i32 %soffset) {
377 ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_store_v2f32
378 ; GFX908: bb.0 (%ir-block.0):
379 ; GFX908-NEXT: successors: %bb.1(0x80000000)
380 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
381 ; GFX908-NEXT: {{ $}}
382 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
383 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
384 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
385 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
386 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
387 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
388 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
389 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
390 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
391 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
392 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
393 ; GFX908-NEXT: {{ $}}
395 ; GFX908-NEXT: successors: %bb.2(0x80000000)
396 ; GFX908-NEXT: {{ $}}
397 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
398 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
399 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
400 ; GFX908-NEXT: {{ $}}
402 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
403 ; GFX908-NEXT: {{ $}}
404 ; GFX908-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[COPY4]], [[COPY1]], [[DEF2]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 8)
405 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
406 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
407 ; GFX908-NEXT: {{ $}}
409 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
410 ; GFX908-NEXT: SI_RETURN
411 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0)
; Calls llvm.amdgcn.raw.tbuffer.store.v2f32 with a poison <4 x i32> operand.
; The checks expect the copies of $vgpr0/$vgpr1 to be combined by REG_SEQUENCE
; into a vreg_64 data operand, and the copy of $vgpr3 (presumably %soffset) to
; pass through V_READFIRSTLANE_B32 and feed TBUFFER_STORE_FORMAT_XY_OFFEN_exact,
; followed by SI_WATERFALL_LOOP.
415 define void @llvm_amdgcn_raw_tbuffer_store_v2f32(<2 x float> %val, i32 %voffset, i32 %soffset) {
416 ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_store_v2f32
417 ; GFX908: bb.0 (%ir-block.0):
418 ; GFX908-NEXT: successors: %bb.1(0x80000000)
419 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
420 ; GFX908-NEXT: {{ $}}
421 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
422 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
423 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
424 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
425 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
426 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
427 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
428 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
429 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
430 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
431 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
432 ; GFX908-NEXT: {{ $}}
434 ; GFX908-NEXT: successors: %bb.2(0x80000000)
435 ; GFX908-NEXT: {{ $}}
436 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
437 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
438 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
439 ; GFX908-NEXT: {{ $}}
441 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
442 ; GFX908-NEXT: {{ $}}
443 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[COPY4]], [[COPY1]], [[DEF2]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 8)
444 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
445 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
446 ; GFX908-NEXT: {{ $}}
448 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
449 ; GFX908-NEXT: SI_RETURN
450 call void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float> %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
; Calls llvm.amdgcn.raw.buffer.store.v3f32 with a poison <4 x i32> operand.
; The checks expect the copies of $vgpr0..$vgpr2 to be combined by REG_SEQUENCE
; into a vreg_96 data operand, and the copy of $vgpr4 (presumably %soffset) to
; pass through V_READFIRSTLANE_B32 and feed BUFFER_STORE_DWORDX3_OFFEN_exact,
; followed by SI_WATERFALL_LOOP.
454 define void @llvm_amdgcn_raw_buffer_store_v3f32(<3 x float> %val, i32 %voffset, i32 %soffset) {
455 ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_store_v3f32
456 ; GFX908: bb.0 (%ir-block.0):
457 ; GFX908-NEXT: successors: %bb.1(0x80000000)
458 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
459 ; GFX908-NEXT: {{ $}}
460 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4
461 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
462 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
463 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
464 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
465 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
466 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
467 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
468 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2
469 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]]
470 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
471 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
472 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
473 ; GFX908-NEXT: {{ $}}
475 ; GFX908-NEXT: successors: %bb.2(0x80000000)
476 ; GFX908-NEXT: {{ $}}
477 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
478 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
479 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
480 ; GFX908-NEXT: {{ $}}
482 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
483 ; GFX908-NEXT: {{ $}}
484 ; GFX908-NEXT: BUFFER_STORE_DWORDX3_OFFEN_exact [[COPY5]], [[COPY1]], [[DEF3]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 8)
485 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
486 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
487 ; GFX908-NEXT: {{ $}}
489 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
490 ; GFX908-NEXT: SI_RETURN
491 call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0)
; Calls llvm.amdgcn.raw.tbuffer.store.v3f32 with a poison <4 x i32> operand.
; The checks expect the copies of $vgpr0..$vgpr2 to be combined by REG_SEQUENCE
; into a vreg_96 data operand, and the copy of $vgpr4 (presumably %soffset) to
; pass through V_READFIRSTLANE_B32 and feed TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact,
; followed by SI_WATERFALL_LOOP.
495 define void @llvm_amdgcn_raw_tbuffer_store_v3f32(<3 x float> %val, i32 %voffset, i32 %soffset) {
496 ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_store_v3f32
497 ; GFX908: bb.0 (%ir-block.0):
498 ; GFX908-NEXT: successors: %bb.1(0x80000000)
499 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
500 ; GFX908-NEXT: {{ $}}
501 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4
502 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
503 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
504 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
505 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
506 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
507 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
508 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
509 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2
510 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]]
511 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
512 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
513 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
514 ; GFX908-NEXT: {{ $}}
516 ; GFX908-NEXT: successors: %bb.2(0x80000000)
517 ; GFX908-NEXT: {{ $}}
518 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
519 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
520 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
521 ; GFX908-NEXT: {{ $}}
523 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
524 ; GFX908-NEXT: {{ $}}
525 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[COPY5]], [[COPY1]], [[DEF3]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 8)
526 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
527 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
528 ; GFX908-NEXT: {{ $}}
530 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
531 ; GFX908-NEXT: SI_RETURN
532 call void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float> %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
; Calls llvm.amdgcn.raw.buffer.store.v4f32 with a poison <4 x i32> operand.
; The checks expect the copies of $vgpr0..$vgpr3 to be combined by REG_SEQUENCE
; into a vreg_128 data operand, and the copy of $vgpr5 (presumably %soffset) to
; pass through V_READFIRSTLANE_B32 and feed BUFFER_STORE_DWORDX4_OFFEN_exact,
; followed by SI_WATERFALL_LOOP.
536 define void @llvm_amdgcn_raw_buffer_store_v4f32(<4 x float> %val, i32 %voffset, i32 %soffset) {
537 ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_store_v4f32
538 ; GFX908: bb.0 (%ir-block.0):
539 ; GFX908-NEXT: successors: %bb.1(0x80000000)
540 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
541 ; GFX908-NEXT: {{ $}}
542 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
543 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
544 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
545 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
546 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
547 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
548 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
549 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
550 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
551 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
552 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
553 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]]
554 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
555 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
556 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
557 ; GFX908-NEXT: {{ $}}
559 ; GFX908-NEXT: successors: %bb.2(0x80000000)
560 ; GFX908-NEXT: {{ $}}
561 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
562 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
563 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
564 ; GFX908-NEXT: {{ $}}
566 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
567 ; GFX908-NEXT: {{ $}}
568 ; GFX908-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact [[COPY6]], [[COPY1]], [[DEF4]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8)
569 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
570 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
571 ; GFX908-NEXT: {{ $}}
573 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
574 ; GFX908-NEXT: SI_RETURN
575 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0)
;; Legacy-form (<4 x i32> resource) raw typed-buffer store of a <4 x float>
;; value. The expected MIR reads the $vgpr5 live-in (presumably %soffset) with
;; V_READFIRSTLANE_B32 inside an SI_WATERFALL_LOOP, packs the four data VGPRs
;; into a vreg_128, and stores it with TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact.
579 define void @llvm_amdgcn_raw_tbuffer_store_v4f32(<4 x float> %val, i32 %voffset, i32 %soffset) {
580 ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_store_v4f32
581 ; GFX908: bb.0 (%ir-block.0):
582 ; GFX908-NEXT: successors: %bb.1(0x80000000)
583 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
584 ; GFX908-NEXT: {{ $}}
585 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
586 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
587 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
588 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
589 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
590 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
591 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
592 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
593 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
594 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
595 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
596 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]]
597 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
598 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
599 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
600 ; GFX908-NEXT: {{ $}}
602 ; GFX908-NEXT: successors: %bb.2(0x80000000)
603 ; GFX908-NEXT: {{ $}}
604 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
605 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
606 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
607 ; GFX908-NEXT: {{ $}}
609 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
610 ; GFX908-NEXT: {{ $}}
611 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[COPY6]], [[COPY1]], [[DEF4]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8)
612 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
613 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
614 ; GFX908-NEXT: {{ $}}
616 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
617 ; GFX908-NEXT: SI_RETURN
618 call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
622 ;; Newer intrinsics that take addrspace(8) pointers
;; Pointer-form (ptr addrspace(8) resource) raw buffer load of a single float.
;; The expected MIR reads the $vgpr1 live-in (presumably %soffset) with
;; V_READFIRSTLANE_B32 inside an SI_WATERFALL_LOOP, loads with
;; BUFFER_LOAD_DWORD_OFFEN, and returns the result in $vgpr0.
624 define float @llvm_amdgcn_raw_ptr_buffer_load_f32(i32 %voffset, i32 %soffset) {
625 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_load_f32
626 ; GFX908: bb.0 (%ir-block.0):
627 ; GFX908-NEXT: successors: %bb.1(0x80000000)
628 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1
629 ; GFX908-NEXT: {{ $}}
630 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
631 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
632 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
633 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
634 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
635 ; GFX908-NEXT: {{ $}}
637 ; GFX908-NEXT: successors: %bb.2(0x80000000)
638 ; GFX908-NEXT: {{ $}}
639 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
640 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
641 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
642 ; GFX908-NEXT: {{ $}}
644 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
645 ; GFX908-NEXT: {{ $}}
646 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from `ptr addrspace(8) poison`, align 1, addrspace 8)
647 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
648 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
649 ; GFX908-NEXT: {{ $}}
651 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
652 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
653 ; GFX908-NEXT: SI_RETURN implicit $vgpr0
654 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0)
;; Pointer-form (ptr addrspace(8) resource) raw typed-buffer load of a single
;; float. The expected MIR reads the $vgpr1 live-in (presumably %soffset) with
;; V_READFIRSTLANE_B32 inside an SI_WATERFALL_LOOP, loads with
;; TBUFFER_LOAD_FORMAT_X_OFFEN, and returns the result in $vgpr0.
658 define float @llvm_amdgcn_raw_ptr_tbuffer_load_f32(i32 %voffset, i32 %soffset) {
659 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_load_f32
660 ; GFX908: bb.0 (%ir-block.0):
661 ; GFX908-NEXT: successors: %bb.1(0x80000000)
662 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1
663 ; GFX908-NEXT: {{ $}}
664 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
665 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
666 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
667 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
668 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
669 ; GFX908-NEXT: {{ $}}
671 ; GFX908-NEXT: successors: %bb.2(0x80000000)
672 ; GFX908-NEXT: {{ $}}
673 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
674 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
675 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
676 ; GFX908-NEXT: {{ $}}
678 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
679 ; GFX908-NEXT: {{ $}}
680 ; GFX908-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from `ptr addrspace(8) poison`, align 1, addrspace 8)
681 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
682 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
683 ; GFX908-NEXT: {{ $}}
685 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
686 ; GFX908-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
687 ; GFX908-NEXT: SI_RETURN implicit $vgpr0
688 %val = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
;; Pointer-form (ptr addrspace(8) resource) raw buffer load of <2 x float>.
;; The expected MIR reads the $vgpr1 live-in (presumably %soffset) with
;; V_READFIRSTLANE_B32 inside an SI_WATERFALL_LOOP, loads a vreg_64 with
;; BUFFER_LOAD_DWORDX2_OFFEN, and returns its sub0/sub1 in $vgpr0/$vgpr1.
692 define <2 x float> @llvm_amdgcn_raw_ptr_buffer_load_v2f32(i32 %voffset, i32 %soffset) {
693 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_load_v2f32
694 ; GFX908: bb.0 (%ir-block.0):
695 ; GFX908-NEXT: successors: %bb.1(0x80000000)
696 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1
697 ; GFX908-NEXT: {{ $}}
698 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
699 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
700 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
701 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
702 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
703 ; GFX908-NEXT: {{ $}}
705 ; GFX908-NEXT: successors: %bb.2(0x80000000)
706 ; GFX908-NEXT: {{ $}}
707 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
708 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
709 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
710 ; GFX908-NEXT: {{ $}}
712 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
713 ; GFX908-NEXT: {{ $}}
714 ; GFX908-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from `ptr addrspace(8) poison`, align 1, addrspace 8)
715 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
716 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
717 ; GFX908-NEXT: {{ $}}
719 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
720 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
721 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
722 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]]
723 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]]
724 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
725 %val = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0)
;; Pointer-form (ptr addrspace(8) resource) raw typed-buffer load of
;; <2 x float>. The expected MIR reads the $vgpr1 live-in (presumably %soffset)
;; with V_READFIRSTLANE_B32 inside an SI_WATERFALL_LOOP, loads a vreg_64 with
;; TBUFFER_LOAD_FORMAT_XY_OFFEN, and returns its sub0/sub1 in $vgpr0/$vgpr1.
729 define <2 x float> @llvm_amdgcn_raw_ptr_tbuffer_load_v2f32(i32 %voffset, i32 %soffset) {
730 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_load_v2f32
731 ; GFX908: bb.0 (%ir-block.0):
732 ; GFX908-NEXT: successors: %bb.1(0x80000000)
733 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1
734 ; GFX908-NEXT: {{ $}}
735 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
736 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
737 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
738 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
739 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
740 ; GFX908-NEXT: {{ $}}
742 ; GFX908-NEXT: successors: %bb.2(0x80000000)
743 ; GFX908-NEXT: {{ $}}
744 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
745 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
746 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
747 ; GFX908-NEXT: {{ $}}
749 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
750 ; GFX908-NEXT: {{ $}}
751 ; GFX908-NEXT: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from `ptr addrspace(8) poison`, align 1, addrspace 8)
752 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
753 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
754 ; GFX908-NEXT: {{ $}}
756 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
757 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub0
758 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub1
759 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]]
760 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]]
761 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
762 %val = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
;; Pointer-form (ptr addrspace(8) resource) raw buffer load of <3 x float>.
;; The expected MIR reads the $vgpr1 live-in (presumably %soffset) with
;; V_READFIRSTLANE_B32 inside an SI_WATERFALL_LOOP, loads a vreg_96 with
;; BUFFER_LOAD_DWORDX3_OFFEN, and returns sub0..sub2 in $vgpr0..$vgpr2.
766 define <3 x float> @llvm_amdgcn_raw_ptr_buffer_load_v3f32(i32 %voffset, i32 %soffset) {
767 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_load_v3f32
768 ; GFX908: bb.0 (%ir-block.0):
769 ; GFX908-NEXT: successors: %bb.1(0x80000000)
770 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1
771 ; GFX908-NEXT: {{ $}}
772 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
773 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
774 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
775 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
776 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
777 ; GFX908-NEXT: {{ $}}
779 ; GFX908-NEXT: successors: %bb.2(0x80000000)
780 ; GFX908-NEXT: {{ $}}
781 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
782 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
783 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
784 ; GFX908-NEXT: {{ $}}
786 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
787 ; GFX908-NEXT: {{ $}}
788 ; GFX908-NEXT: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s96) from `ptr addrspace(8) poison`, align 1, addrspace 8)
789 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
790 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
791 ; GFX908-NEXT: {{ $}}
793 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
794 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0
795 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1
796 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub2
797 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]]
798 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]]
799 ; GFX908-NEXT: $vgpr2 = COPY [[COPY4]]
800 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
801 %val = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0)
;; Pointer-form (ptr addrspace(8) resource) raw typed-buffer load of
;; <3 x float>. The expected MIR reads the $vgpr1 live-in (presumably %soffset)
;; with V_READFIRSTLANE_B32 inside an SI_WATERFALL_LOOP, loads a vreg_96 with
;; TBUFFER_LOAD_FORMAT_XYZ_OFFEN, and returns sub0..sub2 in $vgpr0..$vgpr2.
805 define <3 x float> @llvm_amdgcn_raw_ptr_tbuffer_load_v3f32(i32 %voffset, i32 %soffset) {
806 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_load_v3f32
807 ; GFX908: bb.0 (%ir-block.0):
808 ; GFX908-NEXT: successors: %bb.1(0x80000000)
809 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1
810 ; GFX908-NEXT: {{ $}}
811 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
812 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
813 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
814 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
815 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
816 ; GFX908-NEXT: {{ $}}
818 ; GFX908-NEXT: successors: %bb.2(0x80000000)
819 ; GFX908-NEXT: {{ $}}
820 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
821 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
822 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
823 ; GFX908-NEXT: {{ $}}
825 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
826 ; GFX908-NEXT: {{ $}}
827 ; GFX908-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96) from `ptr addrspace(8) poison`, align 1, addrspace 8)
828 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
829 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
830 ; GFX908-NEXT: {{ $}}
832 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
833 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0
834 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1
835 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2
836 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]]
837 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]]
838 ; GFX908-NEXT: $vgpr2 = COPY [[COPY4]]
839 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
840 %val = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
;; Pointer-form (ptr addrspace(8) resource) raw buffer load of <4 x float>.
;; The expected MIR reads the $vgpr1 live-in (presumably %soffset) with
;; V_READFIRSTLANE_B32 inside an SI_WATERFALL_LOOP, loads a vreg_128 with
;; BUFFER_LOAD_DWORDX4_OFFEN, and returns sub0..sub3 in $vgpr0..$vgpr3.
844 define <4 x float> @llvm_amdgcn_raw_ptr_buffer_load_v4f32(i32 %voffset, i32 %soffset) {
845 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_load_v4f32
846 ; GFX908: bb.0 (%ir-block.0):
847 ; GFX908-NEXT: successors: %bb.1(0x80000000)
848 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1
849 ; GFX908-NEXT: {{ $}}
850 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
851 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
852 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
853 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
854 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
855 ; GFX908-NEXT: {{ $}}
857 ; GFX908-NEXT: successors: %bb.2(0x80000000)
858 ; GFX908-NEXT: {{ $}}
859 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
860 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
861 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
862 ; GFX908-NEXT: {{ $}}
864 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
865 ; GFX908-NEXT: {{ $}}
866 ; GFX908-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from `ptr addrspace(8) poison`, align 1, addrspace 8)
867 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
868 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
869 ; GFX908-NEXT: {{ $}}
871 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
872 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
873 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
874 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
875 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
876 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]]
877 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]]
878 ; GFX908-NEXT: $vgpr2 = COPY [[COPY4]]
879 ; GFX908-NEXT: $vgpr3 = COPY [[COPY5]]
880 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
881 %val = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0)
;; Pointer-form (ptr addrspace(8) resource) raw typed-buffer load of
;; <4 x float>. The expected MIR reads the $vgpr1 live-in (presumably %soffset)
;; with V_READFIRSTLANE_B32 inside an SI_WATERFALL_LOOP, loads a vreg_128 with
;; TBUFFER_LOAD_FORMAT_XYZW_OFFEN, and returns sub0..sub3 in $vgpr0..$vgpr3.
885 define <4 x float> @llvm_amdgcn_raw_ptr_tbuffer_load_v4f32(i32 %voffset, i32 %soffset) {
886 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_load_v4f32
887 ; GFX908: bb.0 (%ir-block.0):
888 ; GFX908-NEXT: successors: %bb.1(0x80000000)
889 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1
890 ; GFX908-NEXT: {{ $}}
891 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
892 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
893 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
894 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
895 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
896 ; GFX908-NEXT: {{ $}}
898 ; GFX908-NEXT: successors: %bb.2(0x80000000)
899 ; GFX908-NEXT: {{ $}}
900 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
901 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
902 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
903 ; GFX908-NEXT: {{ $}}
905 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
906 ; GFX908-NEXT: {{ $}}
907 ; GFX908-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from `ptr addrspace(8) poison`, align 1, addrspace 8)
908 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
909 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
910 ; GFX908-NEXT: {{ $}}
912 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
913 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0
914 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1
915 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2
916 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub3
917 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]]
918 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]]
919 ; GFX908-NEXT: $vgpr2 = COPY [[COPY4]]
920 ; GFX908-NEXT: $vgpr3 = COPY [[COPY5]]
921 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
922 %val = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
;; Pointer-form (ptr addrspace(8) resource) raw buffer store of a single float.
;; The expected MIR reads the $vgpr2 live-in (presumably %soffset) with
;; V_READFIRSTLANE_B32 inside an SI_WATERFALL_LOOP and stores the $vgpr0 copy
;; with BUFFER_STORE_DWORD_OFFEN_exact.
926 define void @llvm_amdgcn_raw_ptr_buffer_store_f32(float %val, i32 %voffset, i32 %soffset) {
927 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_store_f32
928 ; GFX908: bb.0 (%ir-block.0):
929 ; GFX908-NEXT: successors: %bb.1(0x80000000)
930 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
931 ; GFX908-NEXT: {{ $}}
932 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
933 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
934 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
935 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
936 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
937 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
938 ; GFX908-NEXT: {{ $}}
940 ; GFX908-NEXT: successors: %bb.2(0x80000000)
941 ; GFX908-NEXT: {{ $}}
942 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
943 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
944 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
945 ; GFX908-NEXT: {{ $}}
947 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
948 ; GFX908-NEXT: {{ $}}
949 ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY2]], [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into `ptr addrspace(8) poison`, align 1, addrspace 8)
950 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
951 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
952 ; GFX908-NEXT: {{ $}}
954 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
955 ; GFX908-NEXT: SI_RETURN
956 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0)
;; Pointer-form (ptr addrspace(8) resource) raw typed-buffer store of a single
;; float. The expected MIR reads the $vgpr2 live-in (presumably %soffset) with
;; V_READFIRSTLANE_B32 inside an SI_WATERFALL_LOOP and stores the $vgpr0 copy
;; with TBUFFER_STORE_FORMAT_X_OFFEN_exact.
960 define void @llvm_amdgcn_raw_ptr_tbuffer_store_f32(float %val, i32 %voffset, i32 %soffset) {
961 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_store_f32
962 ; GFX908: bb.0 (%ir-block.0):
963 ; GFX908-NEXT: successors: %bb.1(0x80000000)
964 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
965 ; GFX908-NEXT: {{ $}}
966 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
967 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
968 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
969 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
970 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
971 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
972 ; GFX908-NEXT: {{ $}}
974 ; GFX908-NEXT: successors: %bb.2(0x80000000)
975 ; GFX908-NEXT: {{ $}}
976 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
977 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
978 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
979 ; GFX908-NEXT: {{ $}}
981 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
982 ; GFX908-NEXT: {{ $}}
983 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY2]], [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into `ptr addrspace(8) poison`, align 1, addrspace 8)
984 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
985 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
986 ; GFX908-NEXT: {{ $}}
988 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
989 ; GFX908-NEXT: SI_RETURN
990 call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
;; Pointer-form (ptr addrspace(8) resource) raw buffer store of <2 x float>.
;; The expected MIR reads the $vgpr3 live-in (presumably %soffset) with
;; V_READFIRSTLANE_B32 inside an SI_WATERFALL_LOOP, packs the two data VGPRs
;; into a vreg_64, and stores it with BUFFER_STORE_DWORDX2_OFFEN_exact.
994 define void @llvm_amdgcn_raw_ptr_buffer_store_v2f32(<2 x float> %val, i32 %voffset, i32 %soffset) {
995 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_store_v2f32
996 ; GFX908: bb.0 (%ir-block.0):
997 ; GFX908-NEXT: successors: %bb.1(0x80000000)
998 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
999 ; GFX908-NEXT: {{ $}}
1000 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1001 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1002 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1003 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1004 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1005 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1006 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
1007 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
1008 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
1009 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1010 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
1011 ; GFX908-NEXT: {{ $}}
1012 ; GFX908-NEXT: bb.1:
1013 ; GFX908-NEXT: successors: %bb.2(0x80000000)
1014 ; GFX908-NEXT: {{ $}}
1015 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
1016 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
1017 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
1018 ; GFX908-NEXT: {{ $}}
1019 ; GFX908-NEXT: bb.2:
1020 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
1021 ; GFX908-NEXT: {{ $}}
1022 ; GFX908-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[COPY4]], [[COPY1]], [[DEF2]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into `ptr addrspace(8) poison`, align 1, addrspace 8)
1023 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
1024 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
1025 ; GFX908-NEXT: {{ $}}
1026 ; GFX908-NEXT: bb.3:
1027 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
1028 ; GFX908-NEXT: SI_RETURN
1029 call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0)
;; Pointer-form (ptr addrspace(8) resource) raw typed-buffer store of
;; <2 x float>. The expected MIR reads the $vgpr3 live-in (presumably %soffset)
;; with V_READFIRSTLANE_B32 inside an SI_WATERFALL_LOOP, packs the two data
;; VGPRs into a vreg_64, and stores it with TBUFFER_STORE_FORMAT_XY_OFFEN_exact.
1033 define void @llvm_amdgcn_raw_ptr_tbuffer_store_v2f32(<2 x float> %val, i32 %voffset, i32 %soffset) {
1034 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_store_v2f32
1035 ; GFX908: bb.0 (%ir-block.0):
1036 ; GFX908-NEXT: successors: %bb.1(0x80000000)
1037 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
1038 ; GFX908-NEXT: {{ $}}
1039 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1040 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1041 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1042 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1043 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1044 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1045 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
1046 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
1047 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
1048 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1049 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
1050 ; GFX908-NEXT: {{ $}}
1051 ; GFX908-NEXT: bb.1:
1052 ; GFX908-NEXT: successors: %bb.2(0x80000000)
1053 ; GFX908-NEXT: {{ $}}
1054 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
1055 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
1056 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
1057 ; GFX908-NEXT: {{ $}}
1058 ; GFX908-NEXT: bb.2:
1059 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
1060 ; GFX908-NEXT: {{ $}}
1061 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[COPY4]], [[COPY1]], [[DEF2]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into `ptr addrspace(8) poison`, align 1, addrspace 8)
1062 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
1063 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
1064 ; GFX908-NEXT: {{ $}}
1065 ; GFX908-NEXT: bb.3:
1066 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
1067 ; GFX908-NEXT: SI_RETURN
1068 call void @llvm.amdgcn.raw.ptr.tbuffer.store.v2f32(<2 x float> %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
;; Pointer-form (ptr addrspace(8) resource) raw buffer store of <3 x float>.
;; The expected MIR reads the $vgpr4 live-in (presumably %soffset) with
;; V_READFIRSTLANE_B32 inside an SI_WATERFALL_LOOP, packs the three data VGPRs
;; into a vreg_96, and stores it with BUFFER_STORE_DWORDX3_OFFEN_exact.
1072 define void @llvm_amdgcn_raw_ptr_buffer_store_v3f32(<3 x float> %val, i32 %voffset, i32 %soffset) {
1073 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_store_v3f32
1074 ; GFX908: bb.0 (%ir-block.0):
1075 ; GFX908-NEXT: successors: %bb.1(0x80000000)
1076 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1077 ; GFX908-NEXT: {{ $}}
1078 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4
1079 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1080 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1081 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1082 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1083 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1084 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1085 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1086 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2
1087 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]]
1088 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
1089 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1090 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
1091 ; GFX908-NEXT: {{ $}}
1092 ; GFX908-NEXT: bb.1:
1093 ; GFX908-NEXT: successors: %bb.2(0x80000000)
1094 ; GFX908-NEXT: {{ $}}
1095 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
1096 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
1097 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
1098 ; GFX908-NEXT: {{ $}}
1099 ; GFX908-NEXT: bb.2:
1100 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
1101 ; GFX908-NEXT: {{ $}}
1102 ; GFX908-NEXT: BUFFER_STORE_DWORDX3_OFFEN_exact [[COPY5]], [[COPY1]], [[DEF3]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s96) into `ptr addrspace(8) poison`, align 1, addrspace 8)
1103 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
1104 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
1105 ; GFX908-NEXT: {{ $}}
1106 ; GFX908-NEXT: bb.3:
1107 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
1108 ; GFX908-NEXT: SI_RETURN
1109 call void @llvm.amdgcn.raw.ptr.buffer.store.v3f32(<3 x float> %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0)
;; Typed-buffer counterpart of the <3 x float> raw.ptr store test: the call
;; passes a poison resource plus two trailing i32 0 immediates. All function
;; arguments arrive in VGPRs (see liveins), so the checks expect the value copied
;; from $vgpr4 (presumably %soffset, the last argument) to be made uniform with
;; V_READFIRSTLANE_B32 inside a waterfall loop (bb.1/bb.2, ending in
;; SI_WATERFALL_LOOP) that wraps TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact; exec is
;; restored in bb.3.
1113 define void @llvm_amdgcn_raw_ptr_tbuffer_store_v3f32(<3 x float> %val, i32 %voffset, i32 %soffset) {
1114 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_store_v3f32
1115 ; GFX908: bb.0 (%ir-block.0):
1116 ; GFX908-NEXT: successors: %bb.1(0x80000000)
1117 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1118 ; GFX908-NEXT: {{ $}}
1119 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4
1120 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1121 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1122 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1123 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1124 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1125 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1126 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1127 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2
1128 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]]
1129 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
1130 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1131 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
1132 ; GFX908-NEXT: {{ $}}
1133 ; GFX908-NEXT: bb.1:
1134 ; GFX908-NEXT: successors: %bb.2(0x80000000)
1135 ; GFX908-NEXT: {{ $}}
1136 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
1137 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
1138 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
1139 ; GFX908-NEXT: {{ $}}
1140 ; GFX908-NEXT: bb.2:
1141 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
1142 ; GFX908-NEXT: {{ $}}
1143 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[COPY5]], [[COPY1]], [[DEF3]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s96) into `ptr addrspace(8) poison`, align 1, addrspace 8)
1144 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
1145 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
1146 ; GFX908-NEXT: {{ $}}
1147 ; GFX908-NEXT: bb.3:
1148 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
1149 ; GFX908-NEXT: SI_RETURN
1150 call void @llvm.amdgcn.raw.ptr.tbuffer.store.v3f32(<3 x float> %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
;; Stores a <4 x float> with the pointer-based raw buffer store intrinsic, using
;; a poison resource. All function arguments arrive in VGPRs (see liveins), so
;; the checks expect the value copied from $vgpr5 (presumably %soffset, the last
;; argument) to be made uniform with V_READFIRSTLANE_B32 inside a waterfall loop
;; (bb.1/bb.2, ending in SI_WATERFALL_LOOP) that wraps
;; BUFFER_STORE_DWORDX4_OFFEN_exact; exec is restored in bb.3.
1154 define void @llvm_amdgcn_raw_ptr_buffer_store_v4f32(<4 x float> %val, i32 %voffset, i32 %soffset) {
1155 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_store_v4f32
1156 ; GFX908: bb.0 (%ir-block.0):
1157 ; GFX908-NEXT: successors: %bb.1(0x80000000)
1158 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1159 ; GFX908-NEXT: {{ $}}
1160 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
1161 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
1162 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1163 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1164 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1165 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1166 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1167 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1168 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1169 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1170 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
1171 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]]
1172 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
1173 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1174 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
1175 ; GFX908-NEXT: {{ $}}
1176 ; GFX908-NEXT: bb.1:
1177 ; GFX908-NEXT: successors: %bb.2(0x80000000)
1178 ; GFX908-NEXT: {{ $}}
1179 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
1180 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
1181 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
1182 ; GFX908-NEXT: {{ $}}
1183 ; GFX908-NEXT: bb.2:
1184 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
1185 ; GFX908-NEXT: {{ $}}
1186 ; GFX908-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact [[COPY6]], [[COPY1]], [[DEF4]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into `ptr addrspace(8) poison`, align 1, addrspace 8)
1187 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
1188 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
1189 ; GFX908-NEXT: {{ $}}
1190 ; GFX908-NEXT: bb.3:
1191 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
1192 ; GFX908-NEXT: SI_RETURN
1193 call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0)
;; Typed-buffer counterpart of the <4 x float> raw.ptr store test: the call
;; passes a poison resource plus two trailing i32 0 immediates. All function
;; arguments arrive in VGPRs (see liveins), so the checks expect the value copied
;; from $vgpr5 (presumably %soffset, the last argument) to be made uniform with
;; V_READFIRSTLANE_B32 inside a waterfall loop (bb.1/bb.2, ending in
;; SI_WATERFALL_LOOP) that wraps TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact; exec is
;; restored in bb.3.
1197 define void @llvm_amdgcn_raw_ptr_tbuffer_store_v4f32(<4 x float> %val, i32 %voffset, i32 %soffset) {
1198 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_store_v4f32
1199 ; GFX908: bb.0 (%ir-block.0):
1200 ; GFX908-NEXT: successors: %bb.1(0x80000000)
1201 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1202 ; GFX908-NEXT: {{ $}}
1203 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
1204 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
1205 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1206 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1207 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1208 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1209 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1210 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1211 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1212 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1213 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
1214 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]]
1215 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
1216 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1217 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
1218 ; GFX908-NEXT: {{ $}}
1219 ; GFX908-NEXT: bb.1:
1220 ; GFX908-NEXT: successors: %bb.2(0x80000000)
1221 ; GFX908-NEXT: {{ $}}
1222 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
1223 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
1224 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
1225 ; GFX908-NEXT: {{ $}}
1226 ; GFX908-NEXT: bb.2:
1227 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
1228 ; GFX908-NEXT: {{ $}}
1229 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[COPY6]], [[COPY1]], [[DEF4]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into `ptr addrspace(8) poison`, align 1, addrspace 8)
1230 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
1231 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
1232 ; GFX908-NEXT: {{ $}}
1233 ; GFX908-NEXT: bb.3:
1234 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
1235 ; GFX908-NEXT: SI_RETURN
1236 call void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f32(<4 x float> %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
;; Declarations of the older raw buffer/tbuffer load and store intrinsics, which
;; take the buffer resource as a <4 x i32> value. The tbuffer forms carry one
;; more trailing i32 operand than the matching buffer forms.
1240 declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32 )
1241 declare float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32)
1242 declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32)
1243 declare <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32)
1244 declare <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32>, i32, i32, i32)
1245 declare <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32)
1246 declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32)
1247 declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32)
1248 declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32)
1249 declare void @llvm.amdgcn.raw.tbuffer.store.f32(float, <4 x i32>, i32, i32, i32, i32)
1250 declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32)
1251 declare void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32)
1252 declare void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32)
1253 declare void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32, i32)
1254 declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32)
1255 declare void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32)
;; Declarations of the raw.ptr variants of the same intrinsics, which take the
;; buffer resource as a nocapture ptr addrspace(8) instead of <4 x i32>.
1257 declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) nocapture, i32, i32, i32 )
1258 declare float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) nocapture, i32, i32, i32, i32)
1259 declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) nocapture, i32, i32, i32)
1260 declare <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) nocapture, i32, i32, i32, i32)
1261 declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) nocapture, i32, i32, i32)
1262 declare <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) nocapture, i32, i32, i32, i32)
1263 declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) nocapture, i32, i32, i32)
1264 declare <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) nocapture, i32, i32, i32, i32)
1265 declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8) nocapture, i32, i32, i32)
1266 declare void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float, ptr addrspace(8) nocapture, i32, i32, i32, i32)
1267 declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8) nocapture, i32, i32, i32)
1268 declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v2f32(<2 x float>, ptr addrspace(8) nocapture, i32, i32, i32, i32)
1269 declare void @llvm.amdgcn.raw.ptr.buffer.store.v3f32(<3 x float>, ptr addrspace(8) nocapture, i32, i32, i32)
1270 declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v3f32(<3 x float>, ptr addrspace(8) nocapture, i32, i32, i32, i32)
1271 declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8) nocapture, i32, i32, i32)
1272 declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f32(<4 x float>, ptr addrspace(8) nocapture, i32, i32, i32, i32)