1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -march=amdgcn -mcpu=gfx906 < %s | FileCheck --check-prefix=GFX906 %s
; v3i8_liveout: every work-item reads one <3 x i8> element from %src1 and one
; from %src2 (both indexed by workitem.id.x) and then branches on idx < 15.
; The phi in the join block yields %vec1 when control arrives from %entry and
; %vec2 when it arrives from %bb.1; the chosen vector is stored to %dst.
;
; What the autogenerated assertions below pin down for gfx906:
;  - the element is fetched as a single dword, with the index shifted left by 2;
;  - bytes 1 and 2 are split out with right shifts by 8 and 16 on both paths;
;  - after the join the low two bytes are re-packed and written with a 16-bit
;    store, and byte 2 is written separately with a byte store at offset 2;
;  - although the IR loads %vec2 before the conditional branch, the expected
;    code issues that load inside the %bb.1 region.
; Do not edit the assertion lines by hand; regenerate them with the script
; named in the NOTE at the top of the file.
4 define amdgpu_kernel void @v3i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1) %src2, ptr addrspace(1) nocapture %dst) {
5 ; GFX906-LABEL: v3i8_liveout:
6 ; GFX906: ; %bb.0: ; %entry
7 ; GFX906-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
8 ; GFX906-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
9 ; GFX906-NEXT: v_lshlrev_b32_e32 v5, 2, v0
10 ; GFX906-NEXT: v_mov_b32_e32 v1, 0
11 ; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
12 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
13 ; GFX906-NEXT: global_load_dword v2, v5, s[4:5]
14 ; GFX906-NEXT: s_waitcnt vmcnt(0)
15 ; GFX906-NEXT: v_lshrrev_b32_e32 v3, 16, v2
16 ; GFX906-NEXT: v_lshrrev_b32_e32 v4, 8, v2
17 ; GFX906-NEXT: s_and_saveexec_b64 s[0:1], vcc
18 ; GFX906-NEXT: s_cbranch_execz .LBB0_2
19 ; GFX906-NEXT: ; %bb.1: ; %bb.1
20 ; GFX906-NEXT: global_load_dword v2, v5, s[6:7]
21 ; GFX906-NEXT: s_waitcnt vmcnt(0)
22 ; GFX906-NEXT: v_lshrrev_b32_e32 v3, 16, v2
23 ; GFX906-NEXT: v_lshrrev_b32_e32 v4, 8, v2
24 ; GFX906-NEXT: .LBB0_2: ; %bb.2
25 ; GFX906-NEXT: s_or_b64 exec, exec, s[0:1]
26 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v4
27 ; GFX906-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
28 ; GFX906-NEXT: global_store_byte v1, v3, s[2:3] offset:2
29 ; GFX906-NEXT: global_store_short v1, v0, s[2:3]
30 ; GFX906-NEXT: s_endpgm
; IR under test: compute the per-lane index, load both candidate vectors, and
; branch on idx < 15.
32 %idx = call i32 @llvm.amdgcn.workitem.id.x()
33 %gep1 = getelementptr <3 x i8>, ptr addrspace(1) %src1, i32 %idx
34 %vec1 = load <3 x i8>, ptr addrspace(1) %gep1
35 %gep2 = getelementptr <3 x i8>, ptr addrspace(1) %src2, i32 %idx
36 %vec2 = load <3 x i8>, ptr addrspace(1) %gep2
37 %cmp = icmp ult i32 %idx, 15
38 br i1 %cmp, label %bb.1, label %bb.2
; Join: the vector is selected by incoming edge and then stored, so the
; <3 x i8> value has to survive across the block boundary.
43 %tmp5 = phi <3 x i8> [ %vec1, %entry ], [ %vec2, %bb.1 ]
44 store <3 x i8> %tmp5, ptr addrspace(1) %dst, align 4
; v4i8_liveout: <4 x i8> variant. Each work-item loads one element from %src1
; and one from %src2 at index workitem.id.x, branches on idx < 15, and stores
; the phi of %vec1 (from %entry) and %vec2 (from %bb.1) to %dst.
;
; What the autogenerated assertions below pin down for gfx906:
;  - the element is fetched as one dword, with the index shifted left by 2;
;  - bytes 1, 2 and 3 are split out with right shifts by 8, 16 and 24 on both
;    paths;
;  - after the join the four bytes are re-packed with 16-bit shifts and SDWA
;    ORs into a single register and written with one dword store;
;  - the load of %vec2 appears inside the %bb.1 region even though the IR
;    performs it before the conditional branch.
; Do not edit the assertion lines by hand; regenerate them with the script
; named in the NOTE at the top of the file.
48 define amdgpu_kernel void @v4i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1) %src2, ptr addrspace(1) nocapture %dst) {
49 ; GFX906-LABEL: v4i8_liveout:
50 ; GFX906: ; %bb.0: ; %entry
51 ; GFX906-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
52 ; GFX906-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
53 ; GFX906-NEXT: v_lshlrev_b32_e32 v6, 2, v0
54 ; GFX906-NEXT: v_mov_b32_e32 v1, 0
55 ; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
56 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
57 ; GFX906-NEXT: global_load_dword v2, v6, s[4:5]
58 ; GFX906-NEXT: s_waitcnt vmcnt(0)
59 ; GFX906-NEXT: v_lshrrev_b32_e32 v3, 24, v2
60 ; GFX906-NEXT: v_lshrrev_b32_e32 v4, 16, v2
61 ; GFX906-NEXT: v_lshrrev_b32_e32 v5, 8, v2
62 ; GFX906-NEXT: s_and_saveexec_b64 s[0:1], vcc
63 ; GFX906-NEXT: s_cbranch_execz .LBB1_2
64 ; GFX906-NEXT: ; %bb.1: ; %bb.1
65 ; GFX906-NEXT: global_load_dword v2, v6, s[6:7]
66 ; GFX906-NEXT: s_waitcnt vmcnt(0)
67 ; GFX906-NEXT: v_lshrrev_b32_e32 v3, 24, v2
68 ; GFX906-NEXT: v_lshrrev_b32_e32 v4, 16, v2
69 ; GFX906-NEXT: v_lshrrev_b32_e32 v5, 8, v2
70 ; GFX906-NEXT: .LBB1_2: ; %bb.2
71 ; GFX906-NEXT: s_or_b64 exec, exec, s[0:1]
72 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v5
73 ; GFX906-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
74 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v3
75 ; GFX906-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
76 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
77 ; GFX906-NEXT: global_store_dword v1, v0, s[2:3]
78 ; GFX906-NEXT: s_endpgm
; IR under test: compute the per-lane index, load both candidate vectors, and
; branch on idx < 15.
80 %idx = call i32 @llvm.amdgcn.workitem.id.x()
81 %gep1 = getelementptr <4 x i8>, ptr addrspace(1) %src1, i32 %idx
82 %vec1 = load <4 x i8>, ptr addrspace(1) %gep1
83 %gep2 = getelementptr <4 x i8>, ptr addrspace(1) %src2, i32 %idx
84 %vec2 = load <4 x i8>, ptr addrspace(1) %gep2
85 %cmp = icmp ult i32 %idx, 15
86 br i1 %cmp, label %bb.1, label %bb.2
; Join: the vector is selected by incoming edge and then stored, so the
; <4 x i8> value has to survive across the block boundary.
91 %tmp5 = phi <4 x i8> [ %vec1, %entry ], [ %vec2, %bb.1 ]
92 store <4 x i8> %tmp5, ptr addrspace(1) %dst, align 4
; v5i8_liveout: <5 x i8> variant (an element wider than one dword). Each
; work-item loads one element from %src1 and one from %src2 at index
; workitem.id.x, branches on idx < 15, and stores the phi of %vec1 (from
; %entry) and %vec2 (from %bb.1) to %dst.
;
; What the autogenerated assertions below pin down for gfx906:
;  - the element is fetched with a two-dword load, index shifted left by 3;
;  - a 64-bit right shift by 24 of the loaded pair writes v[3:4]; v4 is then
;    immediately overwritten by the 16-bit shift of v1, and bytes 1 and 2 come
;    from 32-bit shifts of v1;
;  - after the join the low four bytes are re-packed into one register for a
;    dword store, and the fifth byte is written straight from v2 with a byte
;    store at offset 4;
;  - the load of %vec2 appears inside the %bb.1 region even though the IR
;    performs it before the conditional branch.
; Do not edit the assertion lines by hand; regenerate them with the script
; named in the NOTE at the top of the file.
96 define amdgpu_kernel void @v5i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1) %src2, ptr addrspace(1) nocapture %dst) {
97 ; GFX906-LABEL: v5i8_liveout:
98 ; GFX906: ; %bb.0: ; %entry
99 ; GFX906-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
100 ; GFX906-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
101 ; GFX906-NEXT: v_lshlrev_b32_e32 v7, 3, v0
102 ; GFX906-NEXT: v_mov_b32_e32 v5, 0
103 ; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
104 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
105 ; GFX906-NEXT: global_load_dwordx2 v[1:2], v7, s[4:5]
106 ; GFX906-NEXT: s_waitcnt vmcnt(0)
107 ; GFX906-NEXT: v_lshrrev_b64 v[3:4], 24, v[1:2]
108 ; GFX906-NEXT: v_lshrrev_b32_e32 v4, 16, v1
109 ; GFX906-NEXT: v_lshrrev_b32_e32 v6, 8, v1
110 ; GFX906-NEXT: s_and_saveexec_b64 s[0:1], vcc
111 ; GFX906-NEXT: s_cbranch_execz .LBB2_2
112 ; GFX906-NEXT: ; %bb.1: ; %bb.1
113 ; GFX906-NEXT: global_load_dwordx2 v[1:2], v7, s[6:7]
114 ; GFX906-NEXT: s_waitcnt vmcnt(0)
115 ; GFX906-NEXT: v_lshrrev_b64 v[3:4], 24, v[1:2]
116 ; GFX906-NEXT: v_lshrrev_b32_e32 v4, 16, v1
117 ; GFX906-NEXT: v_lshrrev_b32_e32 v6, 8, v1
118 ; GFX906-NEXT: .LBB2_2: ; %bb.2
119 ; GFX906-NEXT: s_or_b64 exec, exec, s[0:1]
120 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v6
121 ; GFX906-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
122 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v3
123 ; GFX906-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
124 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
125 ; GFX906-NEXT: global_store_byte v5, v2, s[2:3] offset:4
126 ; GFX906-NEXT: global_store_dword v5, v0, s[2:3]
127 ; GFX906-NEXT: s_endpgm
; IR under test: compute the per-lane index, load both candidate vectors, and
; branch on idx < 15.
129 %idx = call i32 @llvm.amdgcn.workitem.id.x()
130 %gep1 = getelementptr <5 x i8>, ptr addrspace(1) %src1, i32 %idx
131 %vec1 = load <5 x i8>, ptr addrspace(1) %gep1
132 %gep2 = getelementptr <5 x i8>, ptr addrspace(1) %src2, i32 %idx
133 %vec2 = load <5 x i8>, ptr addrspace(1) %gep2
134 %cmp = icmp ult i32 %idx, 15
135 br i1 %cmp, label %bb.1, label %bb.2
; Join: the vector is selected by incoming edge and then stored, so the
; <5 x i8> value has to survive across the block boundary.
140 %tmp5 = phi <5 x i8> [ %vec1, %entry ], [ %vec2, %bb.1 ]
141 store <5 x i8> %tmp5, ptr addrspace(1) %dst, align 4
; v8i8_liveout: <8 x i8> variant. Each work-item loads one element from %src1
; and one from %src2 at index workitem.id.x, branches on idx < 15, and stores
; the phi of %vec1 (from %entry) and %vec2 (from %bb.1) to %dst.
;
; What the autogenerated assertions below pin down for gfx906:
;  - the element is fetched with a two-dword load, index shifted left by 3;
;  - each of the two loaded dwords is split with right shifts by 24, 16 and 8
;    (six shifts in total) on both paths, producing v4..v9;
;  - after the join the bytes are re-packed with 16-bit shifts and SDWA ORs
;    into v[0:1] and written with one two-dword store;
;  - the load of %vec2 appears inside the %bb.1 region even though the IR
;    performs it before the conditional branch.
; Do not edit the assertion lines by hand; regenerate them with the script
; named in the NOTE at the top of the file.
145 define amdgpu_kernel void @v8i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1) %src2, ptr addrspace(1) nocapture %dst) {
146 ; GFX906-LABEL: v8i8_liveout:
147 ; GFX906: ; %bb.0: ; %entry
148 ; GFX906-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
149 ; GFX906-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
150 ; GFX906-NEXT: v_lshlrev_b32_e32 v10, 3, v0
151 ; GFX906-NEXT: v_mov_b32_e32 v3, 0
152 ; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
153 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
154 ; GFX906-NEXT: global_load_dwordx2 v[1:2], v10, s[4:5]
155 ; GFX906-NEXT: s_waitcnt vmcnt(0)
156 ; GFX906-NEXT: v_lshrrev_b32_e32 v4, 24, v2
157 ; GFX906-NEXT: v_lshrrev_b32_e32 v5, 16, v2
158 ; GFX906-NEXT: v_lshrrev_b32_e32 v6, 8, v2
159 ; GFX906-NEXT: v_lshrrev_b32_e32 v7, 24, v1
160 ; GFX906-NEXT: v_lshrrev_b32_e32 v8, 16, v1
161 ; GFX906-NEXT: v_lshrrev_b32_e32 v9, 8, v1
162 ; GFX906-NEXT: s_and_saveexec_b64 s[0:1], vcc
163 ; GFX906-NEXT: s_cbranch_execz .LBB3_2
164 ; GFX906-NEXT: ; %bb.1: ; %bb.1
165 ; GFX906-NEXT: global_load_dwordx2 v[1:2], v10, s[6:7]
166 ; GFX906-NEXT: s_waitcnt vmcnt(0)
167 ; GFX906-NEXT: v_lshrrev_b32_e32 v4, 24, v2
168 ; GFX906-NEXT: v_lshrrev_b32_e32 v5, 16, v2
169 ; GFX906-NEXT: v_lshrrev_b32_e32 v6, 8, v2
170 ; GFX906-NEXT: v_lshrrev_b32_e32 v7, 24, v1
171 ; GFX906-NEXT: v_lshrrev_b32_e32 v8, 16, v1
172 ; GFX906-NEXT: v_lshrrev_b32_e32 v9, 8, v1
173 ; GFX906-NEXT: .LBB3_2: ; %bb.2
174 ; GFX906-NEXT: s_or_b64 exec, exec, s[0:1]
175 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v9
176 ; GFX906-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
177 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v7
178 ; GFX906-NEXT: v_or_b32_sdwa v1, v8, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
179 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
180 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v6
181 ; GFX906-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
182 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v4
183 ; GFX906-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
184 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
185 ; GFX906-NEXT: global_store_dwordx2 v3, v[0:1], s[2:3]
186 ; GFX906-NEXT: s_endpgm
; IR under test: compute the per-lane index, load both candidate vectors, and
; branch on idx < 15.
188 %idx = call i32 @llvm.amdgcn.workitem.id.x()
189 %gep1 = getelementptr <8 x i8>, ptr addrspace(1) %src1, i32 %idx
190 %vec1 = load <8 x i8>, ptr addrspace(1) %gep1
191 %gep2 = getelementptr <8 x i8>, ptr addrspace(1) %src2, i32 %idx
192 %vec2 = load <8 x i8>, ptr addrspace(1) %gep2
193 %cmp = icmp ult i32 %idx, 15
194 br i1 %cmp, label %bb.1, label %bb.2
; Join: the vector is selected by incoming edge and then stored, so the
; <8 x i8> value has to survive across the block boundary.
199 %tmp5 = phi <8 x i8> [ %vec1, %entry ], [ %vec2, %bb.1 ]
200 store <8 x i8> %tmp5, ptr addrspace(1) %dst, align 4
; v16i8_liveout: <16 x i8> variant. Each work-item loads one element from
; %src1 and one from %src2 at index workitem.id.x, branches on idx < 15, and
; stores the phi of %vec1 (from %entry) and %vec2 (from %bb.1) to %dst.
;
; What the autogenerated assertions below pin down for gfx906:
;  - the element is fetched with a four-dword load, index shifted left by 4;
;  - each of the four loaded dwords is split with right shifts by 24, 16 and 8
;    (twelve shifts in total) on both paths, producing v6..v17;
;  - after the join the bytes are re-packed with 16-bit shifts and SDWA ORs
;    into v[0:3] and written with one four-dword store;
;  - the load of %vec2 appears inside the %bb.1 region even though the IR
;    performs it before the conditional branch.
; Do not edit the assertion lines by hand; regenerate them with the script
; named in the NOTE at the top of the file.
204 define amdgpu_kernel void @v16i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1) %src2, ptr addrspace(1) nocapture %dst) {
205 ; GFX906-LABEL: v16i8_liveout:
206 ; GFX906: ; %bb.0: ; %entry
207 ; GFX906-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
208 ; GFX906-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
209 ; GFX906-NEXT: v_lshlrev_b32_e32 v18, 4, v0
210 ; GFX906-NEXT: v_mov_b32_e32 v5, 0
211 ; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
212 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
213 ; GFX906-NEXT: global_load_dwordx4 v[1:4], v18, s[4:5]
214 ; GFX906-NEXT: s_waitcnt vmcnt(0)
215 ; GFX906-NEXT: v_lshrrev_b32_e32 v6, 24, v4
216 ; GFX906-NEXT: v_lshrrev_b32_e32 v7, 16, v4
217 ; GFX906-NEXT: v_lshrrev_b32_e32 v8, 8, v4
218 ; GFX906-NEXT: v_lshrrev_b32_e32 v9, 24, v3
219 ; GFX906-NEXT: v_lshrrev_b32_e32 v10, 16, v3
220 ; GFX906-NEXT: v_lshrrev_b32_e32 v11, 8, v3
221 ; GFX906-NEXT: v_lshrrev_b32_e32 v12, 24, v2
222 ; GFX906-NEXT: v_lshrrev_b32_e32 v13, 16, v2
223 ; GFX906-NEXT: v_lshrrev_b32_e32 v14, 8, v2
224 ; GFX906-NEXT: v_lshrrev_b32_e32 v15, 24, v1
225 ; GFX906-NEXT: v_lshrrev_b32_e32 v16, 16, v1
226 ; GFX906-NEXT: v_lshrrev_b32_e32 v17, 8, v1
227 ; GFX906-NEXT: s_and_saveexec_b64 s[0:1], vcc
228 ; GFX906-NEXT: s_cbranch_execz .LBB4_2
229 ; GFX906-NEXT: ; %bb.1: ; %bb.1
230 ; GFX906-NEXT: global_load_dwordx4 v[1:4], v18, s[6:7]
231 ; GFX906-NEXT: s_waitcnt vmcnt(0)
232 ; GFX906-NEXT: v_lshrrev_b32_e32 v6, 24, v4
233 ; GFX906-NEXT: v_lshrrev_b32_e32 v7, 16, v4
234 ; GFX906-NEXT: v_lshrrev_b32_e32 v8, 8, v4
235 ; GFX906-NEXT: v_lshrrev_b32_e32 v9, 24, v3
236 ; GFX906-NEXT: v_lshrrev_b32_e32 v10, 16, v3
237 ; GFX906-NEXT: v_lshrrev_b32_e32 v11, 8, v3
238 ; GFX906-NEXT: v_lshrrev_b32_e32 v12, 24, v2
239 ; GFX906-NEXT: v_lshrrev_b32_e32 v13, 16, v2
240 ; GFX906-NEXT: v_lshrrev_b32_e32 v14, 8, v2
241 ; GFX906-NEXT: v_lshrrev_b32_e32 v15, 24, v1
242 ; GFX906-NEXT: v_lshrrev_b32_e32 v16, 16, v1
243 ; GFX906-NEXT: v_lshrrev_b32_e32 v17, 8, v1
244 ; GFX906-NEXT: .LBB4_2: ; %bb.2
245 ; GFX906-NEXT: s_or_b64 exec, exec, s[0:1]
246 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v17
247 ; GFX906-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
248 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v15
249 ; GFX906-NEXT: v_or_b32_sdwa v1, v16, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
250 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
251 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v14
252 ; GFX906-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
253 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v12
254 ; GFX906-NEXT: v_or_b32_sdwa v2, v13, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
255 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
256 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v11
257 ; GFX906-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
258 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v9
259 ; GFX906-NEXT: v_or_b32_sdwa v3, v10, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
260 ; GFX906-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
261 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v8
262 ; GFX906-NEXT: v_or_b32_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
263 ; GFX906-NEXT: v_lshlrev_b16_e32 v4, 8, v6
264 ; GFX906-NEXT: v_or_b32_sdwa v4, v7, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
265 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
266 ; GFX906-NEXT: global_store_dwordx4 v5, v[0:3], s[2:3]
267 ; GFX906-NEXT: s_endpgm
; IR under test: compute the per-lane index, load both candidate vectors, and
; branch on idx < 15.
269 %idx = call i32 @llvm.amdgcn.workitem.id.x()
270 %gep1 = getelementptr <16 x i8>, ptr addrspace(1) %src1, i32 %idx
271 %vec1 = load <16 x i8>, ptr addrspace(1) %gep1
272 %gep2 = getelementptr <16 x i8>, ptr addrspace(1) %src2, i32 %idx
273 %vec2 = load <16 x i8>, ptr addrspace(1) %gep2
274 %cmp = icmp ult i32 %idx, 15
275 br i1 %cmp, label %bb.1, label %bb.2
; Join: the vector is selected by incoming edge and then stored, so the
; <16 x i8> value has to survive across the block boundary.
280 %tmp5 = phi <16 x i8> [ %vec1, %entry ], [ %vec2, %bb.1 ]
281 store <16 x i8> %tmp5, ptr addrspace(1) %dst, align 4
; v32i8_liveout: <32 x i8> variant (an element wider than one four-dword
; load). Each work-item loads one element from %src1 and one from %src2 at
; index workitem.id.x, branches on idx < 15, and stores the phi of %vec1 (from
; %entry) and %vec2 (from %bb.1) to %dst.
;
; What the autogenerated assertions below pin down for gfx906:
;  - the index is shifted left by 5 and the element is fetched with two
;    four-dword loads, offset 16 into v[1:4] and offset 0 into v[5:8];
;  - the shifts of v1..v4 follow an s_waitcnt vmcnt(1) and the shifts of
;    v5..v8 follow a separate s_waitcnt vmcnt(0);
;  - every loaded dword is split with right shifts by 24, 16 and 8 (24 shifts
;    in total) on both paths; v0 is reused as a shift destination after the
;    compare has consumed the work-item id;
;  - unlike the narrower kernels in this file, the %dst pointer is loaded into
;    s[0:1] and the saved exec mask lives in s[2:3];
;  - after the join the low 16 bytes are re-packed into v[5:8] and stored
;    first; the upper 16 bytes are then re-packed into v[1:4] and stored at
;    offset 16;
;  - the loads of %vec2 appear inside the %bb.1 region even though the IR
;    performs them before the conditional branch.
; Do not edit the assertion lines by hand; regenerate them with the script
; named in the NOTE at the top of the file.
285 define amdgpu_kernel void @v32i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1) %src2, ptr addrspace(1) nocapture %dst) {
286 ; GFX906-LABEL: v32i8_liveout:
287 ; GFX906: ; %bb.0: ; %entry
288 ; GFX906-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
289 ; GFX906-NEXT: v_lshlrev_b32_e32 v31, 5, v0
290 ; GFX906-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
291 ; GFX906-NEXT: v_mov_b32_e32 v9, 0
292 ; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
293 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
294 ; GFX906-NEXT: global_load_dwordx4 v[1:4], v31, s[4:5] offset:16
295 ; GFX906-NEXT: global_load_dwordx4 v[5:8], v31, s[4:5]
296 ; GFX906-NEXT: s_waitcnt vmcnt(1)
297 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v4
298 ; GFX906-NEXT: v_lshrrev_b32_e32 v10, 16, v4
299 ; GFX906-NEXT: v_lshrrev_b32_e32 v11, 8, v4
300 ; GFX906-NEXT: v_lshrrev_b32_e32 v12, 24, v3
301 ; GFX906-NEXT: v_lshrrev_b32_e32 v13, 16, v3
302 ; GFX906-NEXT: v_lshrrev_b32_e32 v14, 8, v3
303 ; GFX906-NEXT: v_lshrrev_b32_e32 v15, 24, v2
304 ; GFX906-NEXT: v_lshrrev_b32_e32 v16, 16, v2
305 ; GFX906-NEXT: v_lshrrev_b32_e32 v17, 8, v2
306 ; GFX906-NEXT: v_lshrrev_b32_e32 v18, 24, v1
307 ; GFX906-NEXT: v_lshrrev_b32_e32 v19, 16, v1
308 ; GFX906-NEXT: v_lshrrev_b32_e32 v20, 8, v1
309 ; GFX906-NEXT: s_waitcnt vmcnt(0)
310 ; GFX906-NEXT: v_lshrrev_b32_e32 v21, 24, v8
311 ; GFX906-NEXT: v_lshrrev_b32_e32 v22, 16, v8
312 ; GFX906-NEXT: v_lshrrev_b32_e32 v23, 8, v8
313 ; GFX906-NEXT: v_lshrrev_b32_e32 v24, 24, v7
314 ; GFX906-NEXT: v_lshrrev_b32_e32 v25, 16, v7
315 ; GFX906-NEXT: v_lshrrev_b32_e32 v26, 8, v7
316 ; GFX906-NEXT: v_lshrrev_b32_e32 v27, 24, v6
317 ; GFX906-NEXT: v_lshrrev_b32_e32 v28, 16, v6
318 ; GFX906-NEXT: v_lshrrev_b32_e32 v29, 8, v6
319 ; GFX906-NEXT: v_lshrrev_b32_e32 v30, 24, v5
320 ; GFX906-NEXT: v_lshrrev_b32_e32 v32, 16, v5
321 ; GFX906-NEXT: v_lshrrev_b32_e32 v33, 8, v5
322 ; GFX906-NEXT: s_and_saveexec_b64 s[2:3], vcc
323 ; GFX906-NEXT: s_cbranch_execz .LBB5_2
324 ; GFX906-NEXT: ; %bb.1: ; %bb.1
325 ; GFX906-NEXT: global_load_dwordx4 v[1:4], v31, s[6:7] offset:16
326 ; GFX906-NEXT: global_load_dwordx4 v[5:8], v31, s[6:7]
327 ; GFX906-NEXT: s_waitcnt vmcnt(1)
328 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v4
329 ; GFX906-NEXT: v_lshrrev_b32_e32 v10, 16, v4
330 ; GFX906-NEXT: v_lshrrev_b32_e32 v11, 8, v4
331 ; GFX906-NEXT: v_lshrrev_b32_e32 v12, 24, v3
332 ; GFX906-NEXT: v_lshrrev_b32_e32 v13, 16, v3
333 ; GFX906-NEXT: v_lshrrev_b32_e32 v14, 8, v3
334 ; GFX906-NEXT: v_lshrrev_b32_e32 v15, 24, v2
335 ; GFX906-NEXT: v_lshrrev_b32_e32 v16, 16, v2
336 ; GFX906-NEXT: v_lshrrev_b32_e32 v17, 8, v2
337 ; GFX906-NEXT: v_lshrrev_b32_e32 v18, 24, v1
338 ; GFX906-NEXT: v_lshrrev_b32_e32 v19, 16, v1
339 ; GFX906-NEXT: v_lshrrev_b32_e32 v20, 8, v1
340 ; GFX906-NEXT: s_waitcnt vmcnt(0)
341 ; GFX906-NEXT: v_lshrrev_b32_e32 v21, 24, v8
342 ; GFX906-NEXT: v_lshrrev_b32_e32 v22, 16, v8
343 ; GFX906-NEXT: v_lshrrev_b32_e32 v23, 8, v8
344 ; GFX906-NEXT: v_lshrrev_b32_e32 v24, 24, v7
345 ; GFX906-NEXT: v_lshrrev_b32_e32 v25, 16, v7
346 ; GFX906-NEXT: v_lshrrev_b32_e32 v26, 8, v7
347 ; GFX906-NEXT: v_lshrrev_b32_e32 v27, 24, v6
348 ; GFX906-NEXT: v_lshrrev_b32_e32 v28, 16, v6
349 ; GFX906-NEXT: v_lshrrev_b32_e32 v29, 8, v6
350 ; GFX906-NEXT: v_lshrrev_b32_e32 v30, 24, v5
351 ; GFX906-NEXT: v_lshrrev_b32_e32 v32, 16, v5
352 ; GFX906-NEXT: v_lshrrev_b32_e32 v33, 8, v5
353 ; GFX906-NEXT: .LBB5_2: ; %bb.2
354 ; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
355 ; GFX906-NEXT: v_lshlrev_b16_e32 v30, 8, v30
356 ; GFX906-NEXT: v_lshlrev_b16_e32 v31, 8, v33
357 ; GFX906-NEXT: v_lshlrev_b16_e32 v29, 8, v29
358 ; GFX906-NEXT: v_lshlrev_b16_e32 v27, 8, v27
359 ; GFX906-NEXT: v_lshlrev_b16_e32 v26, 8, v26
360 ; GFX906-NEXT: v_lshlrev_b16_e32 v24, 8, v24
361 ; GFX906-NEXT: v_lshlrev_b16_e32 v23, 8, v23
362 ; GFX906-NEXT: v_lshlrev_b16_e32 v21, 8, v21
363 ; GFX906-NEXT: v_or_b32_sdwa v30, v32, v30 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
364 ; GFX906-NEXT: v_or_b32_sdwa v5, v5, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
365 ; GFX906-NEXT: v_or_b32_sdwa v6, v6, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
366 ; GFX906-NEXT: v_or_b32_sdwa v27, v28, v27 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
367 ; GFX906-NEXT: v_or_b32_sdwa v7, v7, v26 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
368 ; GFX906-NEXT: v_or_b32_sdwa v24, v25, v24 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
369 ; GFX906-NEXT: v_or_b32_sdwa v8, v8, v23 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
370 ; GFX906-NEXT: v_or_b32_sdwa v21, v22, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
371 ; GFX906-NEXT: v_or_b32_sdwa v5, v5, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
372 ; GFX906-NEXT: v_or_b32_sdwa v6, v6, v27 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
373 ; GFX906-NEXT: v_or_b32_sdwa v7, v7, v24 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
374 ; GFX906-NEXT: v_or_b32_sdwa v8, v8, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
375 ; GFX906-NEXT: global_store_dwordx4 v9, v[5:8], s[0:1]
376 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
377 ; GFX906-NEXT: v_lshlrev_b16_e32 v5, 8, v20
378 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
379 ; GFX906-NEXT: v_lshlrev_b16_e32 v5, 8, v18
380 ; GFX906-NEXT: v_or_b32_sdwa v5, v19, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
381 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
382 ; GFX906-NEXT: v_lshlrev_b16_e32 v5, 8, v17
383 ; GFX906-NEXT: v_or_b32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
384 ; GFX906-NEXT: v_lshlrev_b16_e32 v5, 8, v15
385 ; GFX906-NEXT: v_or_b32_sdwa v5, v16, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
386 ; GFX906-NEXT: v_or_b32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
387 ; GFX906-NEXT: v_lshlrev_b16_e32 v5, 8, v14
388 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
389 ; GFX906-NEXT: v_lshlrev_b16_e32 v5, 8, v12
390 ; GFX906-NEXT: v_or_b32_sdwa v5, v13, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
391 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
392 ; GFX906-NEXT: v_lshlrev_b16_e32 v5, 8, v11
393 ; GFX906-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
394 ; GFX906-NEXT: v_or_b32_sdwa v0, v10, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
395 ; GFX906-NEXT: v_or_b32_sdwa v4, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
396 ; GFX906-NEXT: global_store_dwordx4 v9, v[1:4], s[0:1] offset:16
397 ; GFX906-NEXT: s_endpgm
; IR under test: compute the per-lane index, load both candidate vectors, and
; branch on idx < 15.
399 %idx = call i32 @llvm.amdgcn.workitem.id.x()
400 %gep1 = getelementptr <32 x i8>, ptr addrspace(1) %src1, i32 %idx
401 %vec1 = load <32 x i8>, ptr addrspace(1) %gep1
402 %gep2 = getelementptr <32 x i8>, ptr addrspace(1) %src2, i32 %idx
403 %vec2 = load <32 x i8>, ptr addrspace(1) %gep2
404 %cmp = icmp ult i32 %idx, 15
405 br i1 %cmp, label %bb.1, label %bb.2
; Join: the vector is selected by incoming edge and then stored, so the
; <32 x i8> value has to survive across the block boundary.
410 %tmp5 = phi <32 x i8> [ %vec1, %entry ], [ %vec2, %bb.1 ]
411 store <32 x i8> %tmp5, ptr addrspace(1) %dst, align 4
415 define amdgpu_kernel void @v256i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1) %src2, ptr addrspace(1) nocapture %dst) {
416 ; GFX906-LABEL: v256i8_liveout:
417 ; GFX906: ; %bb.0: ; %entry
418 ; GFX906-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
419 ; GFX906-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
420 ; GFX906-NEXT: s_mov_b32 s10, -1
421 ; GFX906-NEXT: s_mov_b32 s11, 0xe00000
422 ; GFX906-NEXT: s_add_u32 s8, s8, s3
423 ; GFX906-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
424 ; GFX906-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
425 ; GFX906-NEXT: v_lshlrev_b32_e32 v63, 3, v0
426 ; GFX906-NEXT: s_addc_u32 s9, s9, 0
427 ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
428 ; GFX906-NEXT: global_load_dwordx4 v[17:20], v63, s[4:5] offset:240
429 ; GFX906-NEXT: global_load_dwordx4 v[5:8], v63, s[4:5] offset:224
430 ; GFX906-NEXT: global_load_dwordx4 v[9:12], v63, s[4:5] offset:208
431 ; GFX906-NEXT: global_load_dwordx4 v[13:16], v63, s[4:5] offset:192
432 ; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
433 ; GFX906-NEXT: v_mov_b32_e32 v4, 0
434 ; GFX906-NEXT: s_waitcnt vmcnt(3)
435 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v20
436 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:20 ; 4-byte Folded Spill
437 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v20
438 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:24 ; 4-byte Folded Spill
439 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v20
440 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:28 ; 4-byte Folded Spill
441 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v19
442 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:32 ; 4-byte Folded Spill
443 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v19
444 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:36 ; 4-byte Folded Spill
445 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v19
446 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:40 ; 4-byte Folded Spill
447 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v18
448 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:44 ; 4-byte Folded Spill
449 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v18
450 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:48 ; 4-byte Folded Spill
451 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v18
452 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:52 ; 4-byte Folded Spill
453 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v17
454 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:56 ; 4-byte Folded Spill
455 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v17
456 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:60 ; 4-byte Folded Spill
457 ; GFX906-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill
458 ; GFX906-NEXT: s_waitcnt vmcnt(0)
459 ; GFX906-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:8 ; 4-byte Folded Spill
460 ; GFX906-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:12 ; 4-byte Folded Spill
461 ; GFX906-NEXT: buffer_store_dword v20, off, s[8:11], 0 offset:16 ; 4-byte Folded Spill
462 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v17
463 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:64 ; 4-byte Folded Spill
464 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v8
465 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:68 ; 4-byte Folded Spill
466 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v8
467 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:72 ; 4-byte Folded Spill
468 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v8
469 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:76 ; 4-byte Folded Spill
470 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v7
471 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:80 ; 4-byte Folded Spill
472 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v7
473 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:84 ; 4-byte Folded Spill
474 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v7
475 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:88 ; 4-byte Folded Spill
476 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v6
477 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:92 ; 4-byte Folded Spill
478 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v6
479 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:96 ; 4-byte Folded Spill
480 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v6
481 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:100 ; 4-byte Folded Spill
482 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v5
483 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:104 ; 4-byte Folded Spill
484 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v5
485 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:108 ; 4-byte Folded Spill
486 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v5
487 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:112 ; 4-byte Folded Spill
488 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v12
489 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:116 ; 4-byte Folded Spill
490 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v12
491 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:120 ; 4-byte Folded Spill
492 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v12
493 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:124 ; 4-byte Folded Spill
494 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v11
495 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:128 ; 4-byte Folded Spill
496 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v11
497 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:132 ; 4-byte Folded Spill
498 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v11
499 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:136 ; 4-byte Folded Spill
500 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v10
501 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:140 ; 4-byte Folded Spill
502 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v10
503 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:144 ; 4-byte Folded Spill
504 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v10
505 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:148 ; 4-byte Folded Spill
506 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v9
507 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:152 ; 4-byte Folded Spill
508 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v9
509 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:156 ; 4-byte Folded Spill
510 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v9
511 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:160 ; 4-byte Folded Spill
512 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v16
513 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:164 ; 4-byte Folded Spill
514 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v16
515 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:168 ; 4-byte Folded Spill
516 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v16
517 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:172 ; 4-byte Folded Spill
518 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v15
519 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:180 ; 4-byte Folded Spill
520 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v15
521 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:184 ; 4-byte Folded Spill
522 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v15
523 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:176 ; 4-byte Folded Spill
524 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v14
525 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:192 ; 4-byte Folded Spill
526 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v14
527 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:196 ; 4-byte Folded Spill
528 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v14
529 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:188 ; 4-byte Folded Spill
530 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v13
531 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:204 ; 4-byte Folded Spill
532 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v13
533 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:208 ; 4-byte Folded Spill
534 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v13
535 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:200 ; 4-byte Folded Spill
536 ; GFX906-NEXT: global_load_dwordx4 v[17:20], v63, s[4:5] offset:176
537 ; GFX906-NEXT: global_load_dwordx4 v[21:24], v63, s[4:5] offset:160
538 ; GFX906-NEXT: s_waitcnt vmcnt(1)
539 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v20
540 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:212 ; 4-byte Folded Spill
541 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v20
542 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:216 ; 4-byte Folded Spill
543 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v20
544 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:228 ; 4-byte Folded Spill
545 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v19
546 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:220 ; 4-byte Folded Spill
547 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v19
548 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:224 ; 4-byte Folded Spill
549 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v19
550 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:240 ; 4-byte Folded Spill
551 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v18
552 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:232 ; 4-byte Folded Spill
553 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v18
554 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:236 ; 4-byte Folded Spill
555 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v18
556 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:252 ; 4-byte Folded Spill
557 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v17
558 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:244 ; 4-byte Folded Spill
559 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v17
560 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:248 ; 4-byte Folded Spill
561 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v17
562 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:256 ; 4-byte Folded Spill
563 ; GFX906-NEXT: s_waitcnt vmcnt(12)
564 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v24
565 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:260 ; 4-byte Folded Spill
566 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v24
567 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:264 ; 4-byte Folded Spill
568 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v24
569 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:276 ; 4-byte Folded Spill
570 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v23
571 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:268 ; 4-byte Folded Spill
572 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v23
573 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:272 ; 4-byte Folded Spill
574 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v23
575 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:288 ; 4-byte Folded Spill
576 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v22
577 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:280 ; 4-byte Folded Spill
578 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v22
579 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:284 ; 4-byte Folded Spill
580 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v22
581 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:300 ; 4-byte Folded Spill
582 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v21
583 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:292 ; 4-byte Folded Spill
584 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v21
585 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:296 ; 4-byte Folded Spill
586 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v21
587 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:304 ; 4-byte Folded Spill
588 ; GFX906-NEXT: global_load_dwordx4 v[25:28], v63, s[4:5] offset:144
589 ; GFX906-NEXT: global_load_dwordx4 v[29:32], v63, s[4:5] offset:128
590 ; GFX906-NEXT: s_waitcnt vmcnt(1)
591 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v28
592 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:308 ; 4-byte Folded Spill
593 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v28
594 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:312 ; 4-byte Folded Spill
595 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v28
596 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:324 ; 4-byte Folded Spill
597 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v27
598 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:316 ; 4-byte Folded Spill
599 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v27
600 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:320 ; 4-byte Folded Spill
601 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v27
602 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:336 ; 4-byte Folded Spill
603 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v26
604 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:328 ; 4-byte Folded Spill
605 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v26
606 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:332 ; 4-byte Folded Spill
607 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v26
608 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:348 ; 4-byte Folded Spill
609 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v25
610 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:340 ; 4-byte Folded Spill
611 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v25
612 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:344 ; 4-byte Folded Spill
613 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v25
614 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:352 ; 4-byte Folded Spill
615 ; GFX906-NEXT: s_waitcnt vmcnt(12)
616 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v32
617 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:356 ; 4-byte Folded Spill
618 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v32
619 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:360 ; 4-byte Folded Spill
620 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v32
621 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:372 ; 4-byte Folded Spill
622 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v31
623 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:364 ; 4-byte Folded Spill
624 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v31
625 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:368 ; 4-byte Folded Spill
626 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v31
627 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:384 ; 4-byte Folded Spill
628 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v30
629 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:376 ; 4-byte Folded Spill
630 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v30
631 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:380 ; 4-byte Folded Spill
632 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v30
633 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:396 ; 4-byte Folded Spill
634 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v29
635 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:388 ; 4-byte Folded Spill
636 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v29
637 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:392 ; 4-byte Folded Spill
638 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v29
639 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:400 ; 4-byte Folded Spill
640 ; GFX906-NEXT: global_load_dwordx4 v[33:36], v63, s[4:5] offset:112
641 ; GFX906-NEXT: global_load_dwordx4 v[37:40], v63, s[4:5] offset:96
642 ; GFX906-NEXT: s_waitcnt vmcnt(1)
643 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v36
644 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:404 ; 4-byte Folded Spill
645 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v36
646 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:408 ; 4-byte Folded Spill
647 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v36
648 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:420 ; 4-byte Folded Spill
649 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v35
650 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:412 ; 4-byte Folded Spill
651 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v35
652 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:416 ; 4-byte Folded Spill
653 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v35
654 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:432 ; 4-byte Folded Spill
655 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v34
656 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:424 ; 4-byte Folded Spill
657 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v34
658 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:428 ; 4-byte Folded Spill
659 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v34
660 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:444 ; 4-byte Folded Spill
661 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v33
662 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:436 ; 4-byte Folded Spill
663 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v33
664 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:440 ; 4-byte Folded Spill
665 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v33
666 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:448 ; 4-byte Folded Spill
667 ; GFX906-NEXT: s_waitcnt vmcnt(12)
668 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v40
669 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:452 ; 4-byte Folded Spill
670 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v40
671 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:456 ; 4-byte Folded Spill
672 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v40
673 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:468 ; 4-byte Folded Spill
674 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v39
675 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:460 ; 4-byte Folded Spill
676 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v39
677 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:464 ; 4-byte Folded Spill
678 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v39
679 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:480 ; 4-byte Folded Spill
680 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v38
681 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:472 ; 4-byte Folded Spill
682 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v38
683 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:476 ; 4-byte Folded Spill
684 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v38
685 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:492 ; 4-byte Folded Spill
686 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v37
687 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:484 ; 4-byte Folded Spill
688 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v37
689 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:488 ; 4-byte Folded Spill
690 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v37
691 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:496 ; 4-byte Folded Spill
692 ; GFX906-NEXT: global_load_dwordx4 v[41:44], v63, s[4:5] offset:80
693 ; GFX906-NEXT: global_load_dwordx4 v[45:48], v63, s[4:5] offset:64
694 ; GFX906-NEXT: s_waitcnt vmcnt(1)
695 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v44
696 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:500 ; 4-byte Folded Spill
697 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v44
698 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:504 ; 4-byte Folded Spill
699 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v44
700 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:516 ; 4-byte Folded Spill
701 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v43
702 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:508 ; 4-byte Folded Spill
703 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v43
704 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:512 ; 4-byte Folded Spill
705 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v43
706 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:528 ; 4-byte Folded Spill
707 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v42
708 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:520 ; 4-byte Folded Spill
709 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v42
710 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:524 ; 4-byte Folded Spill
711 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v42
712 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:540 ; 4-byte Folded Spill
713 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v41
714 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:532 ; 4-byte Folded Spill
715 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v41
716 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:536 ; 4-byte Folded Spill
717 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v41
718 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:544 ; 4-byte Folded Spill
719 ; GFX906-NEXT: s_waitcnt vmcnt(12)
720 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v48
721 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:548 ; 4-byte Folded Spill
722 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v48
723 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:552 ; 4-byte Folded Spill
724 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v48
725 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:564 ; 4-byte Folded Spill
726 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v47
727 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:556 ; 4-byte Folded Spill
728 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v47
729 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:560 ; 4-byte Folded Spill
730 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v47
731 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:576 ; 4-byte Folded Spill
732 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v46
733 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:568 ; 4-byte Folded Spill
734 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v46
735 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:572 ; 4-byte Folded Spill
736 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v46
737 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:588 ; 4-byte Folded Spill
738 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v45
739 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:580 ; 4-byte Folded Spill
740 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v45
741 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:584 ; 4-byte Folded Spill
742 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v45
743 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:592 ; 4-byte Folded Spill
744 ; GFX906-NEXT: global_load_dwordx4 v[49:52], v63, s[4:5] offset:48
745 ; GFX906-NEXT: global_load_dwordx4 v[53:56], v63, s[4:5] offset:32
746 ; GFX906-NEXT: s_waitcnt vmcnt(1)
747 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v52
748 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:596 ; 4-byte Folded Spill
749 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v52
750 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:600 ; 4-byte Folded Spill
751 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v52
752 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:612 ; 4-byte Folded Spill
753 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v51
754 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:604 ; 4-byte Folded Spill
755 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v51
756 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:608 ; 4-byte Folded Spill
757 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v51
758 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:624 ; 4-byte Folded Spill
759 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v50
760 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:616 ; 4-byte Folded Spill
761 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v50
762 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:620 ; 4-byte Folded Spill
763 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v50
764 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:636 ; 4-byte Folded Spill
765 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v49
766 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:628 ; 4-byte Folded Spill
767 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v49
768 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:632 ; 4-byte Folded Spill
769 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v49
770 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:640 ; 4-byte Folded Spill
771 ; GFX906-NEXT: s_waitcnt vmcnt(12)
772 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v56
773 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:644 ; 4-byte Folded Spill
774 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v56
775 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:648 ; 4-byte Folded Spill
776 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v56
777 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:660 ; 4-byte Folded Spill
778 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v55
779 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:652 ; 4-byte Folded Spill
780 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v55
781 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:656 ; 4-byte Folded Spill
782 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v55
783 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:672 ; 4-byte Folded Spill
784 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v54
785 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:664 ; 4-byte Folded Spill
786 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v54
787 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:668 ; 4-byte Folded Spill
788 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v54
789 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:684 ; 4-byte Folded Spill
790 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v53
791 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:676 ; 4-byte Folded Spill
792 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v53
793 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:680 ; 4-byte Folded Spill
794 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v53
795 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:688 ; 4-byte Folded Spill
796 ; GFX906-NEXT: global_load_dwordx4 v[57:60], v63, s[4:5] offset:16
797 ; GFX906-NEXT: s_nop 0
798 ; GFX906-NEXT: global_load_dwordx4 v[0:3], v63, s[4:5]
799 ; GFX906-NEXT: s_waitcnt vmcnt(1)
800 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 24, v60
801 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:692 ; 4-byte Folded Spill
802 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 16, v60
803 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:696 ; 4-byte Folded Spill
804 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 8, v60
805 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:708 ; 4-byte Folded Spill
806 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 24, v59
807 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:700 ; 4-byte Folded Spill
808 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 16, v59
809 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:704 ; 4-byte Folded Spill
810 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 8, v59
811 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:720 ; 4-byte Folded Spill
812 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 24, v58
813 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:712 ; 4-byte Folded Spill
814 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 16, v58
815 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:716 ; 4-byte Folded Spill
816 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 8, v58
817 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:732 ; 4-byte Folded Spill
818 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 24, v57
819 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:724 ; 4-byte Folded Spill
820 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 16, v57
821 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:728 ; 4-byte Folded Spill
822 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 8, v57
823 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:736 ; 4-byte Folded Spill
824 ; GFX906-NEXT: s_waitcnt vmcnt(12)
825 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 24, v3
826 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:740 ; 4-byte Folded Spill
827 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 16, v3
828 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:744 ; 4-byte Folded Spill
829 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 8, v3
830 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:756 ; 4-byte Folded Spill
831 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 24, v2
832 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:748 ; 4-byte Folded Spill
833 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 16, v2
834 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:752 ; 4-byte Folded Spill
835 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 8, v2
836 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:768 ; 4-byte Folded Spill
837 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 24, v1
838 ; GFX906-NEXT: v_lshrrev_b32_e32 v62, 24, v0
839 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:760 ; 4-byte Folded Spill
840 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 16, v1
841 ; GFX906-NEXT: buffer_store_dword v62, off, s[8:11], 0 offset:772 ; 4-byte Folded Spill
842 ; GFX906-NEXT: v_lshrrev_b32_e32 v62, 16, v0
843 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:764 ; 4-byte Folded Spill
844 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 8, v1
845 ; GFX906-NEXT: buffer_store_dword v62, off, s[8:11], 0 offset:776 ; 4-byte Folded Spill
846 ; GFX906-NEXT: v_lshrrev_b32_e32 v62, 8, v0
847 ; GFX906-NEXT: s_and_saveexec_b64 s[0:1], vcc
848 ; GFX906-NEXT: s_cbranch_execz .LBB6_2
849 ; GFX906-NEXT: ; %bb.1: ; %bb.1
850 ; GFX906-NEXT: global_load_dwordx4 v[0:3], v63, s[6:7] offset:240
851 ; GFX906-NEXT: global_load_dwordx4 v[5:8], v63, s[6:7] offset:224
852 ; GFX906-NEXT: global_load_dwordx4 v[9:12], v63, s[6:7] offset:208
853 ; GFX906-NEXT: global_load_dwordx4 v[13:16], v63, s[6:7] offset:192
854 ; GFX906-NEXT: s_waitcnt vmcnt(3)
855 ; GFX906-NEXT: v_lshrrev_b32_e32 v17, 24, v3
856 ; GFX906-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:20 ; 4-byte Folded Spill
857 ; GFX906-NEXT: v_lshrrev_b32_e32 v17, 16, v3
858 ; GFX906-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:24 ; 4-byte Folded Spill
859 ; GFX906-NEXT: v_lshrrev_b32_e32 v17, 8, v3
860 ; GFX906-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:28 ; 4-byte Folded Spill
861 ; GFX906-NEXT: v_lshrrev_b32_e32 v17, 24, v2
862 ; GFX906-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:32 ; 4-byte Folded Spill
863 ; GFX906-NEXT: v_lshrrev_b32_e32 v17, 16, v2
864 ; GFX906-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:36 ; 4-byte Folded Spill
865 ; GFX906-NEXT: v_lshrrev_b32_e32 v17, 8, v2
866 ; GFX906-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:40 ; 4-byte Folded Spill
867 ; GFX906-NEXT: v_lshrrev_b32_e32 v17, 24, v1
868 ; GFX906-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:44 ; 4-byte Folded Spill
869 ; GFX906-NEXT: v_lshrrev_b32_e32 v17, 16, v1
870 ; GFX906-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:48 ; 4-byte Folded Spill
871 ; GFX906-NEXT: v_lshrrev_b32_e32 v17, 8, v1
872 ; GFX906-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:52 ; 4-byte Folded Spill
873 ; GFX906-NEXT: v_lshrrev_b32_e32 v17, 24, v0
874 ; GFX906-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:56 ; 4-byte Folded Spill
875 ; GFX906-NEXT: v_lshrrev_b32_e32 v17, 16, v0
876 ; GFX906-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:60 ; 4-byte Folded Spill
877 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill
878 ; GFX906-NEXT: s_waitcnt vmcnt(0)
879 ; GFX906-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:8 ; 4-byte Folded Spill
880 ; GFX906-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:12 ; 4-byte Folded Spill
881 ; GFX906-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:16 ; 4-byte Folded Spill
882 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v0
883 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:64 ; 4-byte Folded Spill
884 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v8
885 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:68 ; 4-byte Folded Spill
886 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v8
887 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:72 ; 4-byte Folded Spill
888 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v8
889 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:76 ; 4-byte Folded Spill
890 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v7
891 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:80 ; 4-byte Folded Spill
892 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v7
893 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:84 ; 4-byte Folded Spill
894 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v7
895 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:88 ; 4-byte Folded Spill
896 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v6
897 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:92 ; 4-byte Folded Spill
898 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v6
899 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:96 ; 4-byte Folded Spill
900 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v6
901 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:100 ; 4-byte Folded Spill
902 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v5
903 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:104 ; 4-byte Folded Spill
904 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v5
905 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:108 ; 4-byte Folded Spill
906 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v5
907 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:112 ; 4-byte Folded Spill
908 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v12
909 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:116 ; 4-byte Folded Spill
910 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v12
911 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:120 ; 4-byte Folded Spill
912 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v12
913 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:124 ; 4-byte Folded Spill
914 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v11
915 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:128 ; 4-byte Folded Spill
916 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v11
917 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:132 ; 4-byte Folded Spill
918 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v11
919 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:136 ; 4-byte Folded Spill
920 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v10
921 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:140 ; 4-byte Folded Spill
922 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v10
923 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:144 ; 4-byte Folded Spill
924 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v10
925 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:148 ; 4-byte Folded Spill
926 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v9
927 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:152 ; 4-byte Folded Spill
928 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v9
929 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:156 ; 4-byte Folded Spill
930 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v9
931 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:160 ; 4-byte Folded Spill
932 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v16
933 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:164 ; 4-byte Folded Spill
934 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v16
935 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:168 ; 4-byte Folded Spill
936 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v16
937 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:172 ; 4-byte Folded Spill
938 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v15
939 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:180 ; 4-byte Folded Spill
940 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v15
941 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:184 ; 4-byte Folded Spill
942 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v15
943 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:176 ; 4-byte Folded Spill
944 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v14
945 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:192 ; 4-byte Folded Spill
946 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v14
947 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:196 ; 4-byte Folded Spill
948 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v14
949 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:188 ; 4-byte Folded Spill
950 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v13
951 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:204 ; 4-byte Folded Spill
952 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v13
953 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:208 ; 4-byte Folded Spill
954 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v13
955 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:200 ; 4-byte Folded Spill
956 ; GFX906-NEXT: global_load_dwordx4 v[17:20], v63, s[6:7] offset:176
957 ; GFX906-NEXT: global_load_dwordx4 v[21:24], v63, s[6:7] offset:160
958 ; GFX906-NEXT: s_waitcnt vmcnt(1)
959 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v20
960 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:212 ; 4-byte Folded Spill
961 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v20
962 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:216 ; 4-byte Folded Spill
963 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v20
964 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:228 ; 4-byte Folded Spill
965 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v19
966 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:220 ; 4-byte Folded Spill
967 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v19
968 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:224 ; 4-byte Folded Spill
969 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v19
970 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:240 ; 4-byte Folded Spill
971 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v18
972 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:232 ; 4-byte Folded Spill
973 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v18
974 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:236 ; 4-byte Folded Spill
975 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v18
976 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:252 ; 4-byte Folded Spill
977 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v17
978 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:244 ; 4-byte Folded Spill
979 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v17
980 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:248 ; 4-byte Folded Spill
981 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v17
982 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:256 ; 4-byte Folded Spill
983 ; GFX906-NEXT: s_waitcnt vmcnt(12)
984 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v24
985 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:260 ; 4-byte Folded Spill
986 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v24
987 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:264 ; 4-byte Folded Spill
988 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v24
989 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:276 ; 4-byte Folded Spill
990 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v23
991 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:268 ; 4-byte Folded Spill
992 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v23
993 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:272 ; 4-byte Folded Spill
994 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v23
995 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:288 ; 4-byte Folded Spill
996 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v22
997 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:280 ; 4-byte Folded Spill
998 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v22
999 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:284 ; 4-byte Folded Spill
1000 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v22
1001 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:300 ; 4-byte Folded Spill
1002 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v21
1003 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:292 ; 4-byte Folded Spill
1004 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v21
1005 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:296 ; 4-byte Folded Spill
1006 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v21
1007 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:304 ; 4-byte Folded Spill
1008 ; GFX906-NEXT: global_load_dwordx4 v[25:28], v63, s[6:7] offset:144
1009 ; GFX906-NEXT: global_load_dwordx4 v[29:32], v63, s[6:7] offset:128
1010 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1011 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v28
1012 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:308 ; 4-byte Folded Spill
1013 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v28
1014 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:312 ; 4-byte Folded Spill
1015 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v28
1016 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:324 ; 4-byte Folded Spill
1017 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v27
1018 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:316 ; 4-byte Folded Spill
1019 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v27
1020 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:320 ; 4-byte Folded Spill
1021 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v27
1022 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:336 ; 4-byte Folded Spill
1023 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v26
1024 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:328 ; 4-byte Folded Spill
1025 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v26
1026 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:332 ; 4-byte Folded Spill
1027 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v26
1028 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:348 ; 4-byte Folded Spill
1029 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v25
1030 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:340 ; 4-byte Folded Spill
1031 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v25
1032 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:344 ; 4-byte Folded Spill
1033 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v25
1034 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:352 ; 4-byte Folded Spill
1035 ; GFX906-NEXT: s_waitcnt vmcnt(12)
1036 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v32
1037 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:356 ; 4-byte Folded Spill
1038 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v32
1039 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:360 ; 4-byte Folded Spill
1040 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v32
1041 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:372 ; 4-byte Folded Spill
1042 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v31
1043 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:364 ; 4-byte Folded Spill
1044 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v31
1045 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:368 ; 4-byte Folded Spill
1046 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v31
1047 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:384 ; 4-byte Folded Spill
1048 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v30
1049 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:376 ; 4-byte Folded Spill
1050 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v30
1051 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:380 ; 4-byte Folded Spill
1052 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v30
1053 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:396 ; 4-byte Folded Spill
1054 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v29
1055 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:388 ; 4-byte Folded Spill
1056 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v29
1057 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:392 ; 4-byte Folded Spill
1058 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v29
1059 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:400 ; 4-byte Folded Spill
1060 ; GFX906-NEXT: global_load_dwordx4 v[33:36], v63, s[6:7] offset:112
1061 ; GFX906-NEXT: global_load_dwordx4 v[37:40], v63, s[6:7] offset:96
1062 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1063 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v36
1064 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:404 ; 4-byte Folded Spill
1065 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v36
1066 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:408 ; 4-byte Folded Spill
1067 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v36
1068 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:420 ; 4-byte Folded Spill
1069 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v35
1070 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:412 ; 4-byte Folded Spill
1071 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v35
1072 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:416 ; 4-byte Folded Spill
1073 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v35
1074 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:432 ; 4-byte Folded Spill
1075 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v34
1076 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:424 ; 4-byte Folded Spill
1077 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v34
1078 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:428 ; 4-byte Folded Spill
1079 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v34
1080 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:444 ; 4-byte Folded Spill
1081 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v33
1082 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:436 ; 4-byte Folded Spill
1083 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v33
1084 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:440 ; 4-byte Folded Spill
1085 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v33
1086 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:448 ; 4-byte Folded Spill
1087 ; GFX906-NEXT: s_waitcnt vmcnt(12)
1088 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v40
1089 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:452 ; 4-byte Folded Spill
1090 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v40
1091 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:456 ; 4-byte Folded Spill
1092 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v40
1093 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:468 ; 4-byte Folded Spill
1094 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v39
1095 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:460 ; 4-byte Folded Spill
1096 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v39
1097 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:464 ; 4-byte Folded Spill
1098 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v39
1099 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:480 ; 4-byte Folded Spill
1100 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v38
1101 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:472 ; 4-byte Folded Spill
1102 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v38
1103 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:476 ; 4-byte Folded Spill
1104 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v38
1105 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:492 ; 4-byte Folded Spill
1106 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v37
1107 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:484 ; 4-byte Folded Spill
1108 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v37
1109 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:488 ; 4-byte Folded Spill
1110 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v37
1111 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:496 ; 4-byte Folded Spill
1112 ; GFX906-NEXT: global_load_dwordx4 v[41:44], v63, s[6:7] offset:80
1113 ; GFX906-NEXT: global_load_dwordx4 v[45:48], v63, s[6:7] offset:64
1114 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1115 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v44
1116 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:500 ; 4-byte Folded Spill
1117 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v44
1118 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:504 ; 4-byte Folded Spill
1119 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v44
1120 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:516 ; 4-byte Folded Spill
1121 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v43
1122 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:508 ; 4-byte Folded Spill
1123 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v43
1124 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:512 ; 4-byte Folded Spill
1125 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v43
1126 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:528 ; 4-byte Folded Spill
1127 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v42
1128 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:520 ; 4-byte Folded Spill
1129 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v42
1130 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:524 ; 4-byte Folded Spill
1131 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v42
1132 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:540 ; 4-byte Folded Spill
1133 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v41
1134 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:532 ; 4-byte Folded Spill
1135 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v41
1136 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:536 ; 4-byte Folded Spill
1137 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v41
1138 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:544 ; 4-byte Folded Spill
1139 ; GFX906-NEXT: s_waitcnt vmcnt(12)
1140 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v48
1141 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:548 ; 4-byte Folded Spill
1142 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v48
1143 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:552 ; 4-byte Folded Spill
1144 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v48
1145 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:564 ; 4-byte Folded Spill
1146 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v47
1147 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:556 ; 4-byte Folded Spill
1148 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v47
1149 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:560 ; 4-byte Folded Spill
1150 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v47
1151 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:576 ; 4-byte Folded Spill
1152 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v46
1153 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:568 ; 4-byte Folded Spill
1154 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v46
1155 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:572 ; 4-byte Folded Spill
1156 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v46
1157 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:588 ; 4-byte Folded Spill
1158 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v45
1159 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:580 ; 4-byte Folded Spill
1160 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v45
1161 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:584 ; 4-byte Folded Spill
1162 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v45
1163 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:592 ; 4-byte Folded Spill
1164 ; GFX906-NEXT: global_load_dwordx4 v[49:52], v63, s[6:7] offset:48
1165 ; GFX906-NEXT: global_load_dwordx4 v[53:56], v63, s[6:7] offset:32
1166 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1167 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v52
1168 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:596 ; 4-byte Folded Spill
1169 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v52
1170 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:600 ; 4-byte Folded Spill
1171 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v52
1172 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:612 ; 4-byte Folded Spill
1173 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v51
1174 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:604 ; 4-byte Folded Spill
1175 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v51
1176 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:608 ; 4-byte Folded Spill
1177 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v51
1178 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:624 ; 4-byte Folded Spill
1179 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v50
1180 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:616 ; 4-byte Folded Spill
1181 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v50
1182 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:620 ; 4-byte Folded Spill
1183 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v50
1184 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:636 ; 4-byte Folded Spill
1185 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v49
1186 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:628 ; 4-byte Folded Spill
1187 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v49
1188 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:632 ; 4-byte Folded Spill
1189 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v49
1190 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:640 ; 4-byte Folded Spill
1191 ; GFX906-NEXT: s_waitcnt vmcnt(12)
1192 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v56
1193 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:644 ; 4-byte Folded Spill
1194 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v56
1195 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:648 ; 4-byte Folded Spill
1196 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v56
1197 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:660 ; 4-byte Folded Spill
1198 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v55
1199 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:652 ; 4-byte Folded Spill
1200 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v55
1201 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:656 ; 4-byte Folded Spill
1202 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v55
1203 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:672 ; 4-byte Folded Spill
1204 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v54
1205 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:664 ; 4-byte Folded Spill
1206 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v54
1207 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:668 ; 4-byte Folded Spill
1208 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v54
1209 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:684 ; 4-byte Folded Spill
1210 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 24, v53
1211 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:676 ; 4-byte Folded Spill
1212 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v53
1213 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:680 ; 4-byte Folded Spill
1214 ; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v53
1215 ; GFX906-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:688 ; 4-byte Folded Spill
1216 ; GFX906-NEXT: global_load_dwordx4 v[57:60], v63, s[6:7] offset:16
1217 ; GFX906-NEXT: s_nop 0
1218 ; GFX906-NEXT: global_load_dwordx4 v[0:3], v63, s[6:7]
1219 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1220 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 24, v60
1221 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:692 ; 4-byte Folded Spill
1222 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 16, v60
1223 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:696 ; 4-byte Folded Spill
1224 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 8, v60
1225 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:708 ; 4-byte Folded Spill
1226 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 24, v59
1227 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:700 ; 4-byte Folded Spill
1228 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 16, v59
1229 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:704 ; 4-byte Folded Spill
1230 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 8, v59
1231 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:720 ; 4-byte Folded Spill
1232 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 24, v58
1233 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:712 ; 4-byte Folded Spill
1234 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 16, v58
1235 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:716 ; 4-byte Folded Spill
1236 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 8, v58
1237 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:732 ; 4-byte Folded Spill
1238 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 24, v57
1239 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:724 ; 4-byte Folded Spill
1240 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 16, v57
1241 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:728 ; 4-byte Folded Spill
1242 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 8, v57
1243 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:736 ; 4-byte Folded Spill
1244 ; GFX906-NEXT: s_waitcnt vmcnt(12)
1245 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 24, v3
1246 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:740 ; 4-byte Folded Spill
1247 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 16, v3
1248 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:744 ; 4-byte Folded Spill
1249 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 8, v3
1250 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:756 ; 4-byte Folded Spill
1251 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 24, v2
1252 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:748 ; 4-byte Folded Spill
1253 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 16, v2
1254 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:752 ; 4-byte Folded Spill
1255 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 8, v2
1256 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:768 ; 4-byte Folded Spill
1257 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 24, v1
1258 ; GFX906-NEXT: v_lshrrev_b32_e32 v62, 24, v0
1259 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:760 ; 4-byte Folded Spill
1260 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 16, v1
1261 ; GFX906-NEXT: buffer_store_dword v62, off, s[8:11], 0 offset:772 ; 4-byte Folded Spill
1262 ; GFX906-NEXT: v_lshrrev_b32_e32 v62, 16, v0
1263 ; GFX906-NEXT: buffer_store_dword v61, off, s[8:11], 0 offset:764 ; 4-byte Folded Spill
1264 ; GFX906-NEXT: v_lshrrev_b32_e32 v61, 8, v1
1265 ; GFX906-NEXT: buffer_store_dword v62, off, s[8:11], 0 offset:776 ; 4-byte Folded Spill
1266 ; GFX906-NEXT: v_lshrrev_b32_e32 v62, 8, v0
1267 ; GFX906-NEXT: .LBB6_2: ; %bb.2
1268 ; GFX906-NEXT: s_or_b64 exec, exec, s[0:1]
1269 ; GFX906-NEXT: v_lshlrev_b16_e32 v61, 8, v61
1270 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v61 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1271 ; GFX906-NEXT: buffer_load_dword v61, off, s[8:11], 0 offset:768 ; 4-byte Folded Reload
1272 ; GFX906-NEXT: v_lshlrev_b16_e32 v62, 8, v62
1273 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v62 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1274 ; GFX906-NEXT: buffer_load_dword v62, off, s[8:11], 0 offset:776 ; 4-byte Folded Reload
1275 ; GFX906-NEXT: buffer_load_dword v63, off, s[8:11], 0 offset:764 ; 4-byte Folded Reload
1276 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1277 ; GFX906-NEXT: v_lshlrev_b16_e32 v61, 8, v61
1278 ; GFX906-NEXT: v_or_b32_sdwa v2, v2, v61 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1279 ; GFX906-NEXT: buffer_load_dword v61, off, s[8:11], 0 offset:756 ; 4-byte Folded Reload
1280 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1281 ; GFX906-NEXT: v_lshlrev_b16_e32 v61, 8, v61
1282 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v61 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1283 ; GFX906-NEXT: buffer_load_dword v61, off, s[8:11], 0 offset:772 ; 4-byte Folded Reload
1284 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1285 ; GFX906-NEXT: v_lshlrev_b16_e32 v61, 8, v61
1286 ; GFX906-NEXT: v_or_b32_sdwa v61, v62, v61 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1287 ; GFX906-NEXT: buffer_load_dword v62, off, s[8:11], 0 offset:760 ; 4-byte Folded Reload
1288 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v61 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1289 ; GFX906-NEXT: buffer_load_dword v61, off, s[8:11], 0 offset:748 ; 4-byte Folded Reload
1290 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1291 ; GFX906-NEXT: v_lshlrev_b16_e32 v62, 8, v62
1292 ; GFX906-NEXT: v_or_b32_sdwa v62, v63, v62 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1293 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v62 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1294 ; GFX906-NEXT: buffer_load_dword v62, off, s[8:11], 0 offset:752 ; 4-byte Folded Reload
1295 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1296 ; GFX906-NEXT: v_lshlrev_b16_e32 v61, 8, v61
1297 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1298 ; GFX906-NEXT: v_or_b32_sdwa v61, v62, v61 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1299 ; GFX906-NEXT: v_or_b32_sdwa v2, v2, v61 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1300 ; GFX906-NEXT: buffer_load_dword v61, off, s[8:11], 0 offset:740 ; 4-byte Folded Reload
1301 ; GFX906-NEXT: buffer_load_dword v62, off, s[8:11], 0 offset:744 ; 4-byte Folded Reload
1302 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1303 ; GFX906-NEXT: v_lshlrev_b16_e32 v61, 8, v61
1304 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1305 ; GFX906-NEXT: v_or_b32_sdwa v61, v62, v61 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1306 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v61 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1307 ; GFX906-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
1308 ; GFX906-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:736 ; 4-byte Folded Reload
1309 ; GFX906-NEXT: s_nop 0
1310 ; GFX906-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:732 ; 4-byte Folded Reload
1311 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:720 ; 4-byte Folded Reload
1312 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:708 ; 4-byte Folded Reload
1313 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1314 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
1315 ; GFX906-NEXT: v_or_b32_sdwa v0, v57, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1316 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1317 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1318 ; GFX906-NEXT: buffer_load_dword v57, off, s[8:11], 0 offset:724 ; 4-byte Folded Reload
1319 ; GFX906-NEXT: v_or_b32_sdwa v1, v58, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1320 ; GFX906-NEXT: buffer_load_dword v58, off, s[8:11], 0 offset:728 ; 4-byte Folded Reload
1321 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1322 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1323 ; GFX906-NEXT: v_or_b32_sdwa v2, v59, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1324 ; GFX906-NEXT: buffer_load_dword v59, off, s[8:11], 0 offset:716 ; 4-byte Folded Reload
1325 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1326 ; GFX906-NEXT: v_lshlrev_b16_e32 v57, 8, v57
1327 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1328 ; GFX906-NEXT: v_or_b32_sdwa v57, v58, v57 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1329 ; GFX906-NEXT: buffer_load_dword v58, off, s[8:11], 0 offset:712 ; 4-byte Folded Reload
1330 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v57 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1331 ; GFX906-NEXT: buffer_load_dword v57, off, s[8:11], 0 offset:700 ; 4-byte Folded Reload
1332 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1333 ; GFX906-NEXT: v_lshlrev_b16_e32 v58, 8, v58
1334 ; GFX906-NEXT: v_or_b32_sdwa v58, v59, v58 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1335 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v58 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1336 ; GFX906-NEXT: buffer_load_dword v58, off, s[8:11], 0 offset:704 ; 4-byte Folded Reload
1337 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1338 ; GFX906-NEXT: v_lshlrev_b16_e32 v57, 8, v57
1339 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1340 ; GFX906-NEXT: v_or_b32_sdwa v57, v58, v57 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1341 ; GFX906-NEXT: v_or_b32_sdwa v2, v2, v57 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1342 ; GFX906-NEXT: buffer_load_dword v57, off, s[8:11], 0 offset:692 ; 4-byte Folded Reload
1343 ; GFX906-NEXT: buffer_load_dword v58, off, s[8:11], 0 offset:696 ; 4-byte Folded Reload
1344 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1345 ; GFX906-NEXT: v_or_b32_sdwa v3, v60, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1346 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1347 ; GFX906-NEXT: v_lshlrev_b16_e32 v57, 8, v57
1348 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1349 ; GFX906-NEXT: v_or_b32_sdwa v57, v58, v57 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1350 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v57 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1351 ; GFX906-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16
1352 ; GFX906-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:688 ; 4-byte Folded Reload
1353 ; GFX906-NEXT: s_nop 0
1354 ; GFX906-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:684 ; 4-byte Folded Reload
1355 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:672 ; 4-byte Folded Reload
1356 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:660 ; 4-byte Folded Reload
1357 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1358 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
1359 ; GFX906-NEXT: v_or_b32_sdwa v0, v53, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1360 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1361 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1362 ; GFX906-NEXT: buffer_load_dword v53, off, s[8:11], 0 offset:676 ; 4-byte Folded Reload
1363 ; GFX906-NEXT: v_or_b32_sdwa v1, v54, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1364 ; GFX906-NEXT: buffer_load_dword v54, off, s[8:11], 0 offset:680 ; 4-byte Folded Reload
1365 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1366 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1367 ; GFX906-NEXT: v_or_b32_sdwa v2, v55, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1368 ; GFX906-NEXT: buffer_load_dword v55, off, s[8:11], 0 offset:668 ; 4-byte Folded Reload
1369 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1370 ; GFX906-NEXT: v_lshlrev_b16_e32 v53, 8, v53
1371 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1372 ; GFX906-NEXT: v_or_b32_sdwa v53, v54, v53 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1373 ; GFX906-NEXT: buffer_load_dword v54, off, s[8:11], 0 offset:664 ; 4-byte Folded Reload
1374 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v53 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1375 ; GFX906-NEXT: buffer_load_dword v53, off, s[8:11], 0 offset:652 ; 4-byte Folded Reload
1376 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1377 ; GFX906-NEXT: v_lshlrev_b16_e32 v54, 8, v54
1378 ; GFX906-NEXT: v_or_b32_sdwa v54, v55, v54 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1379 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v54 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1380 ; GFX906-NEXT: buffer_load_dword v54, off, s[8:11], 0 offset:656 ; 4-byte Folded Reload
1381 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1382 ; GFX906-NEXT: v_lshlrev_b16_e32 v53, 8, v53
1383 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1384 ; GFX906-NEXT: v_or_b32_sdwa v53, v54, v53 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1385 ; GFX906-NEXT: v_or_b32_sdwa v2, v2, v53 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1386 ; GFX906-NEXT: buffer_load_dword v53, off, s[8:11], 0 offset:644 ; 4-byte Folded Reload
1387 ; GFX906-NEXT: buffer_load_dword v54, off, s[8:11], 0 offset:648 ; 4-byte Folded Reload
1388 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1389 ; GFX906-NEXT: v_or_b32_sdwa v3, v56, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1390 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1391 ; GFX906-NEXT: v_lshlrev_b16_e32 v53, 8, v53
1392 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1393 ; GFX906-NEXT: v_or_b32_sdwa v53, v54, v53 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1394 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v53 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1395 ; GFX906-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:32
1396 ; GFX906-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:640 ; 4-byte Folded Reload
1397 ; GFX906-NEXT: s_nop 0
1398 ; GFX906-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:636 ; 4-byte Folded Reload
1399 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:624 ; 4-byte Folded Reload
1400 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:612 ; 4-byte Folded Reload
1401 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1402 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
1403 ; GFX906-NEXT: v_or_b32_sdwa v0, v49, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1404 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1405 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1406 ; GFX906-NEXT: buffer_load_dword v49, off, s[8:11], 0 offset:628 ; 4-byte Folded Reload
1407 ; GFX906-NEXT: v_or_b32_sdwa v1, v50, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1408 ; GFX906-NEXT: buffer_load_dword v50, off, s[8:11], 0 offset:632 ; 4-byte Folded Reload
1409 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1410 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1411 ; GFX906-NEXT: v_or_b32_sdwa v2, v51, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1412 ; GFX906-NEXT: buffer_load_dword v51, off, s[8:11], 0 offset:620 ; 4-byte Folded Reload
1413 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1414 ; GFX906-NEXT: v_lshlrev_b16_e32 v49, 8, v49
1415 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1416 ; GFX906-NEXT: v_or_b32_sdwa v49, v50, v49 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1417 ; GFX906-NEXT: buffer_load_dword v50, off, s[8:11], 0 offset:616 ; 4-byte Folded Reload
1418 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v49 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1419 ; GFX906-NEXT: buffer_load_dword v49, off, s[8:11], 0 offset:604 ; 4-byte Folded Reload
1420 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1421 ; GFX906-NEXT: v_lshlrev_b16_e32 v50, 8, v50
1422 ; GFX906-NEXT: v_or_b32_sdwa v50, v51, v50 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1423 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v50 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1424 ; GFX906-NEXT: buffer_load_dword v50, off, s[8:11], 0 offset:608 ; 4-byte Folded Reload
1425 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1426 ; GFX906-NEXT: v_lshlrev_b16_e32 v49, 8, v49
1427 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1428 ; GFX906-NEXT: v_or_b32_sdwa v49, v50, v49 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1429 ; GFX906-NEXT: v_or_b32_sdwa v2, v2, v49 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1430 ; GFX906-NEXT: buffer_load_dword v49, off, s[8:11], 0 offset:596 ; 4-byte Folded Reload
1431 ; GFX906-NEXT: buffer_load_dword v50, off, s[8:11], 0 offset:600 ; 4-byte Folded Reload
1432 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1433 ; GFX906-NEXT: v_or_b32_sdwa v3, v52, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1434 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1435 ; GFX906-NEXT: v_lshlrev_b16_e32 v49, 8, v49
1436 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1437 ; GFX906-NEXT: v_or_b32_sdwa v49, v50, v49 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1438 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v49 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1439 ; GFX906-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:48
1440 ; GFX906-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:592 ; 4-byte Folded Reload
1441 ; GFX906-NEXT: s_nop 0
1442 ; GFX906-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:588 ; 4-byte Folded Reload
1443 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:576 ; 4-byte Folded Reload
1444 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:564 ; 4-byte Folded Reload
1445 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1446 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
1447 ; GFX906-NEXT: v_or_b32_sdwa v0, v45, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1448 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1449 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1450 ; GFX906-NEXT: buffer_load_dword v45, off, s[8:11], 0 offset:580 ; 4-byte Folded Reload
1451 ; GFX906-NEXT: v_or_b32_sdwa v1, v46, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1452 ; GFX906-NEXT: buffer_load_dword v46, off, s[8:11], 0 offset:584 ; 4-byte Folded Reload
1453 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1454 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1455 ; GFX906-NEXT: v_or_b32_sdwa v2, v47, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1456 ; GFX906-NEXT: buffer_load_dword v47, off, s[8:11], 0 offset:572 ; 4-byte Folded Reload
1457 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1458 ; GFX906-NEXT: v_lshlrev_b16_e32 v45, 8, v45
1459 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1460 ; GFX906-NEXT: v_or_b32_sdwa v45, v46, v45 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1461 ; GFX906-NEXT: buffer_load_dword v46, off, s[8:11], 0 offset:568 ; 4-byte Folded Reload
1462 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v45 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1463 ; GFX906-NEXT: buffer_load_dword v45, off, s[8:11], 0 offset:556 ; 4-byte Folded Reload
1464 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1465 ; GFX906-NEXT: v_lshlrev_b16_e32 v46, 8, v46
1466 ; GFX906-NEXT: v_or_b32_sdwa v46, v47, v46 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1467 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v46 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1468 ; GFX906-NEXT: buffer_load_dword v46, off, s[8:11], 0 offset:560 ; 4-byte Folded Reload
1469 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1470 ; GFX906-NEXT: v_lshlrev_b16_e32 v45, 8, v45
1471 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1472 ; GFX906-NEXT: v_or_b32_sdwa v45, v46, v45 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1473 ; GFX906-NEXT: v_or_b32_sdwa v2, v2, v45 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1474 ; GFX906-NEXT: buffer_load_dword v45, off, s[8:11], 0 offset:548 ; 4-byte Folded Reload
1475 ; GFX906-NEXT: buffer_load_dword v46, off, s[8:11], 0 offset:552 ; 4-byte Folded Reload
1476 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1477 ; GFX906-NEXT: v_or_b32_sdwa v3, v48, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1478 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1479 ; GFX906-NEXT: v_lshlrev_b16_e32 v45, 8, v45
1480 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1481 ; GFX906-NEXT: v_or_b32_sdwa v45, v46, v45 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1482 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v45 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1483 ; GFX906-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:64
1484 ; GFX906-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:544 ; 4-byte Folded Reload
1485 ; GFX906-NEXT: s_nop 0
1486 ; GFX906-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:540 ; 4-byte Folded Reload
1487 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:528 ; 4-byte Folded Reload
1488 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:516 ; 4-byte Folded Reload
1489 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1490 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
1491 ; GFX906-NEXT: v_or_b32_sdwa v0, v41, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1492 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1493 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1494 ; GFX906-NEXT: buffer_load_dword v41, off, s[8:11], 0 offset:532 ; 4-byte Folded Reload
1495 ; GFX906-NEXT: v_or_b32_sdwa v1, v42, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1496 ; GFX906-NEXT: buffer_load_dword v42, off, s[8:11], 0 offset:536 ; 4-byte Folded Reload
1497 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1498 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1499 ; GFX906-NEXT: v_or_b32_sdwa v2, v43, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1500 ; GFX906-NEXT: buffer_load_dword v43, off, s[8:11], 0 offset:524 ; 4-byte Folded Reload
1501 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1502 ; GFX906-NEXT: v_lshlrev_b16_e32 v41, 8, v41
1503 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1504 ; GFX906-NEXT: v_or_b32_sdwa v41, v42, v41 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1505 ; GFX906-NEXT: buffer_load_dword v42, off, s[8:11], 0 offset:520 ; 4-byte Folded Reload
1506 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v41 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1507 ; GFX906-NEXT: buffer_load_dword v41, off, s[8:11], 0 offset:508 ; 4-byte Folded Reload
1508 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1509 ; GFX906-NEXT: v_lshlrev_b16_e32 v42, 8, v42
1510 ; GFX906-NEXT: v_or_b32_sdwa v42, v43, v42 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1511 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v42 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1512 ; GFX906-NEXT: buffer_load_dword v42, off, s[8:11], 0 offset:512 ; 4-byte Folded Reload
1513 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1514 ; GFX906-NEXT: v_lshlrev_b16_e32 v41, 8, v41
1515 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1516 ; GFX906-NEXT: v_or_b32_sdwa v41, v42, v41 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1517 ; GFX906-NEXT: v_or_b32_sdwa v2, v2, v41 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1518 ; GFX906-NEXT: buffer_load_dword v41, off, s[8:11], 0 offset:500 ; 4-byte Folded Reload
1519 ; GFX906-NEXT: buffer_load_dword v42, off, s[8:11], 0 offset:504 ; 4-byte Folded Reload
1520 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1521 ; GFX906-NEXT: v_or_b32_sdwa v3, v44, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1522 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1523 ; GFX906-NEXT: v_lshlrev_b16_e32 v41, 8, v41
1524 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1525 ; GFX906-NEXT: v_or_b32_sdwa v41, v42, v41 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1526 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v41 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1527 ; GFX906-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:80
1528 ; GFX906-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:496 ; 4-byte Folded Reload
1529 ; GFX906-NEXT: s_nop 0
1530 ; GFX906-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:492 ; 4-byte Folded Reload
1531 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:480 ; 4-byte Folded Reload
1532 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:468 ; 4-byte Folded Reload
1533 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1534 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
1535 ; GFX906-NEXT: v_or_b32_sdwa v0, v37, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1536 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1537 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1538 ; GFX906-NEXT: buffer_load_dword v37, off, s[8:11], 0 offset:484 ; 4-byte Folded Reload
1539 ; GFX906-NEXT: v_or_b32_sdwa v1, v38, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1540 ; GFX906-NEXT: buffer_load_dword v38, off, s[8:11], 0 offset:488 ; 4-byte Folded Reload
1541 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1542 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1543 ; GFX906-NEXT: v_or_b32_sdwa v2, v39, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1544 ; GFX906-NEXT: buffer_load_dword v39, off, s[8:11], 0 offset:476 ; 4-byte Folded Reload
1545 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1546 ; GFX906-NEXT: v_lshlrev_b16_e32 v37, 8, v37
1547 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1548 ; GFX906-NEXT: v_or_b32_sdwa v37, v38, v37 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1549 ; GFX906-NEXT: buffer_load_dword v38, off, s[8:11], 0 offset:472 ; 4-byte Folded Reload
1550 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v37 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1551 ; GFX906-NEXT: buffer_load_dword v37, off, s[8:11], 0 offset:460 ; 4-byte Folded Reload
1552 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1553 ; GFX906-NEXT: v_lshlrev_b16_e32 v38, 8, v38
1554 ; GFX906-NEXT: v_or_b32_sdwa v38, v39, v38 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1555 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v38 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1556 ; GFX906-NEXT: buffer_load_dword v38, off, s[8:11], 0 offset:464 ; 4-byte Folded Reload
1557 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1558 ; GFX906-NEXT: v_lshlrev_b16_e32 v37, 8, v37
1559 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1560 ; GFX906-NEXT: v_or_b32_sdwa v37, v38, v37 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1561 ; GFX906-NEXT: v_or_b32_sdwa v2, v2, v37 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1562 ; GFX906-NEXT: buffer_load_dword v37, off, s[8:11], 0 offset:452 ; 4-byte Folded Reload
1563 ; GFX906-NEXT: buffer_load_dword v38, off, s[8:11], 0 offset:456 ; 4-byte Folded Reload
1564 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1565 ; GFX906-NEXT: v_or_b32_sdwa v3, v40, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1566 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1567 ; GFX906-NEXT: v_lshlrev_b16_e32 v37, 8, v37
1568 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1569 ; GFX906-NEXT: v_or_b32_sdwa v37, v38, v37 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1570 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v37 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1571 ; GFX906-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:96
1572 ; GFX906-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:448 ; 4-byte Folded Reload
1573 ; GFX906-NEXT: s_nop 0
1574 ; GFX906-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:444 ; 4-byte Folded Reload
1575 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:432 ; 4-byte Folded Reload
1576 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:420 ; 4-byte Folded Reload
1577 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1578 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
1579 ; GFX906-NEXT: v_or_b32_sdwa v0, v33, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1580 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1581 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1582 ; GFX906-NEXT: buffer_load_dword v33, off, s[8:11], 0 offset:436 ; 4-byte Folded Reload
1583 ; GFX906-NEXT: v_or_b32_sdwa v1, v34, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1584 ; GFX906-NEXT: buffer_load_dword v34, off, s[8:11], 0 offset:440 ; 4-byte Folded Reload
1585 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1586 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1587 ; GFX906-NEXT: v_or_b32_sdwa v2, v35, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1588 ; GFX906-NEXT: buffer_load_dword v35, off, s[8:11], 0 offset:428 ; 4-byte Folded Reload
1589 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1590 ; GFX906-NEXT: v_lshlrev_b16_e32 v33, 8, v33
1591 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1592 ; GFX906-NEXT: v_or_b32_sdwa v33, v34, v33 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1593 ; GFX906-NEXT: buffer_load_dword v34, off, s[8:11], 0 offset:424 ; 4-byte Folded Reload
1594 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v33 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1595 ; GFX906-NEXT: buffer_load_dword v33, off, s[8:11], 0 offset:412 ; 4-byte Folded Reload
1596 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1597 ; GFX906-NEXT: v_lshlrev_b16_e32 v34, 8, v34
1598 ; GFX906-NEXT: v_or_b32_sdwa v34, v35, v34 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1599 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1600 ; GFX906-NEXT: buffer_load_dword v34, off, s[8:11], 0 offset:416 ; 4-byte Folded Reload
1601 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1602 ; GFX906-NEXT: v_lshlrev_b16_e32 v33, 8, v33
1603 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1604 ; GFX906-NEXT: v_or_b32_sdwa v33, v34, v33 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1605 ; GFX906-NEXT: v_or_b32_sdwa v2, v2, v33 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1606 ; GFX906-NEXT: buffer_load_dword v33, off, s[8:11], 0 offset:404 ; 4-byte Folded Reload
1607 ; GFX906-NEXT: buffer_load_dword v34, off, s[8:11], 0 offset:408 ; 4-byte Folded Reload
1608 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1609 ; GFX906-NEXT: v_or_b32_sdwa v3, v36, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1610 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1611 ; GFX906-NEXT: v_lshlrev_b16_e32 v33, 8, v33
1612 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1613 ; GFX906-NEXT: v_or_b32_sdwa v33, v34, v33 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1614 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v33 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1615 ; GFX906-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:112
1616 ; GFX906-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:400 ; 4-byte Folded Reload
1617 ; GFX906-NEXT: s_nop 0
1618 ; GFX906-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:396 ; 4-byte Folded Reload
1619 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:384 ; 4-byte Folded Reload
1620 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:372 ; 4-byte Folded Reload
1621 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1622 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
1623 ; GFX906-NEXT: v_or_b32_sdwa v0, v29, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1624 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1625 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1626 ; GFX906-NEXT: buffer_load_dword v29, off, s[8:11], 0 offset:388 ; 4-byte Folded Reload
1627 ; GFX906-NEXT: v_or_b32_sdwa v1, v30, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1628 ; GFX906-NEXT: buffer_load_dword v30, off, s[8:11], 0 offset:392 ; 4-byte Folded Reload
1629 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1630 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1631 ; GFX906-NEXT: v_or_b32_sdwa v2, v31, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1632 ; GFX906-NEXT: buffer_load_dword v31, off, s[8:11], 0 offset:380 ; 4-byte Folded Reload
1633 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1634 ; GFX906-NEXT: v_lshlrev_b16_e32 v29, 8, v29
1635 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1636 ; GFX906-NEXT: v_or_b32_sdwa v29, v30, v29 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1637 ; GFX906-NEXT: buffer_load_dword v30, off, s[8:11], 0 offset:376 ; 4-byte Folded Reload
1638 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1639 ; GFX906-NEXT: buffer_load_dword v29, off, s[8:11], 0 offset:364 ; 4-byte Folded Reload
1640 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1641 ; GFX906-NEXT: v_lshlrev_b16_e32 v30, 8, v30
1642 ; GFX906-NEXT: v_or_b32_sdwa v30, v31, v30 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1643 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1644 ; GFX906-NEXT: buffer_load_dword v30, off, s[8:11], 0 offset:368 ; 4-byte Folded Reload
1645 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1646 ; GFX906-NEXT: v_lshlrev_b16_e32 v29, 8, v29
1647 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1648 ; GFX906-NEXT: v_or_b32_sdwa v29, v30, v29 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1649 ; GFX906-NEXT: v_or_b32_sdwa v2, v2, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1650 ; GFX906-NEXT: buffer_load_dword v29, off, s[8:11], 0 offset:356 ; 4-byte Folded Reload
1651 ; GFX906-NEXT: buffer_load_dword v30, off, s[8:11], 0 offset:360 ; 4-byte Folded Reload
1652 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1653 ; GFX906-NEXT: v_or_b32_sdwa v3, v32, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1654 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1655 ; GFX906-NEXT: v_lshlrev_b16_e32 v29, 8, v29
1656 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1657 ; GFX906-NEXT: v_or_b32_sdwa v29, v30, v29 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1658 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1659 ; GFX906-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:128
1660 ; GFX906-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:352 ; 4-byte Folded Reload
1661 ; GFX906-NEXT: s_nop 0
1662 ; GFX906-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:348 ; 4-byte Folded Reload
1663 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:336 ; 4-byte Folded Reload
1664 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:324 ; 4-byte Folded Reload
1665 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1666 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
1667 ; GFX906-NEXT: v_or_b32_sdwa v0, v25, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1668 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1669 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1670 ; GFX906-NEXT: buffer_load_dword v25, off, s[8:11], 0 offset:340 ; 4-byte Folded Reload
1671 ; GFX906-NEXT: v_or_b32_sdwa v1, v26, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1672 ; GFX906-NEXT: buffer_load_dword v26, off, s[8:11], 0 offset:344 ; 4-byte Folded Reload
1673 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1674 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1675 ; GFX906-NEXT: v_or_b32_sdwa v2, v27, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1676 ; GFX906-NEXT: buffer_load_dword v27, off, s[8:11], 0 offset:332 ; 4-byte Folded Reload
1677 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1678 ; GFX906-NEXT: v_lshlrev_b16_e32 v25, 8, v25
1679 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1680 ; GFX906-NEXT: v_or_b32_sdwa v25, v26, v25 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1681 ; GFX906-NEXT: buffer_load_dword v26, off, s[8:11], 0 offset:328 ; 4-byte Folded Reload
1682 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1683 ; GFX906-NEXT: buffer_load_dword v25, off, s[8:11], 0 offset:316 ; 4-byte Folded Reload
1684 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1685 ; GFX906-NEXT: v_lshlrev_b16_e32 v26, 8, v26
1686 ; GFX906-NEXT: v_or_b32_sdwa v26, v27, v26 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1687 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v26 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1688 ; GFX906-NEXT: buffer_load_dword v26, off, s[8:11], 0 offset:320 ; 4-byte Folded Reload
1689 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1690 ; GFX906-NEXT: v_lshlrev_b16_e32 v25, 8, v25
1691 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1692 ; GFX906-NEXT: v_or_b32_sdwa v25, v26, v25 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1693 ; GFX906-NEXT: v_or_b32_sdwa v2, v2, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1694 ; GFX906-NEXT: buffer_load_dword v25, off, s[8:11], 0 offset:308 ; 4-byte Folded Reload
1695 ; GFX906-NEXT: buffer_load_dword v26, off, s[8:11], 0 offset:312 ; 4-byte Folded Reload
1696 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1697 ; GFX906-NEXT: v_or_b32_sdwa v3, v28, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1698 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1699 ; GFX906-NEXT: v_lshlrev_b16_e32 v25, 8, v25
1700 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1701 ; GFX906-NEXT: v_or_b32_sdwa v25, v26, v25 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1702 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1703 ; GFX906-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:144
1704 ; GFX906-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:304 ; 4-byte Folded Reload
1705 ; GFX906-NEXT: s_nop 0
1706 ; GFX906-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:300 ; 4-byte Folded Reload
1707 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:288 ; 4-byte Folded Reload
1708 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:276 ; 4-byte Folded Reload
1709 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1710 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
1711 ; GFX906-NEXT: v_or_b32_sdwa v0, v21, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1712 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1713 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1714 ; GFX906-NEXT: buffer_load_dword v21, off, s[8:11], 0 offset:292 ; 4-byte Folded Reload
1715 ; GFX906-NEXT: v_or_b32_sdwa v1, v22, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1716 ; GFX906-NEXT: buffer_load_dword v22, off, s[8:11], 0 offset:296 ; 4-byte Folded Reload
1717 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1718 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1719 ; GFX906-NEXT: v_or_b32_sdwa v2, v23, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1720 ; GFX906-NEXT: buffer_load_dword v23, off, s[8:11], 0 offset:284 ; 4-byte Folded Reload
1721 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1722 ; GFX906-NEXT: v_lshlrev_b16_e32 v21, 8, v21
1723 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1724 ; GFX906-NEXT: v_or_b32_sdwa v21, v22, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1725 ; GFX906-NEXT: buffer_load_dword v22, off, s[8:11], 0 offset:280 ; 4-byte Folded Reload
1726 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1727 ; GFX906-NEXT: buffer_load_dword v21, off, s[8:11], 0 offset:268 ; 4-byte Folded Reload
1728 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1729 ; GFX906-NEXT: v_lshlrev_b16_e32 v22, 8, v22
1730 ; GFX906-NEXT: v_or_b32_sdwa v22, v23, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1731 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1732 ; GFX906-NEXT: buffer_load_dword v22, off, s[8:11], 0 offset:272 ; 4-byte Folded Reload
1733 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1734 ; GFX906-NEXT: v_lshlrev_b16_e32 v21, 8, v21
1735 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1736 ; GFX906-NEXT: v_or_b32_sdwa v21, v22, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1737 ; GFX906-NEXT: v_or_b32_sdwa v2, v2, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1738 ; GFX906-NEXT: buffer_load_dword v21, off, s[8:11], 0 offset:260 ; 4-byte Folded Reload
1739 ; GFX906-NEXT: buffer_load_dword v22, off, s[8:11], 0 offset:264 ; 4-byte Folded Reload
1740 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1741 ; GFX906-NEXT: v_or_b32_sdwa v3, v24, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1742 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1743 ; GFX906-NEXT: v_lshlrev_b16_e32 v21, 8, v21
1744 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1745 ; GFX906-NEXT: v_or_b32_sdwa v21, v22, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1746 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1747 ; GFX906-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:160
1748 ; GFX906-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:256 ; 4-byte Folded Reload
1749 ; GFX906-NEXT: s_nop 0
1750 ; GFX906-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:252 ; 4-byte Folded Reload
1751 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:240 ; 4-byte Folded Reload
1752 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:228 ; 4-byte Folded Reload
1753 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1754 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
1755 ; GFX906-NEXT: v_or_b32_sdwa v0, v17, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1756 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1757 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1758 ; GFX906-NEXT: buffer_load_dword v17, off, s[8:11], 0 offset:244 ; 4-byte Folded Reload
1759 ; GFX906-NEXT: v_or_b32_sdwa v1, v18, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1760 ; GFX906-NEXT: buffer_load_dword v18, off, s[8:11], 0 offset:248 ; 4-byte Folded Reload
1761 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1762 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1763 ; GFX906-NEXT: v_or_b32_sdwa v2, v19, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1764 ; GFX906-NEXT: buffer_load_dword v19, off, s[8:11], 0 offset:236 ; 4-byte Folded Reload
1765 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1766 ; GFX906-NEXT: v_lshlrev_b16_e32 v17, 8, v17
1767 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1768 ; GFX906-NEXT: v_or_b32_sdwa v17, v18, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1769 ; GFX906-NEXT: buffer_load_dword v18, off, s[8:11], 0 offset:232 ; 4-byte Folded Reload
1770 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1771 ; GFX906-NEXT: buffer_load_dword v17, off, s[8:11], 0 offset:220 ; 4-byte Folded Reload
1772 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1773 ; GFX906-NEXT: v_lshlrev_b16_e32 v18, 8, v18
1774 ; GFX906-NEXT: v_or_b32_sdwa v18, v19, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1775 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1776 ; GFX906-NEXT: buffer_load_dword v18, off, s[8:11], 0 offset:224 ; 4-byte Folded Reload
1777 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1778 ; GFX906-NEXT: v_lshlrev_b16_e32 v17, 8, v17
1779 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1780 ; GFX906-NEXT: v_or_b32_sdwa v17, v18, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1781 ; GFX906-NEXT: v_or_b32_sdwa v2, v2, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1782 ; GFX906-NEXT: buffer_load_dword v17, off, s[8:11], 0 offset:212 ; 4-byte Folded Reload
1783 ; GFX906-NEXT: buffer_load_dword v18, off, s[8:11], 0 offset:216 ; 4-byte Folded Reload
1784 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1785 ; GFX906-NEXT: v_or_b32_sdwa v3, v20, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1786 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1787 ; GFX906-NEXT: v_lshlrev_b16_e32 v17, 8, v17
1788 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1789 ; GFX906-NEXT: v_or_b32_sdwa v17, v18, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1790 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1791 ; GFX906-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:176
1792 ; GFX906-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:204 ; 4-byte Folded Reload
1793 ; GFX906-NEXT: s_nop 0
1794 ; GFX906-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:208 ; 4-byte Folded Reload
1795 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:196 ; 4-byte Folded Reload
1796 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:188 ; 4-byte Folded Reload
1797 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1798 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
1799 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1800 ; GFX906-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1801 ; GFX906-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:192 ; 4-byte Folded Reload
1802 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1803 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1804 ; GFX906-NEXT: v_or_b32_sdwa v3, v14, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1805 ; GFX906-NEXT: buffer_load_dword v14, off, s[8:11], 0 offset:168 ; 4-byte Folded Reload
1806 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1807 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1808 ; GFX906-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1809 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:200 ; 4-byte Folded Reload
1810 ; GFX906-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1811 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:184 ; 4-byte Folded Reload
1812 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1813 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1814 ; GFX906-NEXT: v_or_b32_sdwa v2, v13, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1815 ; GFX906-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1816 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:180 ; 4-byte Folded Reload
1817 ; GFX906-NEXT: buffer_load_dword v13, off, s[8:11], 0 offset:164 ; 4-byte Folded Reload
1818 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1819 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1820 ; GFX906-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1821 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:176 ; 4-byte Folded Reload
1822 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1823 ; GFX906-NEXT: v_lshlrev_b16_e32 v13, 8, v13
1824 ; GFX906-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1825 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1826 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1827 ; GFX906-NEXT: v_or_b32_sdwa v3, v15, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1828 ; GFX906-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1829 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:172 ; 4-byte Folded Reload
1830 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1831 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1832 ; GFX906-NEXT: v_or_b32_sdwa v3, v16, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1833 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1834 ; GFX906-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:192
1835 ; GFX906-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:160 ; 4-byte Folded Reload
1836 ; GFX906-NEXT: s_nop 0
1837 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:156 ; 4-byte Folded Reload
1838 ; GFX906-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:152 ; 4-byte Folded Reload
1839 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:144 ; 4-byte Folded Reload
1840 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1841 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
1842 ; GFX906-NEXT: v_or_b32_sdwa v0, v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1843 ; GFX906-NEXT: buffer_load_dword v9, off, s[8:11], 0 offset:132 ; 4-byte Folded Reload
1844 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1845 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1846 ; GFX906-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1847 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1848 ; GFX906-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:148 ; 4-byte Folded Reload
1849 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:140 ; 4-byte Folded Reload
1850 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1851 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1852 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1853 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1854 ; GFX906-NEXT: v_or_b32_sdwa v1, v10, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1855 ; GFX906-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1856 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1857 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:136 ; 4-byte Folded Reload
1858 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:128 ; 4-byte Folded Reload
1859 ; GFX906-NEXT: buffer_load_dword v10, off, s[8:11], 0 offset:120 ; 4-byte Folded Reload
1860 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1861 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1862 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1863 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1864 ; GFX906-NEXT: v_or_b32_sdwa v2, v11, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1865 ; GFX906-NEXT: v_or_b32_sdwa v3, v9, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1866 ; GFX906-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1867 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:124 ; 4-byte Folded Reload
1868 ; GFX906-NEXT: buffer_load_dword v9, off, s[8:11], 0 offset:116 ; 4-byte Folded Reload
1869 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1870 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1871 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1872 ; GFX906-NEXT: v_lshlrev_b16_e32 v9, 8, v9
1873 ; GFX906-NEXT: v_or_b32_sdwa v3, v12, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1874 ; GFX906-NEXT: v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1875 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1876 ; GFX906-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:208
1877 ; GFX906-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:112 ; 4-byte Folded Reload
1878 ; GFX906-NEXT: s_nop 0
1879 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:108 ; 4-byte Folded Reload
1880 ; GFX906-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:104 ; 4-byte Folded Reload
1881 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:96 ; 4-byte Folded Reload
1882 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1883 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
1884 ; GFX906-NEXT: v_or_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1885 ; GFX906-NEXT: buffer_load_dword v5, off, s[8:11], 0 offset:84 ; 4-byte Folded Reload
1886 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1887 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1888 ; GFX906-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1889 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1890 ; GFX906-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:100 ; 4-byte Folded Reload
1891 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:92 ; 4-byte Folded Reload
1892 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1893 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1894 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1895 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1896 ; GFX906-NEXT: v_or_b32_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1897 ; GFX906-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1898 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1899 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:88 ; 4-byte Folded Reload
1900 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:80 ; 4-byte Folded Reload
1901 ; GFX906-NEXT: buffer_load_dword v6, off, s[8:11], 0 offset:72 ; 4-byte Folded Reload
1902 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1903 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1904 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1905 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1906 ; GFX906-NEXT: v_or_b32_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1907 ; GFX906-NEXT: v_or_b32_sdwa v3, v5, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1908 ; GFX906-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1909 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:76 ; 4-byte Folded Reload
1910 ; GFX906-NEXT: buffer_load_dword v5, off, s[8:11], 0 offset:68 ; 4-byte Folded Reload
1911 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1912 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1913 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1914 ; GFX906-NEXT: v_lshlrev_b16_e32 v5, 8, v5
1915 ; GFX906-NEXT: v_or_b32_sdwa v3, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1916 ; GFX906-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1917 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1918 ; GFX906-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:224
1919 ; GFX906-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:64 ; 4-byte Folded Reload
1920 ; GFX906-NEXT: s_nop 0
1921 ; GFX906-NEXT: buffer_load_dword v5, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
1922 ; GFX906-NEXT: buffer_load_dword v6, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload
1923 ; GFX906-NEXT: buffer_load_dword v7, off, s[8:11], 0 offset:12 ; 4-byte Folded Reload
1924 ; GFX906-NEXT: buffer_load_dword v8, off, s[8:11], 0 offset:16 ; 4-byte Folded Reload
1925 ; GFX906-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:56 ; 4-byte Folded Reload
1926 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:60 ; 4-byte Folded Reload
1927 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:48 ; 4-byte Folded Reload
1928 ; GFX906-NEXT: s_waitcnt vmcnt(7)
1929 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0
1930 ; GFX906-NEXT: s_waitcnt vmcnt(3)
1931 ; GFX906-NEXT: v_or_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1932 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1933 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1934 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1935 ; GFX906-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1936 ; GFX906-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1937 ; GFX906-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:52 ; 4-byte Folded Reload
1938 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:44 ; 4-byte Folded Reload
1939 ; GFX906-NEXT: buffer_load_dword v5, off, s[8:11], 0 offset:36 ; 4-byte Folded Reload
1940 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1941 ; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v1
1942 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1943 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1944 ; GFX906-NEXT: v_or_b32_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1945 ; GFX906-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1946 ; GFX906-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1947 ; GFX906-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:40 ; 4-byte Folded Reload
1948 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:32 ; 4-byte Folded Reload
1949 ; GFX906-NEXT: buffer_load_dword v6, off, s[8:11], 0 offset:24 ; 4-byte Folded Reload
1950 ; GFX906-NEXT: s_waitcnt vmcnt(2)
1951 ; GFX906-NEXT: v_lshlrev_b16_e32 v2, 8, v2
1952 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1953 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1954 ; GFX906-NEXT: v_or_b32_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1955 ; GFX906-NEXT: v_or_b32_sdwa v3, v5, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1956 ; GFX906-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1957 ; GFX906-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:28 ; 4-byte Folded Reload
1958 ; GFX906-NEXT: buffer_load_dword v5, off, s[8:11], 0 offset:20 ; 4-byte Folded Reload
1959 ; GFX906-NEXT: s_waitcnt vmcnt(1)
1960 ; GFX906-NEXT: v_lshlrev_b16_e32 v3, 8, v3
1961 ; GFX906-NEXT: s_waitcnt vmcnt(0)
1962 ; GFX906-NEXT: v_lshlrev_b16_e32 v5, 8, v5
1963 ; GFX906-NEXT: v_or_b32_sdwa v3, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1964 ; GFX906-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1965 ; GFX906-NEXT: v_or_b32_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1966 ; GFX906-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:240
1967 ; GFX906-NEXT: s_endpgm
1969 %idx = call i32 @llvm.amdgcn.workitem.id.x()
1970 %gep1 = getelementptr <8 x i8>, ptr addrspace(1) %src1, i32 %idx
1971 %vec1 = load <256 x i8>, ptr addrspace(1) %gep1
1972 %gep2 = getelementptr <8 x i8>, ptr addrspace(1) %src2, i32 %idx
1973 %vec2 = load <256 x i8>, ptr addrspace(1) %gep2
1974 %cmp = icmp ult i32 %idx, 15
1975 br i1 %cmp, label %bb.1, label %bb.2
1980 %tmp5 = phi <256 x i8> [ %vec1, %entry ], [ %vec2, %bb.1 ]
1981 store <256 x i8> %tmp5, ptr addrspace(1) %dst, align 4
; Declaration of the llvm.amdgcn.workitem.id.x intrinsic: no arguments, i32
; result. The test function above calls it to obtain %idx, which is used as the
; getelementptr index into both source pointers and as the operand of the
; `icmp ult i32 %idx, 15` branch condition.
1985 declare i32 @llvm.amdgcn.workitem.id.x()