1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-NOHSA-SI %s
3 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-HSA %s
4 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-NOHSA-VI %s
5 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=EG %s
6 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=CM %s
8 ; FIXME: r600 is broken because the bigger testcases spill, and spilling is not implemented for that target.
; Scalar case: load a single i16 from global memory (addrspace 1) at %in and
; store it to %out.
; Per the checks below, the GCN targets use one 16-bit load/store pair
; (buffer_load_ushort/buffer_store_short, or the flat_* forms on HSA), while
; the r600 targets (EG and CM prefixes) fetch with VTX_READ_16 and write the
; result through MEM_RAT MSKOR.
10 define amdgpu_kernel void @global_load_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
11 ; GCN-NOHSA-SI-LABEL: global_load_i16:
12 ; GCN-NOHSA-SI: ; %bb.0: ; %entry
13 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
14 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
15 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
16 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
17 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
18 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
19 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
20 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
21 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
22 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
23 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
24 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
25 ; GCN-NOHSA-SI-NEXT: buffer_store_short v0, off, s[4:7], 0
26 ; GCN-NOHSA-SI-NEXT: s_endpgm
28 ; GCN-HSA-LABEL: global_load_i16:
29 ; GCN-HSA: ; %bb.0: ; %entry
30 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
31 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
32 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
33 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
34 ; GCN-HSA-NEXT: flat_load_ushort v2, v[0:1]
35 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
36 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
37 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
38 ; GCN-HSA-NEXT: flat_store_short v[0:1], v2
39 ; GCN-HSA-NEXT: s_endpgm
41 ; GCN-NOHSA-VI-LABEL: global_load_i16:
42 ; GCN-NOHSA-VI: ; %bb.0: ; %entry
43 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
44 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
45 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
46 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
47 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
48 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
49 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
50 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
51 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
52 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
53 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
54 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
55 ; GCN-NOHSA-VI-NEXT: buffer_store_short v0, off, s[4:7], 0
56 ; GCN-NOHSA-VI-NEXT: s_endpgm
58 ; EG-LABEL: global_load_i16:
59 ; EG: ; %bb.0: ; %entry
60 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
62 ; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
63 ; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
66 ; EG-NEXT: Fetch clause starting at 6:
67 ; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
68 ; EG-NEXT: ALU clause starting at 8:
69 ; EG-NEXT: MOV * T0.X, KC0[2].Z,
70 ; EG-NEXT: ALU clause starting at 9:
71 ; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
72 ; EG-NEXT: AND_INT * T1.W, T0.X, literal.y,
73 ; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
74 ; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
75 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
76 ; EG-NEXT: LSHL T0.X, T1.W, PV.W,
77 ; EG-NEXT: LSHL * T0.W, literal.x, PV.W,
78 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
79 ; EG-NEXT: MOV T0.Y, 0.0,
80 ; EG-NEXT: MOV * T0.Z, 0.0,
81 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
82 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
84 ; CM-LABEL: global_load_i16:
85 ; CM: ; %bb.0: ; %entry
86 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
88 ; CM-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
89 ; CM-NEXT: MEM_RAT MSKOR T0.XW, T1.X
92 ; CM-NEXT: Fetch clause starting at 6:
93 ; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
94 ; CM-NEXT: ALU clause starting at 8:
95 ; CM-NEXT: MOV * T0.X, KC0[2].Z,
96 ; CM-NEXT: ALU clause starting at 9:
97 ; CM-NEXT: AND_INT * T0.W, KC0[2].Y, literal.x,
98 ; CM-NEXT: 3(4.203895e-45), 0(0.000000e+00)
99 ; CM-NEXT: AND_INT T0.Z, T0.X, literal.x,
100 ; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
101 ; CM-NEXT: 65535(9.183409e-41), 3(4.203895e-45)
102 ; CM-NEXT: LSHL T0.X, PV.Z, PV.W,
103 ; CM-NEXT: LSHL * T0.W, literal.x, PV.W,
104 ; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
105 ; CM-NEXT: MOV T0.Y, 0.0,
106 ; CM-NEXT: MOV * T0.Z, 0.0,
107 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
108 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; IR under test: neither the load nor the store specifies an explicit align.
110 %ld = load i16, ptr addrspace(1) %in
111 store i16 %ld, ptr addrspace(1) %out
; <2 x i16> case: copy a two-element i16 vector from %in to %out (both in
; addrspace 1).
; Per the checks below, every target moves the vector as one 32-bit value:
; a dword buffer/flat load and store on GCN, and VTX_READ_32 followed by a
; single MEM_RAT_CACHELESS store on the r600 targets.
115 define amdgpu_kernel void @global_load_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
116 ; GCN-NOHSA-SI-LABEL: global_load_v2i16:
117 ; GCN-NOHSA-SI: ; %bb.0: ; %entry
118 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
119 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
120 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
121 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
122 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
123 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
124 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
125 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
126 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
127 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
128 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
129 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
130 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
131 ; GCN-NOHSA-SI-NEXT: s_endpgm
133 ; GCN-HSA-LABEL: global_load_v2i16:
134 ; GCN-HSA: ; %bb.0: ; %entry
135 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
136 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
137 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
138 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
139 ; GCN-HSA-NEXT: flat_load_dword v2, v[0:1]
140 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
141 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
142 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
143 ; GCN-HSA-NEXT: flat_store_dword v[0:1], v2
144 ; GCN-HSA-NEXT: s_endpgm
146 ; GCN-NOHSA-VI-LABEL: global_load_v2i16:
147 ; GCN-NOHSA-VI: ; %bb.0: ; %entry
148 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
149 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
150 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
151 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
152 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
153 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
154 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
155 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
156 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
157 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
158 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
159 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
160 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
161 ; GCN-NOHSA-VI-NEXT: s_endpgm
163 ; EG-LABEL: global_load_v2i16:
164 ; EG: ; %bb.0: ; %entry
165 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
167 ; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
168 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
171 ; EG-NEXT: Fetch clause starting at 6:
172 ; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
173 ; EG-NEXT: ALU clause starting at 8:
174 ; EG-NEXT: MOV * T0.X, KC0[2].Z,
175 ; EG-NEXT: ALU clause starting at 9:
176 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
177 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
179 ; CM-LABEL: global_load_v2i16:
180 ; CM: ; %bb.0: ; %entry
181 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
183 ; CM-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
184 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
187 ; CM-NEXT: Fetch clause starting at 6:
188 ; CM-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
189 ; CM-NEXT: ALU clause starting at 8:
190 ; CM-NEXT: MOV * T0.X, KC0[2].Z,
191 ; CM-NEXT: ALU clause starting at 9:
192 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
193 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; IR under test: neither the load nor the store specifies an explicit align.
195 %ld = load <2 x i16>, ptr addrspace(1) %in
196 store <2 x i16> %ld, ptr addrspace(1) %out
; <3 x i16> case: copy a three-element i16 vector (an odd element count) from
; %in to %out, both in addrspace 1.
; Per the checks below, the GCN targets load 64 bits (dwordx2) and write the
; result back as one dword plus one short at byte offset 4. The r600 targets
; issue three VTX_READ_16 fetches (offsets 0, 2 and 4), pack the first two
; elements with a shift and an OR for a dword store, and write the third
; element through MEM_RAT MSKOR.
200 define amdgpu_kernel void @global_load_v3i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
201 ; GCN-NOHSA-SI-LABEL: global_load_v3i16:
202 ; GCN-NOHSA-SI: ; %bb.0: ; %entry
203 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
204 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
205 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
206 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
207 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
208 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
209 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
210 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
211 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
212 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
213 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
214 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
215 ; GCN-NOHSA-SI-NEXT: buffer_store_short v1, off, s[4:7], 0 offset:4
216 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
217 ; GCN-NOHSA-SI-NEXT: s_endpgm
219 ; GCN-HSA-LABEL: global_load_v3i16:
220 ; GCN-HSA: ; %bb.0: ; %entry
221 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
222 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
223 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
224 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
225 ; GCN-HSA-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
226 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 4
227 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
228 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
229 ; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1
230 ; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
231 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0
232 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
233 ; GCN-HSA-NEXT: flat_store_short v[4:5], v1
234 ; GCN-HSA-NEXT: flat_store_dword v[2:3], v0
235 ; GCN-HSA-NEXT: s_endpgm
237 ; GCN-NOHSA-VI-LABEL: global_load_v3i16:
238 ; GCN-NOHSA-VI: ; %bb.0: ; %entry
239 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
240 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
241 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
242 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
243 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
244 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
245 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
246 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
247 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
248 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
249 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
250 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
251 ; GCN-NOHSA-VI-NEXT: buffer_store_short v1, off, s[4:7], 0 offset:4
252 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
253 ; GCN-NOHSA-VI-NEXT: s_endpgm
255 ; EG-LABEL: global_load_v3i16:
256 ; EG: ; %bb.0: ; %entry
257 ; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
259 ; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
260 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
261 ; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X
263 ; EG-NEXT: Fetch clause starting at 6:
264 ; EG-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1
265 ; EG-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1
266 ; EG-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1
267 ; EG-NEXT: ALU clause starting at 12:
268 ; EG-NEXT: MOV * T5.X, KC0[2].Z,
269 ; EG-NEXT: ALU clause starting at 13:
270 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
271 ; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
272 ; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
273 ; EG-NEXT: AND_INT * T2.W, T5.X, literal.y,
274 ; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
275 ; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
276 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
277 ; EG-NEXT: LSHL T5.X, T2.W, PV.W,
278 ; EG-NEXT: LSHL * T5.W, literal.x, PV.W,
279 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
280 ; EG-NEXT: MOV T5.Y, 0.0,
281 ; EG-NEXT: MOV * T5.Z, 0.0,
282 ; EG-NEXT: LSHR T8.X, T0.W, literal.x,
283 ; EG-NEXT: LSHL T0.W, T7.X, literal.y,
284 ; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
285 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
286 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
287 ; EG-NEXT: OR_INT T6.X, PV.W, PS,
288 ; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
289 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
291 ; CM-LABEL: global_load_v3i16:
292 ; CM: ; %bb.0: ; %entry
293 ; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
295 ; CM-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
296 ; CM-NEXT: MEM_RAT MSKOR T5.XW, T8.X
297 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6.X, T7.X
299 ; CM-NEXT: Fetch clause starting at 6:
300 ; CM-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1
301 ; CM-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1
302 ; CM-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1
303 ; CM-NEXT: ALU clause starting at 12:
304 ; CM-NEXT: MOV * T5.X, KC0[2].Z,
305 ; CM-NEXT: ALU clause starting at 13:
306 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
307 ; CM-NEXT: 4(5.605194e-45), 0(0.000000e+00)
308 ; CM-NEXT: AND_INT * T1.W, PV.W, literal.x,
309 ; CM-NEXT: 3(4.203895e-45), 0(0.000000e+00)
310 ; CM-NEXT: AND_INT T0.Z, T5.X, literal.x,
311 ; CM-NEXT: LSHL * T1.W, PV.W, literal.y,
312 ; CM-NEXT: 65535(9.183409e-41), 3(4.203895e-45)
313 ; CM-NEXT: LSHL T5.X, PV.Z, PV.W,
314 ; CM-NEXT: LSHL * T5.W, literal.x, PV.W,
315 ; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
316 ; CM-NEXT: MOV T5.Y, 0.0,
317 ; CM-NEXT: MOV * T5.Z, 0.0,
318 ; CM-NEXT: LSHL T0.Z, T7.X, literal.x,
319 ; CM-NEXT: AND_INT * T1.W, T6.X, literal.y, BS:VEC_120/SCL_212
320 ; CM-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
321 ; CM-NEXT: OR_INT * T6.X, PV.Z, PV.W,
322 ; CM-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
323 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
324 ; CM-NEXT: LSHR * T8.X, T0.W, literal.x,
325 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; IR under test: neither the load nor the store specifies an explicit align.
327 %ld = load <3 x i16>, ptr addrspace(1) %in
328 store <3 x i16> %ld, ptr addrspace(1) %out
; <4 x i16> case: copy a four-element i16 vector from %in to %out (both in
; addrspace 1).
; Per the checks below, every target moves the vector as one 64-bit value:
; a dwordx2 buffer/flat load and store on GCN, and VTX_READ_64 followed by a
; single MEM_RAT_CACHELESS store on the r600 targets.
332 define amdgpu_kernel void @global_load_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
333 ; GCN-NOHSA-SI-LABEL: global_load_v4i16:
334 ; GCN-NOHSA-SI: ; %bb.0: ; %entry
335 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
336 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
337 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
338 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
339 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
340 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
341 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
342 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
343 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
344 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
345 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
346 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
347 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
348 ; GCN-NOHSA-SI-NEXT: s_endpgm
350 ; GCN-HSA-LABEL: global_load_v4i16:
351 ; GCN-HSA: ; %bb.0: ; %entry
352 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
353 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
354 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
355 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
356 ; GCN-HSA-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
357 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0
358 ; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1
359 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
360 ; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
361 ; GCN-HSA-NEXT: s_endpgm
363 ; GCN-NOHSA-VI-LABEL: global_load_v4i16:
364 ; GCN-NOHSA-VI: ; %bb.0: ; %entry
365 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
366 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
367 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
368 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
369 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
370 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
371 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
372 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
373 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
374 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
375 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
376 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
377 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
378 ; GCN-NOHSA-VI-NEXT: s_endpgm
380 ; EG-LABEL: global_load_v4i16:
381 ; EG: ; %bb.0: ; %entry
382 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
384 ; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
385 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
388 ; EG-NEXT: Fetch clause starting at 6:
389 ; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
390 ; EG-NEXT: ALU clause starting at 8:
391 ; EG-NEXT: MOV * T0.X, KC0[2].Z,
392 ; EG-NEXT: ALU clause starting at 9:
393 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
394 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
396 ; CM-LABEL: global_load_v4i16:
397 ; CM: ; %bb.0: ; %entry
398 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
400 ; CM-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
401 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
404 ; CM-NEXT: Fetch clause starting at 6:
405 ; CM-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
406 ; CM-NEXT: ALU clause starting at 8:
407 ; CM-NEXT: MOV * T0.X, KC0[2].Z,
408 ; CM-NEXT: ALU clause starting at 9:
409 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
410 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; IR under test: neither the load nor the store specifies an explicit align.
412 %ld = load <4 x i16>, ptr addrspace(1) %in
413 store <4 x i16> %ld, ptr addrspace(1) %out
; <8 x i16> case: copy an eight-element i16 vector from %in to %out (both in
; addrspace 1).
; Per the checks below, every target moves the vector as one 128-bit value:
; a dwordx4 buffer/flat load and store on GCN, and VTX_READ_128 followed by a
; single MEM_RAT_CACHELESS store on the r600 targets.
417 define amdgpu_kernel void @global_load_v8i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
418 ; GCN-NOHSA-SI-LABEL: global_load_v8i16:
419 ; GCN-NOHSA-SI: ; %bb.0: ; %entry
420 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
421 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
422 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
423 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
424 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
425 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
426 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
427 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
428 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
429 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
430 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
431 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
432 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
433 ; GCN-NOHSA-SI-NEXT: s_endpgm
435 ; GCN-HSA-LABEL: global_load_v8i16:
436 ; GCN-HSA: ; %bb.0: ; %entry
437 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
438 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
439 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
440 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
441 ; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
442 ; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
443 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
444 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
445 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
446 ; GCN-HSA-NEXT: s_endpgm
448 ; GCN-NOHSA-VI-LABEL: global_load_v8i16:
449 ; GCN-NOHSA-VI: ; %bb.0: ; %entry
450 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
451 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
452 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
453 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
454 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
455 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
456 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
457 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
458 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
459 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
460 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
461 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
462 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
463 ; GCN-NOHSA-VI-NEXT: s_endpgm
465 ; EG-LABEL: global_load_v8i16:
466 ; EG: ; %bb.0: ; %entry
467 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
469 ; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
470 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
473 ; EG-NEXT: Fetch clause starting at 6:
474 ; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
475 ; EG-NEXT: ALU clause starting at 8:
476 ; EG-NEXT: MOV * T0.X, KC0[2].Z,
477 ; EG-NEXT: ALU clause starting at 9:
478 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
479 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
481 ; CM-LABEL: global_load_v8i16:
482 ; CM: ; %bb.0: ; %entry
483 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
485 ; CM-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
486 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
489 ; CM-NEXT: Fetch clause starting at 6:
490 ; CM-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
491 ; CM-NEXT: ALU clause starting at 8:
492 ; CM-NEXT: MOV * T0.X, KC0[2].Z,
493 ; CM-NEXT: ALU clause starting at 9:
494 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
495 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; IR under test: neither the load nor the store specifies an explicit align.
497 %ld = load <8 x i16>, ptr addrspace(1) %in
498 store <8 x i16> %ld, ptr addrspace(1) %out
; <16 x i16> case: copy a sixteen-element i16 vector (256 bits) from %in to
; %out, both in addrspace 1.
; Per the checks below, every target splits the copy into two 128-bit halves
; at byte offsets 0 and 16: two dwordx4 loads and two dwordx4 stores on GCN
; (the HSA variant computes the +16 addresses with s_add_u32/s_addc_u32), and
; two VTX_READ_128 fetches with two MEM_RAT_CACHELESS stores on r600.
502 define amdgpu_kernel void @global_load_v16i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
503 ; GCN-NOHSA-SI-LABEL: global_load_v16i16:
504 ; GCN-NOHSA-SI: ; %bb.0: ; %entry
505 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
506 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
507 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
508 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
509 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
510 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
511 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
512 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
513 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:16
514 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0
515 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
516 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
517 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1)
518 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
519 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1)
520 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
521 ; GCN-NOHSA-SI-NEXT: s_endpgm
523 ; GCN-HSA-LABEL: global_load_v16i16:
524 ; GCN-HSA: ; %bb.0: ; %entry
525 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
526 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
527 ; GCN-HSA-NEXT: s_add_u32 s4, s0, 16
528 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
529 ; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0
530 ; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
531 ; GCN-HSA-NEXT: s_add_u32 s2, s2, 16
532 ; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0
533 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
534 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
535 ; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
536 ; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
537 ; GCN-HSA-NEXT: v_mov_b32_e32 v11, s5
538 ; GCN-HSA-NEXT: v_mov_b32_e32 v9, s1
539 ; GCN-HSA-NEXT: v_mov_b32_e32 v10, s4
540 ; GCN-HSA-NEXT: v_mov_b32_e32 v8, s0
541 ; GCN-HSA-NEXT: s_waitcnt vmcnt(1)
542 ; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[0:3]
543 ; GCN-HSA-NEXT: s_waitcnt vmcnt(1)
544 ; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7]
545 ; GCN-HSA-NEXT: s_endpgm
547 ; GCN-NOHSA-VI-LABEL: global_load_v16i16:
548 ; GCN-NOHSA-VI: ; %bb.0: ; %entry
549 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
550 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
551 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
552 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
553 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
554 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
555 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
556 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
557 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:16
558 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0
559 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
560 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
561 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1)
562 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
563 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1)
564 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
565 ; GCN-NOHSA-VI-NEXT: s_endpgm
567 ; EG-LABEL: global_load_v16i16:
568 ; EG: ; %bb.0: ; %entry
569 ; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
571 ; EG-NEXT: ALU 1, @13, KC0[CB0:0-32], KC1[]
572 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
574 ; EG-NEXT: ALU 3, @15, KC0[CB0:0-32], KC1[]
575 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
577 ; EG-NEXT: Fetch clause starting at 8:
578 ; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 0, #1
579 ; EG-NEXT: Fetch clause starting at 10:
580 ; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 16, #1
581 ; EG-NEXT: ALU clause starting at 12:
582 ; EG-NEXT: MOV * T0.X, KC0[2].Z,
583 ; EG-NEXT: ALU clause starting at 13:
584 ; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x,
585 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
586 ; EG-NEXT: ALU clause starting at 15:
587 ; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x,
588 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
589 ; EG-NEXT: LSHR * T1.X, PV.W, literal.x,
590 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
592 ; CM-LABEL: global_load_v16i16:
593 ; CM: ; %bb.0: ; %entry
594 ; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
596 ; CM-NEXT: ALU 1, @13, KC0[CB0:0-32], KC1[]
597 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T2.X
599 ; CM-NEXT: ALU 3, @15, KC0[CB0:0-32], KC1[]
600 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
602 ; CM-NEXT: Fetch clause starting at 8:
603 ; CM-NEXT: VTX_READ_128 T1.XYZW, T0.X, 0, #1
604 ; CM-NEXT: Fetch clause starting at 10:
605 ; CM-NEXT: VTX_READ_128 T0.XYZW, T0.X, 16, #1
606 ; CM-NEXT: ALU clause starting at 12:
607 ; CM-NEXT: MOV * T0.X, KC0[2].Z,
608 ; CM-NEXT: ALU clause starting at 13:
609 ; CM-NEXT: LSHR * T2.X, KC0[2].Y, literal.x,
610 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
611 ; CM-NEXT: ALU clause starting at 15:
612 ; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x,
613 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
614 ; CM-NEXT: LSHR * T1.X, PV.W, literal.x,
615 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; IR under test: neither the load nor the store specifies an explicit align.
617 %ld = load <16 x i16>, ptr addrspace(1) %in
618 store <16 x i16> %ld, ptr addrspace(1) %out
622 define amdgpu_kernel void @global_load_v16i16_align2(ptr addrspace(1) %in, ptr addrspace(1) %out) #0 {
623 ; GCN-NOHSA-SI-LABEL: global_load_v16i16_align2:
624 ; GCN-NOHSA-SI: ; %bb.0: ; %entry
625 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
626 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, 0xf000
627 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, -1
628 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, s10
629 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, s11
630 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
631 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s4
632 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s5
633 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s6
634 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s7
635 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
636 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 offset:2
637 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v4, off, s[8:11], 0 offset:4
638 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v2, off, s[8:11], 0 offset:6
639 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v5, off, s[8:11], 0 offset:8
640 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v3, off, s[8:11], 0 offset:10
641 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v6, off, s[8:11], 0 offset:12
642 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v7, off, s[8:11], 0 offset:14
643 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v8, off, s[8:11], 0 offset:16
644 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v9, off, s[8:11], 0 offset:18
645 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v10, off, s[8:11], 0 offset:20
646 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v11, off, s[8:11], 0 offset:22
647 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v12, off, s[8:11], 0 offset:24
648 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v13, off, s[8:11], 0 offset:26
649 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v14, off, s[8:11], 0 offset:28
650 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v15, off, s[8:11], 0 offset:30
651 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(8)
652 ; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v7, 16, v7
653 ; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v16, 16, v3
654 ; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v17, 16, v2
655 ; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v18, 16, v1
656 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
657 ; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v15, 16, v15
658 ; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v13, 16, v13
659 ; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v11, 16, v11
660 ; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v9, 16, v9
661 ; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v3, v7, v6
662 ; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v2, v16, v5
663 ; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v1, v17, v4
664 ; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v0, v18, v0
665 ; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v7, v15, v14
666 ; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v6, v13, v12
667 ; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v5, v11, v10
668 ; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v4, v9, v8
669 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
670 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
671 ; GCN-NOHSA-SI-NEXT: s_endpgm
673 ; GCN-HSA-LABEL: global_load_v16i16_align2:
674 ; GCN-HSA: ; %bb.0: ; %entry
675 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
676 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
677 ; GCN-HSA-NEXT: s_add_u32 s4, s2, 16
678 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
679 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0
680 ; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
681 ; GCN-HSA-NEXT: s_add_u32 s0, s0, 16
682 ; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0
683 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
684 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
685 ; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
686 ; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
687 ; GCN-HSA-NEXT: v_mov_b32_e32 v11, s5
688 ; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3
689 ; GCN-HSA-NEXT: v_mov_b32_e32 v10, s4
690 ; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2
691 ; GCN-HSA-NEXT: s_waitcnt vmcnt(1)
692 ; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[0:3]
693 ; GCN-HSA-NEXT: s_waitcnt vmcnt(1)
694 ; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7]
695 ; GCN-HSA-NEXT: s_endpgm
697 ; GCN-NOHSA-VI-LABEL: global_load_v16i16_align2:
698 ; GCN-NOHSA-VI: ; %bb.0: ; %entry
699 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
700 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
701 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
702 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
703 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
704 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
705 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 offset:14
706 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v1, off, s[4:7], 0 offset:10
707 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v2, off, s[4:7], 0 offset:6
708 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v3, off, s[4:7], 0 offset:2
709 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v4, off, s[4:7], 0 offset:30
710 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v5, off, s[4:7], 0 offset:26
711 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v6, off, s[4:7], 0 offset:22
712 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v7, off, s[4:7], 0 offset:18
713 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v8, off, s[4:7], 0 offset:12
714 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v9, off, s[4:7], 0 offset:8
715 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v10, off, s[4:7], 0 offset:4
716 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v11, off, s[4:7], 0
717 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v12, off, s[4:7], 0 offset:28
718 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v13, off, s[4:7], 0 offset:24
719 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v14, off, s[4:7], 0 offset:20
720 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v15, off, s[4:7], 0 offset:16
721 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s2
722 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s3
723 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(14)
724 ; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
725 ; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
726 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(13)
727 ; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v16, 16, v2
728 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(12)
729 ; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v17, 16, v3
730 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(11)
731 ; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v4, 16, v4
732 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(10)
733 ; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v5, 16, v5
734 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(9)
735 ; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v18, 16, v6
736 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(8)
737 ; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v19, 16, v7
738 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(7)
739 ; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v3, v8, v0
740 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(6)
741 ; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v2, v9, v1
742 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(5)
743 ; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v1, v10, v16
744 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(4)
745 ; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v0, v11, v17
746 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3)
747 ; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v7, v12, v4
748 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(2)
749 ; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v6, v13, v5
750 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1)
751 ; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v5, v14, v18
752 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
753 ; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v4, v15, v19
754 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
755 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
756 ; GCN-NOHSA-VI-NEXT: s_endpgm
758 ; EG-LABEL: global_load_v16i16_align2:
759 ; EG: ; %bb.0: ; %entry
760 ; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
762 ; EG-NEXT: ALU 4, @11, KC0[CB0:0-32], KC1[]
763 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T3.X, 0
764 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1
766 ; EG-NEXT: Fetch clause starting at 6:
767 ; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1
768 ; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
769 ; EG-NEXT: ALU clause starting at 10:
770 ; EG-NEXT: MOV * T0.X, KC0[2].Y,
771 ; EG-NEXT: ALU clause starting at 11:
772 ; EG-NEXT: LSHR T2.X, KC0[2].Z, literal.x,
773 ; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.y,
774 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
775 ; EG-NEXT: LSHR * T3.X, PV.W, literal.x,
776 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
778 ; CM-LABEL: global_load_v16i16_align2:
779 ; CM: ; %bb.0: ; %entry
780 ; CM-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
782 ; CM-NEXT: ALU 5, @11, KC0[CB0:0-32], KC1[]
783 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T3.X
784 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T2.X
786 ; CM-NEXT: Fetch clause starting at 6:
787 ; CM-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1
788 ; CM-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
789 ; CM-NEXT: ALU clause starting at 10:
790 ; CM-NEXT: MOV * T0.X, KC0[2].Y,
791 ; CM-NEXT: ALU clause starting at 11:
792 ; CM-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x,
793 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
794 ; CM-NEXT: LSHR * T2.X, PV.W, literal.x,
795 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
796 ; CM-NEXT: LSHR * T3.X, KC0[2].Z, literal.x,
797 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
799 %ld = load <16 x i16>, ptr addrspace(1) %in, align 2
800 store <16 x i16> %ld, ptr addrspace(1) %out, align 32
; Scalar zero-extending load: an i16 is loaded from %in, zero-extended to i32
; and stored to %out. The GCN checks below expect the extension to come from an
; unsigned 16-bit load (buffer_load_ushort or flat_load_ushort) feeding a dword
; store directly; the r600 checks expect a single VTX_READ_16. No separate
; masking instruction appears in the expected output shown here.
804 define amdgpu_kernel void @global_zextload_i16_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
805 ; GCN-NOHSA-SI-LABEL: global_zextload_i16_to_i32:
806 ; GCN-NOHSA-SI: ; %bb.0:
807 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
808 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
809 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
810 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
811 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
812 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
813 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
814 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
815 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
816 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
817 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
818 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
819 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
820 ; GCN-NOHSA-SI-NEXT: s_endpgm
822 ; GCN-HSA-LABEL: global_zextload_i16_to_i32:
824 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
825 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
826 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
827 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
828 ; GCN-HSA-NEXT: flat_load_ushort v2, v[0:1]
829 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
830 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
831 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
832 ; GCN-HSA-NEXT: flat_store_dword v[0:1], v2
833 ; GCN-HSA-NEXT: s_endpgm
835 ; GCN-NOHSA-VI-LABEL: global_zextload_i16_to_i32:
836 ; GCN-NOHSA-VI: ; %bb.0:
837 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
838 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
839 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
840 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
841 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
842 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
843 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
844 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
845 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
846 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
847 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
848 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
849 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
850 ; GCN-NOHSA-VI-NEXT: s_endpgm
852 ; EG-LABEL: global_zextload_i16_to_i32:
854 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
856 ; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
857 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
860 ; EG-NEXT: Fetch clause starting at 6:
861 ; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
862 ; EG-NEXT: ALU clause starting at 8:
863 ; EG-NEXT: MOV * T0.X, KC0[2].Z,
864 ; EG-NEXT: ALU clause starting at 9:
865 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
866 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
868 ; CM-LABEL: global_zextload_i16_to_i32:
870 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
872 ; CM-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
873 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
876 ; CM-NEXT: Fetch clause starting at 6:
877 ; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
878 ; CM-NEXT: ALU clause starting at 8:
879 ; CM-NEXT: MOV * T0.X, KC0[2].Z,
880 ; CM-NEXT: ALU clause starting at 9:
881 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
882 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
883 %a = load i16, ptr addrspace(1) %in
884 %ext = zext i16 %a to i32
885 store i32 %ext, ptr addrspace(1) %out
; Scalar sign-extending load: an i16 is loaded from %in, sign-extended to i32
; and stored to %out. The GCN checks below expect a signed 16-bit load
; (buffer_load_sshort or flat_load_sshort) feeding a dword store directly; the
; r600 checks expect a VTX_READ_16 followed by a BFE_INT with a literal of 16.
889 define amdgpu_kernel void @global_sextload_i16_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
890 ; GCN-NOHSA-SI-LABEL: global_sextload_i16_to_i32:
891 ; GCN-NOHSA-SI: ; %bb.0:
892 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
893 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
894 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
895 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
896 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
897 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
898 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
899 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
900 ; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0
901 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
902 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
903 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
904 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
905 ; GCN-NOHSA-SI-NEXT: s_endpgm
907 ; GCN-HSA-LABEL: global_sextload_i16_to_i32:
909 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
910 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
911 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
912 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
913 ; GCN-HSA-NEXT: flat_load_sshort v2, v[0:1]
914 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
915 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
916 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
917 ; GCN-HSA-NEXT: flat_store_dword v[0:1], v2
918 ; GCN-HSA-NEXT: s_endpgm
920 ; GCN-NOHSA-VI-LABEL: global_sextload_i16_to_i32:
921 ; GCN-NOHSA-VI: ; %bb.0:
922 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
923 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
924 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
925 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
926 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
927 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
928 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
929 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
930 ; GCN-NOHSA-VI-NEXT: buffer_load_sshort v0, off, s[8:11], 0
931 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
932 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
933 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
934 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
935 ; GCN-NOHSA-VI-NEXT: s_endpgm
937 ; EG-LABEL: global_sextload_i16_to_i32:
939 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
941 ; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
942 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
945 ; EG-NEXT: Fetch clause starting at 6:
946 ; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
947 ; EG-NEXT: ALU clause starting at 8:
948 ; EG-NEXT: MOV * T0.X, KC0[2].Z,
949 ; EG-NEXT: ALU clause starting at 9:
950 ; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x,
951 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
952 ; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
954 ; CM-LABEL: global_sextload_i16_to_i32:
956 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
958 ; CM-NEXT: ALU 3, @9, KC0[CB0:0-32], KC1[]
959 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
962 ; CM-NEXT: Fetch clause starting at 6:
963 ; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
964 ; CM-NEXT: ALU clause starting at 8:
965 ; CM-NEXT: MOV * T0.X, KC0[2].Z,
966 ; CM-NEXT: ALU clause starting at 9:
967 ; CM-NEXT: BFE_INT * T0.X, T0.X, 0.0, literal.x,
968 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
969 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
970 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
971 %a = load i16, ptr addrspace(1) %in
972 %ext = sext i16 %a to i32
973 store i32 %ext, ptr addrspace(1) %out
; Single-element vector variant of the zero-extending load: a <1 x i16> is
; loaded from %in, zero-extended to <1 x i32> and stored to %out. The expected
; output is one unsigned 16-bit load (buffer_load_ushort, flat_load_ushort or
; VTX_READ_16) followed by one dword store, with no separate masking step.
977 define amdgpu_kernel void @global_zextload_v1i16_to_v1i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
978 ; GCN-NOHSA-SI-LABEL: global_zextload_v1i16_to_v1i32:
979 ; GCN-NOHSA-SI: ; %bb.0:
980 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
981 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
982 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
983 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
984 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
985 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
986 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
987 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
988 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
989 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
990 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
991 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
992 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
993 ; GCN-NOHSA-SI-NEXT: s_endpgm
995 ; GCN-HSA-LABEL: global_zextload_v1i16_to_v1i32:
997 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
998 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
999 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
1000 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
1001 ; GCN-HSA-NEXT: flat_load_ushort v2, v[0:1]
1002 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
1003 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
1004 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
1005 ; GCN-HSA-NEXT: flat_store_dword v[0:1], v2
1006 ; GCN-HSA-NEXT: s_endpgm
1008 ; GCN-NOHSA-VI-LABEL: global_zextload_v1i16_to_v1i32:
1009 ; GCN-NOHSA-VI: ; %bb.0:
1010 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1011 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
1012 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
1013 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
1014 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
1015 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
1016 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
1017 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
1018 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
1019 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
1020 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
1021 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
1022 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
1023 ; GCN-NOHSA-VI-NEXT: s_endpgm
1025 ; EG-LABEL: global_zextload_v1i16_to_v1i32:
1027 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
1029 ; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
1030 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
1033 ; EG-NEXT: Fetch clause starting at 6:
1034 ; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
1035 ; EG-NEXT: ALU clause starting at 8:
1036 ; EG-NEXT: MOV * T0.X, KC0[2].Z,
1037 ; EG-NEXT: ALU clause starting at 9:
1038 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
1039 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1041 ; CM-LABEL: global_zextload_v1i16_to_v1i32:
1043 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
1045 ; CM-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
1046 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
1049 ; CM-NEXT: Fetch clause starting at 6:
1050 ; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
1051 ; CM-NEXT: ALU clause starting at 8:
1052 ; CM-NEXT: MOV * T0.X, KC0[2].Z,
1053 ; CM-NEXT: ALU clause starting at 9:
1054 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
1055 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1056 %load = load <1 x i16>, ptr addrspace(1) %in
1057 %ext = zext <1 x i16> %load to <1 x i32>
1058 store <1 x i32> %ext, ptr addrspace(1) %out
; Single-element vector variant of the sign-extending load: a <1 x i16> is
; loaded from %in, sign-extended to <1 x i32> and stored to %out. The GCN checks
; expect a signed 16-bit load (buffer_load_sshort or flat_load_sshort) feeding a
; dword store; the r600 checks expect VTX_READ_16 followed by a BFE_INT with a
; literal of 16.
1062 define amdgpu_kernel void @global_sextload_v1i16_to_v1i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
1063 ; GCN-NOHSA-SI-LABEL: global_sextload_v1i16_to_v1i32:
1064 ; GCN-NOHSA-SI: ; %bb.0:
1065 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
1066 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
1067 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
1068 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
1069 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
1070 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
1071 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
1072 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
1073 ; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0
1074 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
1075 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
1076 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
1077 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
1078 ; GCN-NOHSA-SI-NEXT: s_endpgm
1080 ; GCN-HSA-LABEL: global_sextload_v1i16_to_v1i32:
1082 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
1083 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
1084 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
1085 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
1086 ; GCN-HSA-NEXT: flat_load_sshort v2, v[0:1]
1087 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
1088 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
1089 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
1090 ; GCN-HSA-NEXT: flat_store_dword v[0:1], v2
1091 ; GCN-HSA-NEXT: s_endpgm
1093 ; GCN-NOHSA-VI-LABEL: global_sextload_v1i16_to_v1i32:
1094 ; GCN-NOHSA-VI: ; %bb.0:
1095 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1096 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
1097 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
1098 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
1099 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
1100 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
1101 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
1102 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
1103 ; GCN-NOHSA-VI-NEXT: buffer_load_sshort v0, off, s[8:11], 0
1104 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
1105 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
1106 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
1107 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
1108 ; GCN-NOHSA-VI-NEXT: s_endpgm
1110 ; EG-LABEL: global_sextload_v1i16_to_v1i32:
1112 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
1114 ; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
1115 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
1118 ; EG-NEXT: Fetch clause starting at 6:
1119 ; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
1120 ; EG-NEXT: ALU clause starting at 8:
1121 ; EG-NEXT: MOV * T0.X, KC0[2].Z,
1122 ; EG-NEXT: ALU clause starting at 9:
1123 ; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x,
1124 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
1125 ; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
1127 ; CM-LABEL: global_sextload_v1i16_to_v1i32:
1129 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
1131 ; CM-NEXT: ALU 3, @9, KC0[CB0:0-32], KC1[]
1132 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
1135 ; CM-NEXT: Fetch clause starting at 6:
1136 ; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
1137 ; CM-NEXT: ALU clause starting at 8:
1138 ; CM-NEXT: MOV * T0.X, KC0[2].Z,
1139 ; CM-NEXT: ALU clause starting at 9:
1140 ; CM-NEXT: BFE_INT * T0.X, T0.X, 0.0, literal.x,
1141 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1142 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
1143 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1144 %load = load <1 x i16>, ptr addrspace(1) %in
1145 %ext = sext <1 x i16> %load to <1 x i32>
1146 store <1 x i32> %ext, ptr addrspace(1) %out
; Two-element zero-extending load: a <2 x i16> is loaded from %in, zero-extended
; to <2 x i32> and stored to %out. The expected output loads both halves with a
; single 32-bit load, then splits them: a logical shift right by 16 produces the
; high element and an AND with 0xffff (65535) produces the low element. The GCN
; variants store the pair with one dwordx2 store.
1150 define amdgpu_kernel void @global_zextload_v2i16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
1151 ; GCN-NOHSA-SI-LABEL: global_zextload_v2i16_to_v2i32:
1152 ; GCN-NOHSA-SI: ; %bb.0:
1153 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
1154 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
1155 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
1156 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
1157 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
1158 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
1159 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
1160 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
1161 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
1162 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
1163 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
1164 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
1165 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1166 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
1167 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1168 ; GCN-NOHSA-SI-NEXT: s_endpgm
1170 ; GCN-HSA-LABEL: global_zextload_v2i16_to_v2i32:
1172 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
1173 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
1174 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
1175 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
1176 ; GCN-HSA-NEXT: flat_load_dword v2, v[0:1]
1177 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
1178 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
1179 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
1180 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v2
1181 ; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v2
1182 ; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
1183 ; GCN-HSA-NEXT: s_endpgm
1185 ; GCN-NOHSA-VI-LABEL: global_zextload_v2i16_to_v2i32:
1186 ; GCN-NOHSA-VI: ; %bb.0:
1187 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1188 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
1189 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
1190 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
1191 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
1192 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
1193 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
1194 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
1195 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
1196 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
1197 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
1198 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
1199 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1200 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0
1201 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1202 ; GCN-NOHSA-VI-NEXT: s_endpgm
1204 ; EG-LABEL: global_zextload_v2i16_to_v2i32:
1206 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
1208 ; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
1209 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1
1212 ; EG-NEXT: Fetch clause starting at 6:
1213 ; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
1214 ; EG-NEXT: ALU clause starting at 8:
1215 ; EG-NEXT: MOV * T4.X, KC0[2].Z,
1216 ; EG-NEXT: ALU clause starting at 9:
1217 ; EG-NEXT: LSHR * T4.Y, T4.X, literal.x,
1218 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1219 ; EG-NEXT: AND_INT T4.X, T4.X, literal.x,
1220 ; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y,
1221 ; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45)
1223 ; CM-LABEL: global_zextload_v2i16_to_v2i32:
1225 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
1227 ; CM-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[]
1228 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4, T5.X
1231 ; CM-NEXT: Fetch clause starting at 6:
1232 ; CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
1233 ; CM-NEXT: ALU clause starting at 8:
1234 ; CM-NEXT: MOV * T4.X, KC0[2].Z,
1235 ; CM-NEXT: ALU clause starting at 9:
1236 ; CM-NEXT: LSHR * T4.Y, T4.X, literal.x,
1237 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1238 ; CM-NEXT: AND_INT * T4.X, T4.X, literal.x,
1239 ; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
1240 ; CM-NEXT: LSHR * T5.X, KC0[2].Y, literal.x,
1241 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1242 %load = load <2 x i16>, ptr addrspace(1) %in
1243 %ext = zext <2 x i16> %load to <2 x i32>
1244 store <2 x i32> %ext, ptr addrspace(1) %out
1248 ; TODO: On r600 (EG/CM) the high element should use ASHR instead of LSHR + BFE
; Two-element sign-extending load: a <2 x i16> is loaded from %in, sign-extended
; to <2 x i32> and stored to %out. The GCN checks expect one 32-bit load, then
; v_ashrrev_i32 by 16 for the high element and v_bfe_i32 (offset 0, width 16)
; for the low element. The r600 checks expect BFE_INT for the low element and
; an LSHR by 16 followed by BFE_INT for the high element.
1249 define amdgpu_kernel void @global_sextload_v2i16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
1250 ; GCN-NOHSA-SI-LABEL: global_sextload_v2i16_to_v2i32:
1251 ; GCN-NOHSA-SI: ; %bb.0:
1252 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
1253 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
1254 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
1255 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
1256 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
1257 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
1258 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
1259 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
1260 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
1261 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
1262 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
1263 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
1264 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v0
1265 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v0, 0, 16
1266 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1267 ; GCN-NOHSA-SI-NEXT: s_endpgm
1269 ; GCN-HSA-LABEL: global_sextload_v2i16_to_v2i32:
1271 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
1272 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
1273 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
1274 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
1275 ; GCN-HSA-NEXT: flat_load_dword v2, v[0:1]
1276 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
1277 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
1278 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
1279 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v2
1280 ; GCN-HSA-NEXT: v_bfe_i32 v2, v2, 0, 16
1281 ; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
1282 ; GCN-HSA-NEXT: s_endpgm
1284 ; GCN-NOHSA-VI-LABEL: global_sextload_v2i16_to_v2i32:
1285 ; GCN-NOHSA-VI: ; %bb.0:
1286 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1287 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
1288 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
1289 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
1290 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
1291 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
1292 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
1293 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
1294 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
1295 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
1296 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
1297 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
1298 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v0
1299 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16
1300 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1301 ; GCN-NOHSA-VI-NEXT: s_endpgm
1303 ; EG-LABEL: global_sextload_v2i16_to_v2i32:
1305 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
1307 ; EG-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[]
1308 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1
1311 ; EG-NEXT: Fetch clause starting at 6:
1312 ; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
1313 ; EG-NEXT: ALU clause starting at 8:
1314 ; EG-NEXT: MOV * T4.X, KC0[2].Z,
1315 ; EG-NEXT: ALU clause starting at 9:
1316 ; EG-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x,
1317 ; EG-NEXT: LSHR T0.W, T4.X, literal.x,
1318 ; EG-NEXT: LSHR * T4.X, KC0[2].Y, literal.y,
1319 ; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
1320 ; EG-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.x,
1321 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1323 ; CM-LABEL: global_sextload_v2i16_to_v2i32:
1325 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
1327 ; CM-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[]
1328 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T4.X
1331 ; CM-NEXT: Fetch clause starting at 6:
1332 ; CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
1333 ; CM-NEXT: ALU clause starting at 8:
1334 ; CM-NEXT: MOV * T4.X, KC0[2].Z,
1335 ; CM-NEXT: ALU clause starting at 9:
1336 ; CM-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x,
1337 ; CM-NEXT: LSHR * T0.W, T4.X, literal.x,
1338 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1339 ; CM-NEXT: LSHR T4.X, KC0[2].Y, literal.x,
1340 ; CM-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.y,
1341 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
1342 %load = load <2 x i16>, ptr addrspace(1) %in
1343 %ext = sext <2 x i16> %load to <2 x i32>
1344 store <2 x i32> %ext, ptr addrspace(1) %out
; Three-element zero-extending load: a <3 x i16> is loaded from %in,
; zero-extended to <3 x i32> and stored to %out. The GCN checks expect a single
; dwordx2 load that is split with a shift right by 16 and AND with 0xffff. The
; store differs per target in the checks below: the SI variant writes a dword at
; offset 8 plus a dwordx2, while the HSA and VI variants use one dwordx3 store.
; The r600 checks expect three separate VTX_READ_16 fetches.
1348 define amdgpu_kernel void @global_zextload_v3i16_to_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
1349 ; GCN-NOHSA-SI-LABEL: global_zextload_v3i16_to_v3i32:
1350 ; GCN-NOHSA-SI: ; %bb.0: ; %entry
1351 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
1352 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
1353 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
1354 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
1355 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
1356 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
1357 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
1358 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
1359 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
1360 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
1361 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
1362 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
1363 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v0
1364 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, 0xffff, v0
1365 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v1
1366 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8
1367 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[4:7], 0
1368 ; GCN-NOHSA-SI-NEXT: s_endpgm
1370 ; GCN-HSA-LABEL: global_zextload_v3i16_to_v3i32:
1371 ; GCN-HSA: ; %bb.0: ; %entry
1372 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
1373 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
1374 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
1375 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
1376 ; GCN-HSA-NEXT: flat_load_dwordx2 v[3:4], v[0:1]
1377 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s0
1378 ; GCN-HSA-NEXT: v_mov_b32_e32 v6, s1
1379 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
1380 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v3
1381 ; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v4
1382 ; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v3
1383 ; GCN-HSA-NEXT: flat_store_dwordx3 v[5:6], v[0:2]
1384 ; GCN-HSA-NEXT: s_endpgm
1386 ; GCN-NOHSA-VI-LABEL: global_zextload_v3i16_to_v3i32:
1387 ; GCN-NOHSA-VI: ; %bb.0: ; %entry
1388 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1389 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
1390 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
1391 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
1392 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
1393 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
1394 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
1395 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
1396 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
1397 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
1398 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
1399 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
1400 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v1
1401 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1402 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0
1403 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0
1404 ; GCN-NOHSA-VI-NEXT: s_endpgm
1406 ; EG-LABEL: global_zextload_v3i16_to_v3i32:
1407 ; EG: ; %bb.0: ; %entry
1408 ; EG-NEXT: ALU 4, @12, KC0[CB0:0-32], KC1[]
1410 ; EG-NEXT: ALU 2, @17, KC0[], KC1[]
1411 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T4.X, 0
1412 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XY, T0.X, 1
1414 ; EG-NEXT: Fetch clause starting at 6:
1415 ; EG-NEXT: VTX_READ_16 T2.X, T1.X, 4, #1
1416 ; EG-NEXT: VTX_READ_16 T3.X, T1.X, 0, #1
1417 ; EG-NEXT: VTX_READ_16 T1.X, T1.X, 2, #1
1418 ; EG-NEXT: ALU clause starting at 12:
1419 ; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
1420 ; EG-NEXT: MOV * T1.X, KC0[2].Z,
1421 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1422 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
1423 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
1424 ; EG-NEXT: ALU clause starting at 17:
1425 ; EG-NEXT: LSHR T4.X, T0.W, literal.x,
1426 ; EG-NEXT: MOV * T3.Y, T1.X,
1427 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1429 ; CM-LABEL: global_zextload_v3i16_to_v3i32:
1430 ; CM: ; %bb.0: ; %entry
1431 ; CM-NEXT: ALU 4, @12, KC0[CB0:0-32], KC1[]
1433 ; CM-NEXT: ALU 2, @17, KC0[CB0:0-32], KC1[]
1434 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T3, T4.X
1435 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T0.X
1437 ; CM-NEXT: Fetch clause starting at 6:
1438 ; CM-NEXT: VTX_READ_16 T2.X, T1.X, 4, #1
1439 ; CM-NEXT: VTX_READ_16 T3.X, T1.X, 0, #1
1440 ; CM-NEXT: VTX_READ_16 T1.X, T1.X, 2, #1
1441 ; CM-NEXT: ALU clause starting at 12:
1442 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
1443 ; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
1444 ; CM-NEXT: LSHR * T0.X, PV.W, literal.x,
1445 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1446 ; CM-NEXT: MOV * T1.X, KC0[2].Z,
1447 ; CM-NEXT: ALU clause starting at 17:
1448 ; CM-NEXT: LSHR T4.X, KC0[2].Y, literal.x,
1449 ; CM-NEXT: MOV * T3.Y, T1.X,
1450 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1452 %ld = load <3 x i16>, ptr addrspace(1) %in
1453 %ext = zext <3 x i16> %ld to <3 x i32>
1454 store <3 x i32> %ext, ptr addrspace(1) %out
; Three-element sign-extending load: a <3 x i16> is loaded from %in,
; sign-extended to <3 x i32> and stored to %out. The GCN checks expect a single
; dwordx2 load, v_ashrrev_i32 by 16 for the odd element and v_bfe_i32 (offset 0,
; width 16) for the even elements. The store differs per target in the checks
; below: the SI variant writes a dword at offset 8 plus a dwordx2, while the HSA
; and VI variants use one dwordx3 store. The r600 checks expect three
; VTX_READ_16 fetches, each sign-extended with BFE_INT.
1458 define amdgpu_kernel void @global_sextload_v3i16_to_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
1459 ; GCN-NOHSA-SI-LABEL: global_sextload_v3i16_to_v3i32:
1460 ; GCN-NOHSA-SI: ; %bb.0: ; %entry
1461 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
1462 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
1463 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
1464 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
1465 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
1466 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
1467 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
1468 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
1469 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
1470 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
1471 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
1472 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
1473 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v0
1474 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v0, 0, 16
1475 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v1, 0, 16
1476 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8
1477 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[4:7], 0
1478 ; GCN-NOHSA-SI-NEXT: s_endpgm
1480 ; GCN-HSA-LABEL: global_sextload_v3i16_to_v3i32:
1481 ; GCN-HSA: ; %bb.0: ; %entry
1482 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
1483 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
1484 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
1485 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
1486 ; GCN-HSA-NEXT: flat_load_dwordx2 v[3:4], v[0:1]
1487 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s0
1488 ; GCN-HSA-NEXT: v_mov_b32_e32 v6, s1
1489 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
1490 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v3
1491 ; GCN-HSA-NEXT: v_bfe_i32 v2, v4, 0, 16
1492 ; GCN-HSA-NEXT: v_bfe_i32 v0, v3, 0, 16
1493 ; GCN-HSA-NEXT: flat_store_dwordx3 v[5:6], v[0:2]
1494 ; GCN-HSA-NEXT: s_endpgm
1496 ; GCN-NOHSA-VI-LABEL: global_sextload_v3i16_to_v3i32:
1497 ; GCN-NOHSA-VI: ; %bb.0: ; %entry
1498 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1499 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
1500 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
1501 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
1502 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
1503 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
1504 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
1505 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
1506 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[3:4], off, s[8:11], 0
1507 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
1508 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
1509 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
1510 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v3
1511 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v4, 0, 16
1512 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v3, 0, 16
1513 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0
1514 ; GCN-NOHSA-VI-NEXT: s_endpgm
1516 ; EG-LABEL: global_sextload_v3i16_to_v3i32:
1517 ; EG: ; %bb.0: ; %entry
1518 ; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
1520 ; EG-NEXT: ALU 9, @13, KC0[CB0:0-32], KC1[]
1521 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
1522 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
1524 ; EG-NEXT: Fetch clause starting at 6:
1525 ; EG-NEXT: VTX_READ_16 T1.X, T0.X, 2, #1
1526 ; EG-NEXT: VTX_READ_16 T2.X, T0.X, 4, #1
1527 ; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
1528 ; EG-NEXT: ALU clause starting at 12:
1529 ; EG-NEXT: MOV * T0.X, KC0[2].Z,
1530 ; EG-NEXT: ALU clause starting at 13:
1531 ; EG-NEXT: BFE_INT * T0.Y, T1.X, 0.0, literal.x,
1532 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1533 ; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x,
1534 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
1535 ; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
1536 ; EG-NEXT: BFE_INT T2.X, T2.X, 0.0, literal.x,
1537 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
1538 ; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44)
1539 ; EG-NEXT: LSHR * T3.X, PV.W, literal.x,
1540 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1542 ; CM-LABEL: global_sextload_v3i16_to_v3i32:
1543 ; CM: ; %bb.0: ; %entry
1544 ; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
1546 ; CM-NEXT: ALU 9, @13, KC0[CB0:0-32], KC1[]
1547 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T2.X
1548 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T3.X
1550 ; CM-NEXT: Fetch clause starting at 6:
1551 ; CM-NEXT: VTX_READ_16 T1.X, T0.X, 4, #1
1552 ; CM-NEXT: VTX_READ_16 T2.X, T0.X, 0, #1
1553 ; CM-NEXT: VTX_READ_16 T0.X, T0.X, 2, #1
1554 ; CM-NEXT: ALU clause starting at 12:
1555 ; CM-NEXT: MOV * T0.X, KC0[2].Z,
1556 ; CM-NEXT: ALU clause starting at 13:
1557 ; CM-NEXT: BFE_INT T1.X, T1.X, 0.0, literal.x,
1558 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
1559 ; CM-NEXT: 16(2.242078e-44), 8(1.121039e-44)
1560 ; CM-NEXT: LSHR T3.X, PV.W, literal.x,
1561 ; CM-NEXT: BFE_INT * T0.Y, T0.X, 0.0, literal.y,
1562 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
1563 ; CM-NEXT: BFE_INT * T0.X, T2.X, 0.0, literal.x,
1564 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1565 ; CM-NEXT: LSHR * T2.X, KC0[2].Y, literal.x,
1566 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1568 %ld = load <3 x i16>, ptr addrspace(1) %in
1569 %ext = sext <3 x i16> %ld to <3 x i32>
1570 store <3 x i32> %ext, ptr addrspace(1) %out
1574 ; TODO: This should use DST, but for some reason there are redundant MOVs
; Test: load <4 x i16> from global memory (%in), zero-extend each element to
; i32, and store the <4 x i32> result to %out.
; The GCN checks expect one 64-bit load, a 16-bit right shift for each odd
; element, an AND with 0xffff for each even element, and one 128-bit store.
; The r600 checks expect one VTX_READ_64 followed by LSHR/AND_INT pairs.
1575 define amdgpu_kernel void @global_zextload_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
1576 ; GCN-NOHSA-SI-LABEL: global_zextload_v4i16_to_v4i32:
1577 ; GCN-NOHSA-SI: ; %bb.0:
1578 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
1579 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
1580 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
1581 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
1582 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
1583 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
1584 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
1585 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
1586 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[4:5], off, s[8:11], 0
1587 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
1588 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
1589 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
1590 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v5
1591 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v4
1592 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, 0xffff, v5
1593 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v4
1594 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1595 ; GCN-NOHSA-SI-NEXT: s_endpgm
1597 ; GCN-HSA-LABEL: global_zextload_v4i16_to_v4i32:
1599 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
1600 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
1601 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
1602 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
1603 ; GCN-HSA-NEXT: flat_load_dwordx2 v[4:5], v[0:1]
1604 ; GCN-HSA-NEXT: v_mov_b32_e32 v6, s0
1605 ; GCN-HSA-NEXT: v_mov_b32_e32 v7, s1
1606 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
1607 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v5
1608 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v4
1609 ; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v5
1610 ; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v4
1611 ; GCN-HSA-NEXT: flat_store_dwordx4 v[6:7], v[0:3]
1612 ; GCN-HSA-NEXT: s_endpgm
1614 ; GCN-NOHSA-VI-LABEL: global_zextload_v4i16_to_v4i32:
1615 ; GCN-NOHSA-VI: ; %bb.0:
1616 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1617 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
1618 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
1619 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
1620 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
1621 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
1622 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
1623 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
1624 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
1625 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
1626 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
1627 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
1628 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v1
1629 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v1
1630 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1631 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0
1632 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1633 ; GCN-NOHSA-VI-NEXT: s_endpgm
1635 ; EG-LABEL: global_zextload_v4i16_to_v4i32:
1637 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
1639 ; EG-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[]
1640 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
1643 ; EG-NEXT: Fetch clause starting at 6:
1644 ; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
1645 ; EG-NEXT: ALU clause starting at 8:
1646 ; EG-NEXT: MOV * T5.X, KC0[2].Z,
1647 ; EG-NEXT: ALU clause starting at 9:
1648 ; EG-NEXT: LSHR * T5.W, T5.Y, literal.x,
1649 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1650 ; EG-NEXT: AND_INT * T5.Z, T5.Y, literal.x,
1651 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
1652 ; EG-NEXT: LSHR * T5.Y, T5.X, literal.x,
1653 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1654 ; EG-NEXT: AND_INT T5.X, T5.X, literal.x,
1655 ; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.y,
1656 ; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45)
1658 ; CM-LABEL: global_zextload_v4i16_to_v4i32:
1660 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
1662 ; CM-NEXT: ALU 9, @9, KC0[CB0:0-32], KC1[]
1663 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T6.X
1666 ; CM-NEXT: Fetch clause starting at 6:
1667 ; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
1668 ; CM-NEXT: ALU clause starting at 8:
1669 ; CM-NEXT: MOV * T5.X, KC0[2].Z,
1670 ; CM-NEXT: ALU clause starting at 9:
1671 ; CM-NEXT: LSHR * T5.W, T5.Y, literal.x,
1672 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1673 ; CM-NEXT: AND_INT * T5.Z, T5.Y, literal.x,
1674 ; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
1675 ; CM-NEXT: LSHR * T5.Y, T5.X, literal.x,
1676 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1677 ; CM-NEXT: AND_INT * T5.X, T5.X, literal.x,
1678 ; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
1679 ; CM-NEXT: LSHR * T6.X, KC0[2].Y, literal.x,
1680 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; IR under test: vector load, zext, vector store.
1681 %load = load <4 x i16>, ptr addrspace(1) %in
1682 %ext = zext <4 x i16> %load to <4 x i32>
1683 store <4 x i32> %ext, ptr addrspace(1) %out
1687 ; TODO: We should use ASHR instead of LSHR + BFE
1688 ; TODO: This should use DST, but for some reason there are redundant MOVs
; Test: load <4 x i16> from global memory (%in), sign-extend each element to
; i32, and store the <4 x i32> result to %out.
; The SI and HSA checks expect v_ashr_i64 by 48 (plus a v_mov) for the last
; element; the VI checks expect v_ashrrev_i32 by 16 for both odd elements.
; Even elements use v_bfe_i32 (offset 0, width 16) on all three GCN runs.
; The r600 checks expect LSHR followed by BFE_INT for the odd elements,
; presumably the pattern the ASHR TODO above refers to.
1689 define amdgpu_kernel void @global_sextload_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
1690 ; GCN-NOHSA-SI-LABEL: global_sextload_v4i16_to_v4i32:
1691 ; GCN-NOHSA-SI: ; %bb.0:
1692 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
1693 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
1694 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
1695 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
1696 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
1697 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
1698 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
1699 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
1700 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[3:4], off, s[8:11], 0
1701 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
1702 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
1703 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
1704 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v3
1705 ; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[5:6], v[3:4], 48
1706 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v4, 0, 16
1707 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v3, 0, 16
1708 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v5
1709 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1710 ; GCN-NOHSA-SI-NEXT: s_endpgm
1712 ; GCN-HSA-LABEL: global_sextload_v4i16_to_v4i32:
1714 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
1715 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
1716 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
1717 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
1718 ; GCN-HSA-NEXT: flat_load_dwordx2 v[3:4], v[0:1]
1719 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s0
1720 ; GCN-HSA-NEXT: v_mov_b32_e32 v6, s1
1721 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
1722 ; GCN-HSA-NEXT: v_ashr_i64 v[7:8], v[3:4], 48
1723 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v3
1724 ; GCN-HSA-NEXT: v_bfe_i32 v2, v4, 0, 16
1725 ; GCN-HSA-NEXT: v_bfe_i32 v0, v3, 0, 16
1726 ; GCN-HSA-NEXT: v_mov_b32_e32 v3, v7
1727 ; GCN-HSA-NEXT: flat_store_dwordx4 v[5:6], v[0:3]
1728 ; GCN-HSA-NEXT: s_endpgm
1730 ; GCN-NOHSA-VI-LABEL: global_sextload_v4i16_to_v4i32:
1731 ; GCN-NOHSA-VI: ; %bb.0:
1732 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1733 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
1734 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
1735 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
1736 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
1737 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
1738 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
1739 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
1740 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[4:5], off, s[8:11], 0
1741 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
1742 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
1743 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
1744 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v5
1745 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v4
1746 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v5, 0, 16
1747 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v4, 0, 16
1748 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1749 ; GCN-NOHSA-VI-NEXT: s_endpgm
1751 ; EG-LABEL: global_sextload_v4i16_to_v4i32:
1753 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
1755 ; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
1756 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1
1759 ; EG-NEXT: Fetch clause starting at 6:
1760 ; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
1761 ; EG-NEXT: ALU clause starting at 8:
1762 ; EG-NEXT: MOV * T5.X, KC0[2].Z,
1763 ; EG-NEXT: ALU clause starting at 9:
1764 ; EG-NEXT: BFE_INT * T6.Z, T5.Y, 0.0, literal.x,
1765 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1766 ; EG-NEXT: BFE_INT T6.X, T5.X, 0.0, literal.x,
1767 ; EG-NEXT: LSHR * T0.W, T5.Y, literal.x,
1768 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1769 ; EG-NEXT: BFE_INT T6.W, PV.W, 0.0, literal.x,
1770 ; EG-NEXT: LSHR * T0.W, T5.X, literal.x,
1771 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1772 ; EG-NEXT: LSHR T5.X, KC0[2].Y, literal.x,
1773 ; EG-NEXT: BFE_INT * T6.Y, PS, 0.0, literal.y,
1774 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
1776 ; CM-LABEL: global_sextload_v4i16_to_v4i32:
1778 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
1780 ; CM-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
1781 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6, T5.X
1784 ; CM-NEXT: Fetch clause starting at 6:
1785 ; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
1786 ; CM-NEXT: ALU clause starting at 8:
1787 ; CM-NEXT: MOV * T5.X, KC0[2].Z,
1788 ; CM-NEXT: ALU clause starting at 9:
1789 ; CM-NEXT: BFE_INT * T6.Z, T5.Y, 0.0, literal.x,
1790 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1791 ; CM-NEXT: BFE_INT T6.X, T5.X, 0.0, literal.x,
1792 ; CM-NEXT: LSHR * T0.W, T5.Y, literal.x,
1793 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1794 ; CM-NEXT: LSHR T0.Z, T5.X, literal.x,
1795 ; CM-NEXT: BFE_INT * T6.W, PV.W, 0.0, literal.x,
1796 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1797 ; CM-NEXT: LSHR T5.X, KC0[2].Y, literal.x,
1798 ; CM-NEXT: BFE_INT * T6.Y, PV.Z, 0.0, literal.y,
1799 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; IR under test: vector load, sext, vector store.
1800 %load = load <4 x i16>, ptr addrspace(1) %in
1801 %ext = sext <4 x i16> %load to <4 x i32>
1802 store <4 x i32> %ext, ptr addrspace(1) %out
1806 ; TODO: These should use LSHR instead of BFE_UINT
; Test: load <8 x i16> from global memory (%in), zero-extend each element to
; i32, and store the <8 x i32> result to %out.
; The GCN checks expect one 128-bit load, a shift/AND pair per loaded dword,
; and two 128-bit stores (at %out and at %out + 16).
; NOTE(review): the r600 checks below already show LSHR/AND_INT and no
; BFE_UINT, so the TODO above may be stale - confirm before removing it.
1807 define amdgpu_kernel void @global_zextload_v8i16_to_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
1808 ; GCN-NOHSA-SI-LABEL: global_zextload_v8i16_to_v8i32:
1809 ; GCN-NOHSA-SI: ; %bb.0:
1810 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
1811 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
1812 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
1813 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
1814 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
1815 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
1816 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
1817 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
1818 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
1819 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
1820 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
1821 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
1822 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v1
1823 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v0
1824 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v3
1825 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v2
1826 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v6, 0xffff, v1
1827 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v0
1828 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v10, 0xffff, v3
1829 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v2
1830 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16
1831 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
1832 ; GCN-NOHSA-SI-NEXT: s_endpgm
1834 ; GCN-HSA-LABEL: global_zextload_v8i16_to_v8i32:
1836 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
1837 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
1838 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
1839 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
1840 ; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
1841 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
1842 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
1843 ; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3
1844 ; GCN-HSA-NEXT: v_mov_b32_e32 v13, s1
1845 ; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2
1846 ; GCN-HSA-NEXT: v_mov_b32_e32 v12, s0
1847 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
1848 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v3
1849 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v2
1850 ; GCN-HSA-NEXT: v_and_b32_e32 v10, 0xffff, v3
1851 ; GCN-HSA-NEXT: v_and_b32_e32 v8, 0xffff, v2
1852 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v1
1853 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v0
1854 ; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v1
1855 ; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v0
1856 ; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11]
1857 ; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[4:7]
1858 ; GCN-HSA-NEXT: s_endpgm
1860 ; GCN-NOHSA-VI-LABEL: global_zextload_v8i16_to_v8i32:
1861 ; GCN-NOHSA-VI: ; %bb.0:
1862 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
1863 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
1864 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
1865 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
1866 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
1867 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
1868 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
1869 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
1870 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
1871 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
1872 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
1873 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
1874 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v3
1875 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, 0xffff, v3
1876 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v2
1877 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, 0xffff, v2
1878 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v1
1879 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v6, 0xffff, v1
1880 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v0
1881 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, 0xffff, v0
1882 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16
1883 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
1884 ; GCN-NOHSA-VI-NEXT: s_endpgm
1886 ; EG-LABEL: global_zextload_v8i16_to_v8i32:
1888 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
1890 ; EG-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[]
1891 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
1892 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
1894 ; EG-NEXT: Fetch clause starting at 6:
1895 ; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
1896 ; EG-NEXT: ALU clause starting at 8:
1897 ; EG-NEXT: MOV * T7.X, KC0[2].Z,
1898 ; EG-NEXT: ALU clause starting at 9:
1899 ; EG-NEXT: LSHR * T8.W, T7.Y, literal.x,
1900 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1901 ; EG-NEXT: AND_INT * T8.Z, T7.Y, literal.x,
1902 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
1903 ; EG-NEXT: LSHR T8.Y, T7.X, literal.x,
1904 ; EG-NEXT: LSHR * T9.W, T7.W, literal.x,
1905 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1906 ; EG-NEXT: AND_INT T8.X, T7.X, literal.x,
1907 ; EG-NEXT: AND_INT T9.Z, T7.W, literal.x,
1908 ; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.y,
1909 ; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45)
1910 ; EG-NEXT: LSHR * T9.Y, T7.Z, literal.x,
1911 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1912 ; EG-NEXT: AND_INT T9.X, T7.Z, literal.x,
1913 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
1914 ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
1915 ; EG-NEXT: LSHR * T10.X, PV.W, literal.x,
1916 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1918 ; CM-LABEL: global_zextload_v8i16_to_v8i32:
1920 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
1922 ; CM-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[]
1923 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T10.X
1924 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T9.X
1926 ; CM-NEXT: Fetch clause starting at 6:
1927 ; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
1928 ; CM-NEXT: ALU clause starting at 8:
1929 ; CM-NEXT: MOV * T7.X, KC0[2].Z,
1930 ; CM-NEXT: ALU clause starting at 9:
1931 ; CM-NEXT: LSHR * T8.W, T7.W, literal.x,
1932 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1933 ; CM-NEXT: AND_INT * T8.Z, T7.W, literal.x,
1934 ; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
1935 ; CM-NEXT: LSHR T8.Y, T7.Z, literal.x,
1936 ; CM-NEXT: LSHR * T7.W, T7.Y, literal.x,
1937 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1938 ; CM-NEXT: AND_INT T8.X, T7.Z, literal.x,
1939 ; CM-NEXT: AND_INT T7.Z, T7.Y, literal.x,
1940 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
1941 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
1942 ; CM-NEXT: LSHR T9.X, PV.W, literal.x,
1943 ; CM-NEXT: LSHR * T7.Y, T7.X, literal.y,
1944 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
1945 ; CM-NEXT: AND_INT * T7.X, T7.X, literal.x,
1946 ; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
1947 ; CM-NEXT: LSHR * T10.X, KC0[2].Y, literal.x,
1948 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; IR under test: vector load, zext, vector store.
1949 %load = load <8 x i16>, ptr addrspace(1) %in
1950 %ext = zext <8 x i16> %load to <8 x i32>
1951 store <8 x i32> %ext, ptr addrspace(1) %out
1955 ; TODO: These should use ASHR instead of LSHR + BFE_INT
; Test: load <8 x i16> from global memory (%in), sign-extend each element to
; i32, and store the <8 x i32> result to %out.
; All three GCN runs expect v_ashrrev_i32 by 16 for the odd elements and
; v_bfe_i32 (offset 0, width 16) for the even ones, followed by two 128-bit
; stores (at %out and at %out + 16).
; The r600 checks expect LSHR followed by BFE_INT for the odd elements, which
; is the pattern the TODO above wants replaced by ASHR.
1956 define amdgpu_kernel void @global_sextload_v8i16_to_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
1957 ; GCN-NOHSA-SI-LABEL: global_sextload_v8i16_to_v8i32:
1958 ; GCN-NOHSA-SI: ; %bb.0:
1959 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
1960 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
1961 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
1962 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
1963 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
1964 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
1965 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
1966 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
1967 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
1968 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
1969 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
1970 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
1971 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 16, v1
1972 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 16, v0
1973 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v1, 0, 16
1974 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v0, 0, 16
1975 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 16, v3
1976 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 16, v2
1977 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v3, 0, 16
1978 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v2, 0, 16
1979 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16
1980 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
1981 ; GCN-NOHSA-SI-NEXT: s_endpgm
1983 ; GCN-HSA-LABEL: global_sextload_v8i16_to_v8i32:
1985 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
1986 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
1987 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
1988 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
1989 ; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
1990 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
1991 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
1992 ; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3
1993 ; GCN-HSA-NEXT: v_mov_b32_e32 v13, s1
1994 ; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2
1995 ; GCN-HSA-NEXT: v_mov_b32_e32 v12, s0
1996 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
1997 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 16, v3
1998 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 16, v2
1999 ; GCN-HSA-NEXT: v_bfe_i32 v10, v3, 0, 16
2000 ; GCN-HSA-NEXT: v_bfe_i32 v8, v2, 0, 16
2001 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 16, v1
2002 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 16, v0
2003 ; GCN-HSA-NEXT: v_bfe_i32 v6, v1, 0, 16
2004 ; GCN-HSA-NEXT: v_bfe_i32 v4, v0, 0, 16
2005 ; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11]
2006 ; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[4:7]
2007 ; GCN-HSA-NEXT: s_endpgm
2009 ; GCN-NOHSA-VI-LABEL: global_sextload_v8i16_to_v8i32:
2010 ; GCN-NOHSA-VI: ; %bb.0:
2011 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
2012 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
2013 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
2014 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
2015 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
2016 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
2017 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
2018 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
2019 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
2020 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
2021 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
2022 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
2023 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 16, v3
2024 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 16, v2
2025 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v3, 0, 16
2026 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v2, 0, 16
2027 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 16, v1
2028 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 16, v0
2029 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v1, 0, 16
2030 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v0, 0, 16
2031 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16
2032 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
2033 ; GCN-NOHSA-VI-NEXT: s_endpgm
2035 ; EG-LABEL: global_sextload_v8i16_to_v8i32:
2037 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
2039 ; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[]
2040 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
2041 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
2043 ; EG-NEXT: Fetch clause starting at 6:
2044 ; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
2045 ; EG-NEXT: ALU clause starting at 8:
2046 ; EG-NEXT: MOV * T7.X, KC0[2].Z,
2047 ; EG-NEXT: ALU clause starting at 9:
2048 ; EG-NEXT: BFE_INT * T8.Z, T7.Y, 0.0, literal.x,
2049 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
2050 ; EG-NEXT: BFE_INT T8.X, T7.X, 0.0, literal.x,
2051 ; EG-NEXT: BFE_INT T9.Z, T7.W, 0.0, literal.x,
2052 ; EG-NEXT: LSHR * T0.W, T7.Y, literal.x,
2053 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
2054 ; EG-NEXT: BFE_INT T9.X, T7.Z, 0.0, literal.x,
2055 ; EG-NEXT: LSHR T0.Z, T7.W, literal.x,
2056 ; EG-NEXT: BFE_INT T8.W, PV.W, 0.0, literal.x,
2057 ; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
2058 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
2059 ; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
2060 ; EG-NEXT: BFE_INT T8.Y, PS, 0.0, literal.y,
2061 ; EG-NEXT: LSHR T1.Z, T7.Z, literal.y,
2062 ; EG-NEXT: BFE_INT T9.W, PV.Z, 0.0, literal.y,
2063 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
2064 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2065 ; EG-NEXT: LSHR T10.X, PS, literal.x,
2066 ; EG-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
2067 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2069 ; CM-LABEL: global_sextload_v8i16_to_v8i32:
2071 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
2073 ; CM-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[]
2074 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T9, T7.X
2075 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T10.X
2077 ; CM-NEXT: Fetch clause starting at 6:
2078 ; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
2079 ; CM-NEXT: ALU clause starting at 8:
2080 ; CM-NEXT: MOV * T7.X, KC0[2].Z,
2081 ; CM-NEXT: ALU clause starting at 9:
2082 ; CM-NEXT: BFE_INT * T8.Z, T7.W, 0.0, literal.x,
2083 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
2084 ; CM-NEXT: BFE_INT T8.X, T7.Z, 0.0, literal.x,
2085 ; CM-NEXT: LSHR T0.Y, T7.Y, literal.x,
2086 ; CM-NEXT: BFE_INT T9.Z, T7.Y, 0.0, literal.x,
2087 ; CM-NEXT: LSHR * T0.W, T7.W, literal.x,
2088 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
2089 ; CM-NEXT: BFE_INT T9.X, T7.X, 0.0, literal.x,
2090 ; CM-NEXT: LSHR T1.Y, T7.Z, literal.x,
2091 ; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.x,
2092 ; CM-NEXT: BFE_INT * T8.W, PV.W, 0.0, literal.x,
2093 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
2094 ; CM-NEXT: LSHR T10.X, PV.Z, literal.x,
2095 ; CM-NEXT: BFE_INT T8.Y, PV.Y, 0.0, literal.y,
2096 ; CM-NEXT: LSHR T0.Z, T7.X, literal.y,
2097 ; CM-NEXT: BFE_INT * T9.W, T0.Y, 0.0, literal.y,
2098 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2099 ; CM-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
2100 ; CM-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
2101 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; IR under test: vector load, sext, vector store.
2102 %load = load <8 x i16>, ptr addrspace(1) %in
2103 %ext = sext <8 x i16> %load to <8 x i32>
2104 store <8 x i32> %ext, ptr addrspace(1) %out
2108 define amdgpu_kernel void @global_zextload_v16i16_to_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
2109 ; GCN-NOHSA-SI-LABEL: global_zextload_v16i16_to_v16i32:
2110 ; GCN-NOHSA-SI: ; %bb.0:
2111 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
2112 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
2113 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
2114 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2
2115 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3
2116 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
2117 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6
2118 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7
2119 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
2120 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4
2121 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5
2122 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
2123 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1)
2124 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v1
2125 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v0
2126 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v15, 16, v3
2127 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v13, 16, v2
2128 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
2129 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v5
2130 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v4
2131 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v23, 16, v7
2132 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v21, 16, v6
2133 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v10, 0xffff, v1
2134 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v0
2135 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v14, 0xffff, v3
2136 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v12, 0xffff, v2
2137 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, 0xffff, v5
2138 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v4
2139 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v22, 0xffff, v7
2140 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v20, 0xffff, v6
2141 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:48
2142 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32
2143 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
2144 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0
2145 ; GCN-NOHSA-SI-NEXT: s_endpgm
2147 ; GCN-HSA-LABEL: global_zextload_v16i16_to_v16i32:
2149 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
2150 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
2151 ; GCN-HSA-NEXT: s_add_u32 s4, s2, 16
2152 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0
2153 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
2154 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
2155 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
2156 ; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
2157 ; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
2158 ; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
2159 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
2160 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
2161 ; GCN-HSA-NEXT: v_mov_b32_e32 v23, s3
2162 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s2
2163 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
2164 ; GCN-HSA-NEXT: v_mov_b32_e32 v21, s1
2165 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
2166 ; GCN-HSA-NEXT: v_mov_b32_e32 v20, s0
2167 ; GCN-HSA-NEXT: s_add_u32 s0, s0, 32
2168 ; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0
2169 ; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3
2170 ; GCN-HSA-NEXT: v_mov_b32_e32 v27, s1
2171 ; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2
2172 ; GCN-HSA-NEXT: v_mov_b32_e32 v26, s0
2173 ; GCN-HSA-NEXT: s_waitcnt vmcnt(1)
2174 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v1
2175 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v0
2176 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v15, 16, v3
2177 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v13, 16, v2
2178 ; GCN-HSA-NEXT: v_and_b32_e32 v10, 0xffff, v1
2179 ; GCN-HSA-NEXT: v_and_b32_e32 v8, 0xffff, v0
2180 ; GCN-HSA-NEXT: v_and_b32_e32 v14, 0xffff, v3
2181 ; GCN-HSA-NEXT: v_and_b32_e32 v12, 0xffff, v2
2182 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
2183 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v7
2184 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v6
2185 ; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v7
2186 ; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v6
2187 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v5
2188 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v4
2189 ; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v5
2190 ; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v4
2191 ; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[0:3]
2192 ; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19]
2193 ; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[12:15]
2194 ; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11]
2195 ; GCN-HSA-NEXT: s_endpgm
2197 ; GCN-NOHSA-VI-LABEL: global_zextload_v16i16_to_v16i32:
2198 ; GCN-NOHSA-VI: ; %bb.0:
2199 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
2200 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
2201 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
2202 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2
2203 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3
2204 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
2205 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
2206 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
2207 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
2208 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
2209 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
2210 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
2211 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1)
2212 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v1
2213 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
2214 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v19, 16, v7
2215 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v18, 0xffff, v7
2216 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v17, 16, v6
2217 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, 0xffff, v6
2218 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, 0xffff, v1
2219 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v0
2220 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, 0xffff, v0
2221 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v3
2222 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v14, 0xffff, v3
2223 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v13, 16, v2
2224 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, 0xffff, v2
2225 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v5
2226 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v5
2227 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v4
2228 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v4
2229 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
2230 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2231 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
2232 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0
2233 ; GCN-NOHSA-VI-NEXT: s_endpgm
2235 ; EG-LABEL: global_zextload_v16i16_to_v16i32:
2237 ; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
2239 ; EG-NEXT: ALU 35, @13, KC0[CB0:0-32], KC1[]
2240 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0
2241 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T11.X, 0
2242 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
2243 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T12.X, 1
2245 ; EG-NEXT: Fetch clause starting at 8:
2246 ; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1
2247 ; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1
2248 ; EG-NEXT: ALU clause starting at 12:
2249 ; EG-NEXT: MOV * T11.X, KC0[2].Z,
2250 ; EG-NEXT: ALU clause starting at 13:
2251 ; EG-NEXT: LSHR * T13.W, T12.Y, literal.x,
2252 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
2253 ; EG-NEXT: AND_INT * T13.Z, T12.Y, literal.x,
2254 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
2255 ; EG-NEXT: LSHR T13.Y, T12.X, literal.x,
2256 ; EG-NEXT: LSHR * T14.W, T12.W, literal.x,
2257 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
2258 ; EG-NEXT: AND_INT T13.X, T12.X, literal.x,
2259 ; EG-NEXT: AND_INT T14.Z, T12.W, literal.x,
2260 ; EG-NEXT: LSHR * T12.X, KC0[2].Y, literal.y,
2261 ; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45)
2262 ; EG-NEXT: LSHR T14.Y, T12.Z, literal.x,
2263 ; EG-NEXT: LSHR * T15.W, T11.Y, literal.x,
2264 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
2265 ; EG-NEXT: AND_INT T14.X, T12.Z, literal.x,
2266 ; EG-NEXT: AND_INT T15.Z, T11.Y, literal.x,
2267 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
2268 ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
2269 ; EG-NEXT: LSHR T16.X, PV.W, literal.x,
2270 ; EG-NEXT: LSHR T15.Y, T11.X, literal.y,
2271 ; EG-NEXT: LSHR T17.W, T11.W, literal.y,
2272 ; EG-NEXT: AND_INT * T15.X, T11.X, literal.z,
2273 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2274 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
2275 ; EG-NEXT: AND_INT T17.Z, T11.W, literal.x,
2276 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
2277 ; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44)
2278 ; EG-NEXT: LSHR T11.X, PV.W, literal.x,
2279 ; EG-NEXT: LSHR T17.Y, T11.Z, literal.y,
2280 ; EG-NEXT: AND_INT * T17.X, T11.Z, literal.z,
2281 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2282 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
2283 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
2284 ; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
2285 ; EG-NEXT: LSHR * T18.X, PV.W, literal.x,
2286 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
2288 ; CM-LABEL: global_zextload_v16i16_to_v16i32:
2290 ; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
2292 ; CM-NEXT: ALU 33, @13, KC0[CB0:0-32], KC1[]
2293 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T11, T18.X
2294 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T15, T17.X
2295 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T16.X
2296 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T13, T14.X
2298 ; CM-NEXT: Fetch clause starting at 8:
2299 ; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
2300 ; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
2301 ; CM-NEXT: ALU clause starting at 12:
2302 ; CM-NEXT: MOV * T11.X, KC0[2].Z,
2303 ; CM-NEXT: ALU clause starting at 13:
2304 ; CM-NEXT: LSHR * T13.W, T12.W, literal.x,
2305 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
2306 ; CM-NEXT: AND_INT * T13.Z, T12.W, literal.x,
2307 ; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
2308 ; CM-NEXT: LSHR T13.Y, T12.Z, literal.x,
2309 ; CM-NEXT: LSHR * T12.W, T12.Y, literal.x,
2310 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
2311 ; CM-NEXT: AND_INT T13.X, T12.Z, literal.x,
2312 ; CM-NEXT: AND_INT T12.Z, T12.Y, literal.x,
2313 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
2314 ; CM-NEXT: 65535(9.183409e-41), 48(6.726233e-44)
2315 ; CM-NEXT: LSHR T14.X, PV.W, literal.x,
2316 ; CM-NEXT: LSHR T12.Y, T12.X, literal.y,
2317 ; CM-NEXT: LSHR * T15.W, T11.W, literal.y,
2318 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2319 ; CM-NEXT: AND_INT T12.X, T12.X, literal.x,
2320 ; CM-NEXT: AND_INT T15.Z, T11.W, literal.x,
2321 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
2322 ; CM-NEXT: 65535(9.183409e-41), 32(4.484155e-44)
2323 ; CM-NEXT: LSHR T16.X, PV.W, literal.x,
2324 ; CM-NEXT: LSHR T15.Y, T11.Z, literal.y,
2325 ; CM-NEXT: LSHR * T11.W, T11.Y, literal.y,
2326 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2327 ; CM-NEXT: AND_INT T15.X, T11.Z, literal.x,
2328 ; CM-NEXT: AND_INT T11.Z, T11.Y, literal.x,
2329 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
2330 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
2331 ; CM-NEXT: LSHR T17.X, PV.W, literal.x,
2332 ; CM-NEXT: LSHR * T11.Y, T11.X, literal.y,
2333 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2334 ; CM-NEXT: AND_INT * T11.X, T11.X, literal.x,
2335 ; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
2336 ; CM-NEXT: LSHR * T18.X, KC0[2].Y, literal.x,
2337 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
2338 %load = load <16 x i16>, ptr addrspace(1) %in
2339 %ext = zext <16 x i16> %load to <16 x i32>
2340 store <16 x i32> %ext, ptr addrspace(1) %out
; Sign-extending vector load test: loads a <16 x i16> value through the
; addrspace(1) pointer %in, sign-extends every lane to i32, and stores the
; resulting <16 x i32> through the addrspace(1) pointer %out.
;
; The per-target assertion lines inside the function are autogenerated (see
; the NOTE on the first line of this file). Regenerate them with the update
; script instead of editing them by hand; each run line has its own prefix,
; so every target's expected output is listed separately below.
define amdgpu_kernel void @global_sextload_v16i16_to_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GCN-NOHSA-SI-LABEL: global_sextload_v16i16_to_v16i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7
; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1)
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 16, v1
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 16, v0
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v1, 0, 16
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v0, 0, 16
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v15, 16, v3
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v13, 16, v2
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v14, v3, 0, 16
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v12, v2, 0, 16
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v5
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v4
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v5, 0, 16
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v4, 0, 16
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v19, 16, v7
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 16, v6
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v18, v7, 0, 16
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v16, v6, 0, 16
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
; GCN-HSA-LABEL: global_sextload_v16i16_to_v16i32:
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT: s_add_u32 s2, s2, 16
; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
; GCN-HSA-NEXT: v_mov_b32_e32 v17, s1
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v16, s0
; GCN-HSA-NEXT: s_add_u32 s0, s0, 32
; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3
; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v23, s1
; GCN-HSA-NEXT: v_mov_b32_e32 v22, s0
; GCN-HSA-NEXT: s_waitcnt vmcnt(1)
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 16, v0
; GCN-HSA-NEXT: v_bfe_i32 v10, v1, 0, 16
; GCN-HSA-NEXT: v_bfe_i32 v8, v0, 0, 16
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 16, v3
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 16, v2
; GCN-HSA-NEXT: v_bfe_i32 v14, v3, 0, 16
; GCN-HSA-NEXT: v_bfe_i32 v12, v2, 0, 16
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 16, v1
; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15]
; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11]
; GCN-HSA-NEXT: s_waitcnt vmcnt(2)
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v5
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 16, v7
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 16, v6
; GCN-HSA-NEXT: v_bfe_i32 v9, v7, 0, 16
; GCN-HSA-NEXT: v_bfe_i32 v7, v6, 0, 16
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v4
; GCN-HSA-NEXT: v_bfe_i32 v2, v5, 0, 16
; GCN-HSA-NEXT: v_bfe_i32 v0, v4, 0, 16
; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[7:10]
; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[0:3]
; GCN-HSA-NEXT: s_endpgm
; GCN-NOHSA-VI-LABEL: global_sextload_v16i16_to_v16i32:
; GCN-NOHSA-VI: ; %bb.0:
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2
; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1)
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 16, v1
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 16, v7
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 16, v6
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v7, 0, 16
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v6, 0, 16
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 16, v0
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v1, 0, 16
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v0, 0, 16
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 16, v3
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 16, v2
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v3, 0, 16
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v2, 0, 16
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v5
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v4
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v5, 0, 16
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v4, 0, 16
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
; EG-LABEL: global_sextload_v16i16_to_v16i32:
; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
; EG-NEXT: ALU 39, @13, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1
; EG-NEXT: Fetch clause starting at 8:
; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
; EG-NEXT: ALU clause starting at 12:
; EG-NEXT: MOV * T11.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 13:
; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: LSHR T14.X, PV.W, literal.x,
; EG-NEXT: BFE_INT * T15.Z, T11.Y, 0.0, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: BFE_INT T15.X, T11.X, 0.0, literal.x,
; EG-NEXT: LSHR T0.Y, T12.W, literal.x,
; EG-NEXT: BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: LSHR T0.W, T12.Y, literal.x,
; EG-NEXT: LSHR * T1.W, T11.Y, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T16.X, T11.Z, 0.0, literal.x,
; EG-NEXT: LSHR T1.Y, T11.W, literal.x,
; EG-NEXT: BFE_INT T17.Z, T12.Y, 0.0, literal.x,
; EG-NEXT: BFE_INT T15.W, PS, 0.0, literal.x,
; EG-NEXT: LSHR * T1.W, T11.X, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T17.X, T12.X, 0.0, literal.x,
; EG-NEXT: BFE_INT T15.Y, PS, 0.0, literal.x,
; EG-NEXT: BFE_INT T18.Z, T12.W, 0.0, literal.x,
; EG-NEXT: BFE_INT T16.W, PV.Y, 0.0, literal.x,
; EG-NEXT: LSHR * T1.W, T11.Z, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T18.X, T12.Z, 0.0, literal.x,
; EG-NEXT: BFE_INT T16.Y, PS, 0.0, literal.x,
; EG-NEXT: LSHR T0.Z, T12.X, literal.x,
; EG-NEXT: BFE_INT T17.W, T0.W, 0.0, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 16(2.242078e-44), 32(4.484155e-44)
; EG-NEXT: LSHR T11.X, PS, literal.x,
; EG-NEXT: BFE_INT T17.Y, PV.Z, 0.0, literal.y,
; EG-NEXT: LSHR T0.Z, T12.Z, literal.y,
; EG-NEXT: BFE_INT T18.W, T0.Y, 0.0, literal.y,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T12.X, PS, literal.x,
; EG-NEXT: BFE_INT * T18.Y, PV.Z, 0.0, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; CM-LABEL: global_sextload_v16i16_to_v16i32:
; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
; CM-NEXT: ALU 40, @13, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T17, T11.X
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T18.X
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T16, T14.X
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T15, T13.X
; CM-NEXT: Fetch clause starting at 8:
; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
; CM-NEXT: ALU clause starting at 12:
; CM-NEXT: MOV * T11.X, KC0[2].Z,
; CM-NEXT: ALU clause starting at 13:
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00)
; CM-NEXT: LSHR T13.X, PV.W, literal.x,
; CM-NEXT: LSHR T0.Y, T11.Y, literal.y,
; CM-NEXT: LSHR T0.Z, T11.Z, literal.y,
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; CM-NEXT: LSHR T14.X, PV.W, literal.x,
; CM-NEXT: LSHR T1.Y, T11.W, literal.y,
; CM-NEXT: BFE_INT T15.Z, T12.W, 0.0, literal.y, BS:VEC_120/SCL_212
; CM-NEXT: LSHR * T0.W, T12.X, literal.y,
; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; CM-NEXT: BFE_INT T15.X, T12.Z, 0.0, literal.x,
; CM-NEXT: LSHR T2.Y, T12.Y, literal.x,
; CM-NEXT: BFE_INT T16.Z, T12.Y, 0.0, literal.x,
; CM-NEXT: LSHR * T1.W, T12.W, literal.x,
; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; CM-NEXT: BFE_INT T16.X, T12.X, 0.0, literal.x,
; CM-NEXT: LSHR T3.Y, T12.Z, literal.x,
; CM-NEXT: BFE_INT T12.Z, T11.W, 0.0, literal.x,
; CM-NEXT: BFE_INT * T15.W, PV.W, 0.0, literal.x,
; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; CM-NEXT: BFE_INT T12.X, T11.Z, 0.0, literal.x,
; CM-NEXT: BFE_INT T15.Y, PV.Y, 0.0, literal.x,
; CM-NEXT: BFE_INT T17.Z, T11.Y, 0.0, literal.x,
; CM-NEXT: BFE_INT * T16.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212
; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; CM-NEXT: BFE_INT T17.X, T11.X, 0.0, literal.x,
; CM-NEXT: BFE_INT T16.Y, T0.W, 0.0, literal.x,
; CM-NEXT: ADD_INT T1.Z, KC0[2].Y, literal.x,
; CM-NEXT: BFE_INT * T12.W, T1.Y, 0.0, literal.x,
; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; CM-NEXT: LSHR T18.X, PV.Z, literal.x,
; CM-NEXT: BFE_INT T12.Y, T0.Z, 0.0, literal.y,
; CM-NEXT: LSHR T0.Z, T11.X, literal.y,
; CM-NEXT: BFE_INT * T17.W, T0.Y, 0.0, literal.y,
; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; CM-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
; CM-NEXT: BFE_INT * T17.Y, PV.Z, 0.0, literal.y,
; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
%load = load <16 x i16>, ptr addrspace(1) %in
%ext = sext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, ptr addrspace(1) %out
2591 define amdgpu_kernel void @global_zextload_v32i16_to_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
2592 ; GCN-NOHSA-SI-LABEL: global_zextload_v32i16_to_v32i32:
2593 ; GCN-NOHSA-SI: ; %bb.0:
2594 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
2595 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
2596 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
2597 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2
2598 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3
2599 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
2600 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6
2601 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7
2602 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
2603 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
2604 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
2605 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
2606 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(3)
2607 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v3
2608 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v2
2609 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v23, 16, v1
2610 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v21, 16, v0
2611 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(2)
2612 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v27, 16, v7
2613 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v25, 16, v6
2614 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, 0xffff, v3
2615 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v2
2616 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v22, 0xffff, v1
2617 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v20, 0xffff, v0
2618 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v5
2619 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v4
2620 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v26, 0xffff, v7
2621 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v24, 0xffff, v6
2622 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, 0xffff, v5
2623 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v4
2624 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1)
2625 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v11
2626 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v10
2627 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v31, 16, v9
2628 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v29, 16, v8
2629 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v6, 0xffff, v11
2630 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v10
2631 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v30, 0xffff, v9
2632 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v28, 0xffff, v8
2633 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
2634 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v15
2635 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v14
2636 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v35, 16, v13
2637 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v33, 16, v12
2638 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v10, 0xffff, v15
2639 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v14
2640 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v34, 0xffff, v13
2641 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v32, 0xffff, v12
2642 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4
2643 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5
2644 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96
2645 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
2646 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64
2647 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
2648 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2649 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:48
2650 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0
2651 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
2652 ; GCN-NOHSA-SI-NEXT: s_endpgm
2654 ; GCN-HSA-LABEL: global_zextload_v32i16_to_v32i32:
2656 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
2657 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
2658 ; GCN-HSA-NEXT: s_add_u32 s4, s2, 16
2659 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0
2660 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
2661 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
2662 ; GCN-HSA-NEXT: s_add_u32 s4, s2, 32
2663 ; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
2664 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0
2665 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
2666 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
2667 ; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[0:1]
2668 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
2669 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
2670 ; GCN-HSA-NEXT: s_add_u32 s2, s2, 48
2671 ; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0
2672 ; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
2673 ; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3
2674 ; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2
2675 ; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
2676 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
2677 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
2678 ; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3
2679 ; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2
2680 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60
2681 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
2682 ; GCN-HSA-NEXT: v_mov_b32_e32 v27, s3
2683 ; GCN-HSA-NEXT: v_mov_b32_e32 v26, s2
2684 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70
2685 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
2686 ; GCN-HSA-NEXT: s_add_u32 s4, s0, 64
2687 ; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0
2688 ; GCN-HSA-NEXT: s_add_u32 s6, s0, 0x50
2689 ; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0
2690 ; GCN-HSA-NEXT: s_add_u32 s8, s0, 32
2691 ; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0
2692 ; GCN-HSA-NEXT: v_mov_b32_e32 v23, s9
2693 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s8
2694 ; GCN-HSA-NEXT: v_mov_b32_e32 v21, s1
2695 ; GCN-HSA-NEXT: v_mov_b32_e32 v20, s0
2696 ; GCN-HSA-NEXT: s_add_u32 s0, s0, 48
2697 ; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0
2698 ; GCN-HSA-NEXT: v_mov_b32_e32 v29, s3
2699 ; GCN-HSA-NEXT: v_mov_b32_e32 v31, s1
2700 ; GCN-HSA-NEXT: v_mov_b32_e32 v28, s2
2701 ; GCN-HSA-NEXT: v_mov_b32_e32 v30, s0
2702 ; GCN-HSA-NEXT: s_waitcnt vmcnt(3)
2703 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v5
2704 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v4
2705 ; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v5
2706 ; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v4
2707 ; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4
2708 ; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[16:19]
2709 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5
2710 ; GCN-HSA-NEXT: s_waitcnt vmcnt(3)
2711 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v13
2712 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v12
2713 ; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v13
2714 ; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v12
2715 ; GCN-HSA-NEXT: v_mov_b32_e32 v13, s7
2716 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[16:19]
2717 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v14
2718 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v7
2719 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v6
2720 ; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v7
2721 ; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v6
2722 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v15
2723 ; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v15
2724 ; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v14
2725 ; GCN-HSA-NEXT: v_mov_b32_e32 v12, s6
2726 ; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[4:7]
2727 ; GCN-HSA-NEXT: s_waitcnt vmcnt(4)
2728 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v23, 16, v3
2729 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v1
2730 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v0
2731 ; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v1
2732 ; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v0
2733 ; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[4:7]
2734 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v21, 16, v2
2735 ; GCN-HSA-NEXT: v_and_b32_e32 v22, 0xffff, v3
2736 ; GCN-HSA-NEXT: v_and_b32_e32 v20, 0xffff, v2
2737 ; GCN-HSA-NEXT: s_waitcnt vmcnt(4)
2738 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v11
2739 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v10
2740 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v14, 16, v9
2741 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v12, 16, v8
2742 ; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v11
2743 ; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v10
2744 ; GCN-HSA-NEXT: v_and_b32_e32 v13, 0xffff, v9
2745 ; GCN-HSA-NEXT: v_and_b32_e32 v11, 0xffff, v8
2746 ; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[20:23]
2747 ; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[11:14]
2748 ; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[4:7]
2749 ; GCN-HSA-NEXT: flat_store_dwordx4 v[30:31], v[16:19]
2750 ; GCN-HSA-NEXT: s_endpgm
2752 ; GCN-NOHSA-VI-LABEL: global_zextload_v32i16_to_v32i32:
2753 ; GCN-NOHSA-VI: ; %bb.0:
2754 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
2755 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
2756 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
2757 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2
2758 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3
2759 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
2760 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
2761 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
2762 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
2763 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
2764 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
2765 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
2766 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
2767 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
2768 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3)
2769 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v19, 16, v3
2770 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v18, 0xffff, v3
2771 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v17, 16, v2
2772 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
2773 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v31, 16, v15
2774 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v30, 0xffff, v15
2775 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v29, 16, v14
2776 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v28, 0xffff, v14
2777 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v13
2778 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v14, 0xffff, v13
2779 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v13, 16, v12
2780 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, 0xffff, v12
2781 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, 0xffff, v2
2782 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v1
2783 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v1
2784 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2785 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0
2786 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v23, 16, v7
2787 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v22, 0xffff, v7
2788 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v21, 16, v6
2789 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v20, 0xffff, v6
2790 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v5
2791 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v6, 0xffff, v5
2792 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v4
2793 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, 0xffff, v4
2794 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v27, 16, v11
2795 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v26, 0xffff, v11
2796 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v25, 16, v10
2797 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v24, 0xffff, v10
2798 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v9
2799 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, 0xffff, v9
2800 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v8
2801 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, 0xffff, v8
2802 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:96
2803 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:112
2804 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:64
2805 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:80
2806 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
2807 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:48
2808 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2809 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
2810 ; GCN-NOHSA-VI-NEXT: s_endpgm
2812 ; EG-LABEL: global_zextload_v32i16_to_v32i32:
2814 ; EG-NEXT: ALU 0, @20, KC0[CB0:0-32], KC1[]
2815 ; EG-NEXT: TEX 3 @12
2816 ; EG-NEXT: ALU 72, @21, KC0[CB0:0-32], KC1[]
2817 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T34.X, 0
2818 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T33.X, 0
2819 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T32.X, 0
2820 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T30.X, 0
2821 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T29.X, 0
2822 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T27.X, 0
2823 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T26.X, 0
2824 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T24.X, 1
2826 ; EG-NEXT: Fetch clause starting at 12:
2827 ; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1
2828 ; EG-NEXT: VTX_READ_128 T21.XYZW, T19.X, 48, #1
2829 ; EG-NEXT: VTX_READ_128 T22.XYZW, T19.X, 32, #1
2830 ; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 16, #1
2831 ; EG-NEXT: ALU clause starting at 20:
2832 ; EG-NEXT: MOV * T19.X, KC0[2].Z,
2833 ; EG-NEXT: ALU clause starting at 21:
2834 ; EG-NEXT: LSHR * T23.W, T20.W, literal.x,
2835 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
2836 ; EG-NEXT: AND_INT * T23.Z, T20.W, literal.x,
2837 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
2838 ; EG-NEXT: LSHR T23.Y, T20.Z, literal.x,
2839 ; EG-NEXT: LSHR * T20.W, T20.Y, literal.x,
2840 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
2841 ; EG-NEXT: AND_INT T23.X, T20.Z, literal.x,
2842 ; EG-NEXT: AND_INT T20.Z, T20.Y, literal.x,
2843 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
2844 ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
2845 ; EG-NEXT: LSHR T24.X, PV.W, literal.x,
2846 ; EG-NEXT: LSHR T20.Y, T20.X, literal.y,
2847 ; EG-NEXT: LSHR T25.W, T19.W, literal.y,
2848 ; EG-NEXT: AND_INT * T20.X, T20.X, literal.z,
2849 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2850 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
2851 ; EG-NEXT: AND_INT * T25.Z, T19.W, literal.x,
2852 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
2853 ; EG-NEXT: LSHR T26.X, KC0[2].Y, literal.x,
2854 ; EG-NEXT: LSHR T25.Y, T19.Z, literal.y,
2855 ; EG-NEXT: LSHR T19.W, T19.Y, literal.y,
2856 ; EG-NEXT: AND_INT * T25.X, T19.Z, literal.z,
2857 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2858 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
2859 ; EG-NEXT: AND_INT T19.Z, T19.Y, literal.x,
2860 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
2861 ; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44)
2862 ; EG-NEXT: LSHR T27.X, PV.W, literal.x,
2863 ; EG-NEXT: LSHR T19.Y, T19.X, literal.y,
2864 ; EG-NEXT: LSHR T28.W, T22.W, literal.y,
2865 ; EG-NEXT: AND_INT * T19.X, T19.X, literal.z,
2866 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2867 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
2868 ; EG-NEXT: AND_INT T28.Z, T22.W, literal.x,
2869 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
2870 ; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44)
2871 ; EG-NEXT: LSHR T29.X, PV.W, literal.x,
2872 ; EG-NEXT: LSHR T28.Y, T22.Z, literal.y,
2873 ; EG-NEXT: LSHR T22.W, T22.Y, literal.y,
2874 ; EG-NEXT: AND_INT * T28.X, T22.Z, literal.z,
2875 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2876 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
2877 ; EG-NEXT: AND_INT T22.Z, T22.Y, literal.x,
2878 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
2879 ; EG-NEXT: 65535(9.183409e-41), 80(1.121039e-43)
2880 ; EG-NEXT: LSHR T30.X, PV.W, literal.x,
2881 ; EG-NEXT: LSHR T22.Y, T22.X, literal.y,
2882 ; EG-NEXT: LSHR T31.W, T21.W, literal.y,
2883 ; EG-NEXT: AND_INT * T22.X, T22.X, literal.z,
2884 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2885 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
2886 ; EG-NEXT: AND_INT T31.Z, T21.W, literal.x,
2887 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
2888 ; EG-NEXT: 65535(9.183409e-41), 64(8.968310e-44)
2889 ; EG-NEXT: LSHR T32.X, PV.W, literal.x,
2890 ; EG-NEXT: LSHR T31.Y, T21.Z, literal.y,
2891 ; EG-NEXT: LSHR T21.W, T21.Y, literal.y,
2892 ; EG-NEXT: AND_INT * T31.X, T21.Z, literal.z,
2893 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2894 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
2895 ; EG-NEXT: AND_INT T21.Z, T21.Y, literal.x,
2896 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
2897 ; EG-NEXT: 65535(9.183409e-41), 112(1.569454e-43)
2898 ; EG-NEXT: LSHR T33.X, PV.W, literal.x,
2899 ; EG-NEXT: LSHR T21.Y, T21.X, literal.y,
2900 ; EG-NEXT: AND_INT * T21.X, T21.X, literal.z,
2901 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2902 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
2903 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
2904 ; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00)
2905 ; EG-NEXT: LSHR * T34.X, PV.W, literal.x,
2906 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
2908 ; CM-LABEL: global_zextload_v32i16_to_v32i32:
2910 ; CM-NEXT: ALU 0, @20, KC0[CB0:0-32], KC1[]
2911 ; CM-NEXT: TEX 3 @12
2912 ; CM-NEXT: ALU 65, @21, KC0[CB0:0-32], KC1[]
2913 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T33, T34.X
2914 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T31, T21.X
2915 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T30, T32.X
2916 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T28, T22.X
2917 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T27, T29.X
2918 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T25, T19.X
2919 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T24, T26.X
2920 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T23, T20.X
2922 ; CM-NEXT: Fetch clause starting at 12:
2923 ; CM-NEXT: VTX_READ_128 T20.XYZW, T19.X, 48, #1
2924 ; CM-NEXT: VTX_READ_128 T21.XYZW, T19.X, 0, #1
2925 ; CM-NEXT: VTX_READ_128 T22.XYZW, T19.X, 16, #1
2926 ; CM-NEXT: VTX_READ_128 T19.XYZW, T19.X, 32, #1
2927 ; CM-NEXT: ALU clause starting at 20:
2928 ; CM-NEXT: MOV * T19.X, KC0[2].Z,
2929 ; CM-NEXT: ALU clause starting at 21:
2930 ; CM-NEXT: LSHR * T23.W, T20.Y, literal.x,
2931 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
2932 ; CM-NEXT: AND_INT * T23.Z, T20.Y, literal.x,
2933 ; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
2934 ; CM-NEXT: LSHR T23.Y, T20.X, literal.x,
2935 ; CM-NEXT: LSHR * T24.W, T20.W, literal.x,
2936 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
2937 ; CM-NEXT: AND_INT T23.X, T20.X, literal.x,
2938 ; CM-NEXT: AND_INT T24.Z, T20.W, literal.x,
2939 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
2940 ; CM-NEXT: 65535(9.183409e-41), 96(1.345247e-43)
2941 ; CM-NEXT: LSHR T20.X, PV.W, literal.x,
2942 ; CM-NEXT: LSHR T24.Y, T20.Z, literal.y,
2943 ; CM-NEXT: LSHR * T25.W, T19.Y, literal.y,
2944 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2945 ; CM-NEXT: AND_INT T24.X, T20.Z, literal.x,
2946 ; CM-NEXT: AND_INT T25.Z, T19.Y, literal.x,
2947 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
2948 ; CM-NEXT: 65535(9.183409e-41), 112(1.569454e-43)
2949 ; CM-NEXT: LSHR T26.X, PV.W, literal.x,
2950 ; CM-NEXT: LSHR T25.Y, T19.X, literal.y,
2951 ; CM-NEXT: LSHR * T27.W, T19.W, literal.y,
2952 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2953 ; CM-NEXT: AND_INT T25.X, T19.X, literal.x,
2954 ; CM-NEXT: AND_INT T27.Z, T19.W, literal.x,
2955 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
2956 ; CM-NEXT: 65535(9.183409e-41), 64(8.968310e-44)
2957 ; CM-NEXT: LSHR T19.X, PV.W, literal.x,
2958 ; CM-NEXT: LSHR T27.Y, T19.Z, literal.y,
2959 ; CM-NEXT: LSHR * T28.W, T22.Y, literal.y,
2960 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2961 ; CM-NEXT: AND_INT T27.X, T19.Z, literal.x,
2962 ; CM-NEXT: AND_INT T28.Z, T22.Y, literal.x,
2963 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
2964 ; CM-NEXT: 65535(9.183409e-41), 80(1.121039e-43)
2965 ; CM-NEXT: LSHR T29.X, PV.W, literal.x,
2966 ; CM-NEXT: LSHR T28.Y, T22.X, literal.y,
2967 ; CM-NEXT: LSHR * T30.W, T22.W, literal.y,
2968 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2969 ; CM-NEXT: AND_INT T28.X, T22.X, literal.x,
2970 ; CM-NEXT: AND_INT T30.Z, T22.W, literal.x,
2971 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
2972 ; CM-NEXT: 65535(9.183409e-41), 32(4.484155e-44)
2973 ; CM-NEXT: LSHR T22.X, PV.W, literal.x,
2974 ; CM-NEXT: LSHR T30.Y, T22.Z, literal.y,
2975 ; CM-NEXT: LSHR * T31.W, T21.Y, literal.y,
2976 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2977 ; CM-NEXT: AND_INT T30.X, T22.Z, literal.x,
2978 ; CM-NEXT: AND_INT T31.Z, T21.Y, literal.x,
2979 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
2980 ; CM-NEXT: 65535(9.183409e-41), 48(6.726233e-44)
2981 ; CM-NEXT: LSHR T32.X, PV.W, literal.x,
2982 ; CM-NEXT: LSHR T31.Y, T21.X, literal.y,
2983 ; CM-NEXT: LSHR * T33.W, T21.W, literal.y,
2984 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2985 ; CM-NEXT: AND_INT T31.X, T21.X, literal.x,
2986 ; CM-NEXT: AND_INT * T33.Z, T21.W, literal.x,
2987 ; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
2988 ; CM-NEXT: LSHR T21.X, KC0[2].Y, literal.x,
2989 ; CM-NEXT: LSHR * T33.Y, T21.Z, literal.y,
2990 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
2991 ; CM-NEXT: AND_INT T33.X, T21.Z, literal.x,
2992 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
2993 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
2994 ; CM-NEXT: LSHR * T34.X, PV.W, literal.x,
2995 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
2996 %load = load <32 x i16>, ptr addrspace(1) %in
2997 %ext = zext <32 x i16> %load to <32 x i32>
2998 store <32 x i32> %ext, ptr addrspace(1) %out
; Test kernel: loads a <32 x i16> vector from %in (addrspace(1)), sign-extends
; every element to i32 and stores the resulting <32 x i32> vector to %out.
; The check lines below are autogenerated (see the note at the top of this
; file), one group per FileCheck prefix used by the RUN lines. Regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
3002 define amdgpu_kernel void @global_sextload_v32i16_to_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; Expected output of the plain -mtriple=amdgcn run: four buffer_load_dwordx4,
; then per loaded dword a v_bfe_i32 (offset 0, width 16) for the low half and a
; v_ashrrev_i32 by 16 for the high half, then eight buffer_store_dwordx4.
3003 ; GCN-NOHSA-SI-LABEL: global_sextload_v32i16_to_v32i32:
3004 ; GCN-NOHSA-SI: ; %bb.0:
3005 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
3006 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
3007 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
3008 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2
3009 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3
3010 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
3011 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6
3012 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7
3013 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
3014 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
3015 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
3016 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
3017 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(3)
3018 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v19, 16, v3
3019 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 16, v2
3020 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v18, v3, 0, 16
3021 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v16, v2, 0, 16
3022 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 16, v1
3023 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v21, 16, v0
3024 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v1, 0, 16
3025 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v20, v0, 0, 16
3026 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(2)
3027 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v7
3028 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v6
3029 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v7, 0, 16
3030 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v6, 0, 16
3031 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v27, 16, v5
3032 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 16, v4
3033 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v26, v5, 0, 16
3034 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v24, v4, 0, 16
3035 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1)
3036 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 16, v11
3037 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 16, v10
3038 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v11, 0, 16
3039 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v10, 0, 16
3040 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v31, 16, v9
3041 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v29, 16, v8
3042 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v30, v9, 0, 16
3043 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v28, v8, 0, 16
3044 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
3045 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 16, v15
3046 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 16, v14
3047 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v15, 0, 16
3048 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v14, 0, 16
3049 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v35, 16, v13
3050 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v33, 16, v12
3051 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v34, v13, 0, 16
3052 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v32, v12, 0, 16
3053 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4
3054 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5
3055 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96
3056 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
3057 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64
3058 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
3059 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32
3060 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3061 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0
3062 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
3063 ; GCN-NOHSA-SI-NEXT: s_endpgm
; Expected output of the amdgcn--amdhsa / kaveri run: memory is accessed with
; flat_load_dwordx4 / flat_store_dwordx4, and each address is formed with an
; s_add_u32 / s_addc_u32 pair before being copied into VGPRs.
3065 ; GCN-HSA-LABEL: global_sextload_v32i16_to_v32i32:
3067 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
3068 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
3069 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
3070 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
3071 ; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[0:1]
3072 ; GCN-HSA-NEXT: s_add_u32 s4, s2, 48
3073 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0
3074 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
3075 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
3076 ; GCN-HSA-NEXT: s_add_u32 s4, s2, 32
3077 ; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
3078 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0
3079 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
3080 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
3081 ; GCN-HSA-NEXT: s_add_u32 s2, s2, 16
3082 ; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
3083 ; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0
3084 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
3085 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
3086 ; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
3087 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
3088 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
3089 ; GCN-HSA-NEXT: v_mov_b32_e32 v23, s3
3090 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s2
3091 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60
3092 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
3093 ; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3
3094 ; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2
3095 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70
3096 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
3097 ; GCN-HSA-NEXT: v_mov_b32_e32 v27, s3
3098 ; GCN-HSA-NEXT: v_mov_b32_e32 v21, s1
3099 ; GCN-HSA-NEXT: v_mov_b32_e32 v26, s2
3100 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 64
3101 ; GCN-HSA-NEXT: v_mov_b32_e32 v20, s0
3102 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
3103 ; GCN-HSA-NEXT: s_waitcnt vmcnt(3)
3104 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v19, 16, v13
3105 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 16, v12
3106 ; GCN-HSA-NEXT: v_bfe_i32 v18, v13, 0, 16
3107 ; GCN-HSA-NEXT: v_bfe_i32 v16, v12, 0, 16
3108 ; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19]
3109 ; GCN-HSA-NEXT: v_mov_b32_e32 v20, s3
3110 ; GCN-HSA-NEXT: v_mov_b32_e32 v19, s2
3111 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50
3112 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
3113 ; GCN-HSA-NEXT: v_mov_b32_e32 v29, s3
3114 ; GCN-HSA-NEXT: v_mov_b32_e32 v28, s2
3115 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 32
3116 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v18, 16, v15
3117 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v16, 16, v14
3118 ; GCN-HSA-NEXT: v_bfe_i32 v17, v15, 0, 16
3119 ; GCN-HSA-NEXT: v_bfe_i32 v15, v14, 0, 16
3120 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
3121 ; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[15:18]
3122 ; GCN-HSA-NEXT: s_waitcnt vmcnt(4)
3123 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v14, 16, v11
3124 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v12, 16, v10
3125 ; GCN-HSA-NEXT: v_bfe_i32 v13, v11, 0, 16
3126 ; GCN-HSA-NEXT: v_bfe_i32 v11, v10, 0, 16
3127 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v18, 16, v9
3128 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v16, 16, v8
3129 ; GCN-HSA-NEXT: v_bfe_i32 v17, v9, 0, 16
3130 ; GCN-HSA-NEXT: v_bfe_i32 v15, v8, 0, 16
3131 ; GCN-HSA-NEXT: s_add_u32 s0, s0, 48
3132 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s3
3133 ; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0
3134 ; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[15:18]
3135 ; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[11:14]
3136 ; GCN-HSA-NEXT: s_waitcnt vmcnt(5)
3137 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 16, v7
3138 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 16, v6
3139 ; GCN-HSA-NEXT: v_bfe_i32 v9, v7, 0, 16
3140 ; GCN-HSA-NEXT: v_bfe_i32 v7, v6, 0, 16
3141 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v14, 16, v5
3142 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v12, 16, v4
3143 ; GCN-HSA-NEXT: v_bfe_i32 v13, v5, 0, 16
3144 ; GCN-HSA-NEXT: v_bfe_i32 v11, v4, 0, 16
3145 ; GCN-HSA-NEXT: v_mov_b32_e32 v21, s2
3146 ; GCN-HSA-NEXT: v_mov_b32_e32 v31, s1
3147 ; GCN-HSA-NEXT: flat_store_dwordx4 v[19:20], v[11:14]
3148 ; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[7:10]
3149 ; GCN-HSA-NEXT: v_mov_b32_e32 v30, s0
3150 ; GCN-HSA-NEXT: s_waitcnt vmcnt(6)
3151 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 16, v1
3152 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 16, v0
3153 ; GCN-HSA-NEXT: v_bfe_i32 v9, v1, 0, 16
3154 ; GCN-HSA-NEXT: v_bfe_i32 v7, v0, 0, 16
3155 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v6, 16, v3
3156 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v4, 16, v2
3157 ; GCN-HSA-NEXT: v_bfe_i32 v5, v3, 0, 16
3158 ; GCN-HSA-NEXT: v_bfe_i32 v3, v2, 0, 16
3159 ; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[7:10]
3160 ; GCN-HSA-NEXT: flat_store_dwordx4 v[30:31], v[3:6]
3161 ; GCN-HSA-NEXT: s_endpgm
; Expected output of the amdgcn / tonga run (-flat-for-global): the same
; buffer_load_dwordx4 / buffer_store_dwordx4 instructions as the first group,
; with the extension instructions scheduled in a different order.
3163 ; GCN-NOHSA-VI-LABEL: global_sextload_v32i16_to_v32i32:
3164 ; GCN-NOHSA-VI: ; %bb.0:
3165 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
3166 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
3167 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
3168 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2
3169 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3
3170 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
3171 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
3172 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
3173 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
3174 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
3175 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
3176 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
3177 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
3178 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
3179 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3)
3180 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 16, v3
3181 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 16, v2
3182 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v3, 0, 16
3183 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
3184 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v35, 16, v13
3185 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v33, 16, v12
3186 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v34, v13, 0, 16
3187 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v32, v12, 0, 16
3188 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v2, 0, 16
3189 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v23, 16, v1
3190 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v21, 16, v0
3191 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v22, v1, 0, 16
3192 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v20, v0, 0, 16
3193 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v7
3194 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v6
3195 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v7, 0, 16
3196 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v6, 0, 16
3197 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v27, 16, v5
3198 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v25, 16, v4
3199 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v26, v5, 0, 16
3200 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v24, v4, 0, 16
3201 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 16, v11
3202 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 16, v10
3203 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v11, 0, 16
3204 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v10, 0, 16
3205 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v31, 16, v9
3206 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v29, 16, v8
3207 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v30, v9, 0, 16
3208 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v28, v8, 0, 16
3209 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 16, v15
3210 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 16, v14
3211 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v15, 0, 16
3212 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v14, 0, 16
3213 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96
3214 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
3215 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64
3216 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
3217 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32
3218 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3219 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0
3220 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
3221 ; GCN-NOHSA-VI-NEXT: s_endpgm
; Expected output of the r600 / redwood run: four VTX_READ_128 fetches, BFE_INT
; (preceded by LSHR by 16 for the high halves) for the extension, and eight
; MEM_RAT_CACHELESS STORE_RAW exports.
3223 ; EG-LABEL: global_sextload_v32i16_to_v32i32:
3225 ; EG-NEXT: ALU 9, @20, KC0[CB0:0-32], KC1[]
3226 ; EG-NEXT: TEX 3 @12
3227 ; EG-NEXT: ALU 73, @30, KC0[CB0:0-32], KC1[]
3228 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T22.X, 0
3229 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T34.X, 0
3230 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T28.X, 0
3231 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T27.X, 0
3232 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T26.X, 0
3233 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0
3234 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T20.X, 0
3235 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T19.X, 1
3237 ; EG-NEXT: Fetch clause starting at 12:
3238 ; EG-NEXT: VTX_READ_128 T23.XYZW, T22.X, 16, #1
3239 ; EG-NEXT: VTX_READ_128 T24.XYZW, T22.X, 32, #1
3240 ; EG-NEXT: VTX_READ_128 T25.XYZW, T22.X, 0, #1
3241 ; EG-NEXT: VTX_READ_128 T22.XYZW, T22.X, 48, #1
3242 ; EG-NEXT: ALU clause starting at 20:
3243 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
3244 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3245 ; EG-NEXT: LSHR T19.X, PV.W, literal.x,
3246 ; EG-NEXT: LSHR * T20.X, KC0[2].Y, literal.x,
3247 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
3248 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
3249 ; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
3250 ; EG-NEXT: LSHR T21.X, PV.W, literal.x,
3251 ; EG-NEXT: MOV * T22.X, KC0[2].Z,
3252 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
3253 ; EG-NEXT: ALU clause starting at 30:
3254 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
3255 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
3256 ; EG-NEXT: LSHR T26.X, PV.W, literal.x,
3257 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
3258 ; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43)
3259 ; EG-NEXT: LSHR T27.X, PV.W, literal.x,
3260 ; EG-NEXT: LSHR T0.W, T22.Y, literal.y,
3261 ; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
3262 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
3263 ; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00)
3264 ; EG-NEXT: LSHR T28.X, PS, literal.x,
3265 ; EG-NEXT: LSHR T0.Y, T22.W, literal.y,
3266 ; EG-NEXT: BFE_INT T29.Z, T25.W, 0.0, literal.y, BS:VEC_120/SCL_212
3267 ; EG-NEXT: LSHR T1.W, T24.Y, literal.y,
3268 ; EG-NEXT: LSHR * T2.W, T24.W, literal.y,
3269 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
3270 ; EG-NEXT: BFE_INT T29.X, T25.Z, 0.0, literal.x,
3271 ; EG-NEXT: LSHR T1.Y, T23.Y, literal.x,
3272 ; EG-NEXT: BFE_INT T30.Z, T25.Y, 0.0, literal.x, BS:VEC_120/SCL_212
3273 ; EG-NEXT: LSHR T3.W, T23.W, literal.x,
3274 ; EG-NEXT: LSHR * T4.W, T25.W, literal.x,
3275 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3276 ; EG-NEXT: BFE_INT T30.X, T25.X, 0.0, literal.x,
3277 ; EG-NEXT: LSHR T2.Y, T25.Y, literal.x,
3278 ; EG-NEXT: BFE_INT T31.Z, T23.W, 0.0, literal.x,
3279 ; EG-NEXT: BFE_INT T29.W, PS, 0.0, literal.x,
3280 ; EG-NEXT: LSHR * T4.W, T25.Z, literal.x,
3281 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3282 ; EG-NEXT: BFE_INT T31.X, T23.Z, 0.0, literal.x,
3283 ; EG-NEXT: BFE_INT T29.Y, PS, 0.0, literal.x,
3284 ; EG-NEXT: BFE_INT T25.Z, T23.Y, 0.0, literal.x,
3285 ; EG-NEXT: BFE_INT T30.W, PV.Y, 0.0, literal.x,
3286 ; EG-NEXT: LSHR * T4.W, T25.X, literal.x,
3287 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3288 ; EG-NEXT: BFE_INT T25.X, T23.X, 0.0, literal.x,
3289 ; EG-NEXT: BFE_INT T30.Y, PS, 0.0, literal.x,
3290 ; EG-NEXT: BFE_INT T32.Z, T24.W, 0.0, literal.x,
3291 ; EG-NEXT: BFE_INT T31.W, T3.W, 0.0, literal.x, BS:VEC_120/SCL_212
3292 ; EG-NEXT: LSHR * T3.W, T23.Z, literal.x,
3293 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3294 ; EG-NEXT: BFE_INT T32.X, T24.Z, 0.0, literal.x,
3295 ; EG-NEXT: BFE_INT T31.Y, PS, 0.0, literal.x,
3296 ; EG-NEXT: BFE_INT T23.Z, T24.Y, 0.0, literal.x,
3297 ; EG-NEXT: BFE_INT T25.W, T1.Y, 0.0, literal.x, BS:VEC_120/SCL_212
3298 ; EG-NEXT: LSHR * T3.W, T23.X, literal.x,
3299 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3300 ; EG-NEXT: BFE_INT T23.X, T24.X, 0.0, literal.x,
3301 ; EG-NEXT: BFE_INT T25.Y, PS, 0.0, literal.x,
3302 ; EG-NEXT: BFE_INT T33.Z, T22.W, 0.0, literal.x,
3303 ; EG-NEXT: BFE_INT T32.W, T2.W, 0.0, literal.x, BS:VEC_120/SCL_212
3304 ; EG-NEXT: LSHR * T2.W, T24.Z, literal.x,
3305 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3306 ; EG-NEXT: BFE_INT T33.X, T22.Z, 0.0, literal.x,
3307 ; EG-NEXT: BFE_INT T32.Y, PS, 0.0, literal.x,
3308 ; EG-NEXT: BFE_INT T24.Z, T22.Y, 0.0, literal.x,
3309 ; EG-NEXT: BFE_INT T23.W, T1.W, 0.0, literal.x,
3310 ; EG-NEXT: LSHR * T1.W, T24.X, literal.x,
3311 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3312 ; EG-NEXT: BFE_INT T24.X, T22.X, 0.0, literal.x,
3313 ; EG-NEXT: BFE_INT T23.Y, PS, 0.0, literal.x,
3314 ; EG-NEXT: LSHR T0.Z, T22.Z, literal.x,
3315 ; EG-NEXT: BFE_INT T33.W, T0.Y, 0.0, literal.x,
3316 ; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
3317 ; EG-NEXT: 16(2.242078e-44), 112(1.569454e-43)
3318 ; EG-NEXT: LSHR T34.X, PS, literal.x,
3319 ; EG-NEXT: BFE_INT T33.Y, PV.Z, 0.0, literal.y,
3320 ; EG-NEXT: LSHR T0.Z, T22.X, literal.y,
3321 ; EG-NEXT: BFE_INT T24.W, T0.W, 0.0, literal.y,
3322 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
3323 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
3324 ; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00)
3325 ; EG-NEXT: LSHR T22.X, PS, literal.x,
3326 ; EG-NEXT: BFE_INT * T24.Y, PV.Z, 0.0, literal.y,
3327 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; Expected output of the r600 / cayman run: the fetches are split over two
; fetch clauses (one read, then three) and the results are written with eight
; MEM_RAT_CACHELESS STORE_DWORD exports.
3329 ; CM-LABEL: global_sextload_v32i16_to_v32i32:
3331 ; CM-NEXT: ALU 0, @22, KC0[CB0:0-32], KC1[]
3332 ; CM-NEXT: TEX 0 @14
3333 ; CM-NEXT: ALU 7, @23, KC0[CB0:0-32], KC1[]
3334 ; CM-NEXT: TEX 2 @16
3335 ; CM-NEXT: ALU 76, @31, KC0[CB0:0-32], KC1[]
3336 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T33, T34.X
3337 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T19, T20.X
3338 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T32, T28.X
3339 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T23, T27.X
3340 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T31, T26.X
3341 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T22, T25.X
3342 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T30, T24.X
3343 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T29, T21.X
3345 ; CM-NEXT: Fetch clause starting at 14:
3346 ; CM-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1
3347 ; CM-NEXT: Fetch clause starting at 16:
3348 ; CM-NEXT: VTX_READ_128 T22.XYZW, T19.X, 48, #1
3349 ; CM-NEXT: VTX_READ_128 T23.XYZW, T19.X, 32, #1
3350 ; CM-NEXT: VTX_READ_128 T19.XYZW, T19.X, 16, #1
3351 ; CM-NEXT: ALU clause starting at 22:
3352 ; CM-NEXT: MOV * T19.X, KC0[2].Z,
3353 ; CM-NEXT: ALU clause starting at 23:
3354 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
3355 ; CM-NEXT: 96(1.345247e-43), 0(0.000000e+00)
3356 ; CM-NEXT: LSHR T21.X, PV.W, literal.x,
3357 ; CM-NEXT: LSHR T0.Y, T20.Z, literal.y,
3358 ; CM-NEXT: LSHR T0.Z, T20.W, literal.y,
3359 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
3360 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
3361 ; CM-NEXT: 112(1.569454e-43), 0(0.000000e+00)
3362 ; CM-NEXT: ALU clause starting at 31:
3363 ; CM-NEXT: LSHR T24.X, T0.W, literal.x,
3364 ; CM-NEXT: LSHR T1.Y, T20.Y, literal.y,
3365 ; CM-NEXT: LSHR T1.Z, T19.Z, literal.y,
3366 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
3367 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
3368 ; CM-NEXT: 64(8.968310e-44), 0(0.000000e+00)
3369 ; CM-NEXT: LSHR T25.X, PV.W, literal.x,
3370 ; CM-NEXT: LSHR T2.Y, T19.W, literal.y,
3371 ; CM-NEXT: LSHR T2.Z, T19.X, literal.y,
3372 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
3373 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
3374 ; CM-NEXT: 80(1.121039e-43), 0(0.000000e+00)
3375 ; CM-NEXT: LSHR T26.X, PV.W, literal.x,
3376 ; CM-NEXT: LSHR T3.Y, T19.Y, literal.y,
3377 ; CM-NEXT: LSHR T3.Z, T23.Z, literal.y,
3378 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
3379 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
3380 ; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00)
3381 ; CM-NEXT: LSHR T27.X, PV.W, literal.x,
3382 ; CM-NEXT: LSHR T4.Y, T23.W, literal.y,
3383 ; CM-NEXT: LSHR T4.Z, T23.X, literal.y,
3384 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
3385 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
3386 ; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00)
3387 ; CM-NEXT: LSHR T28.X, PV.W, literal.x,
3388 ; CM-NEXT: LSHR T5.Y, T23.Y, literal.y,
3389 ; CM-NEXT: BFE_INT T29.Z, T22.Y, 0.0, literal.y, BS:VEC_120/SCL_212
3390 ; CM-NEXT: LSHR * T0.W, T22.Z, literal.y,
3391 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
3392 ; CM-NEXT: BFE_INT T29.X, T22.X, 0.0, literal.x,
3393 ; CM-NEXT: LSHR T6.Y, T22.W, literal.x,
3394 ; CM-NEXT: BFE_INT T30.Z, T22.W, 0.0, literal.x,
3395 ; CM-NEXT: LSHR * T1.W, T22.Y, literal.x,
3396 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3397 ; CM-NEXT: BFE_INT T30.X, T22.Z, 0.0, literal.x,
3398 ; CM-NEXT: LSHR T7.Y, T22.X, literal.x,
3399 ; CM-NEXT: BFE_INT T22.Z, T23.Y, 0.0, literal.x,
3400 ; CM-NEXT: BFE_INT * T29.W, PV.W, 0.0, literal.x,
3401 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3402 ; CM-NEXT: BFE_INT T22.X, T23.X, 0.0, literal.x,
3403 ; CM-NEXT: BFE_INT T29.Y, PV.Y, 0.0, literal.x,
3404 ; CM-NEXT: BFE_INT T31.Z, T23.W, 0.0, literal.x,
3405 ; CM-NEXT: BFE_INT * T30.W, T6.Y, 0.0, literal.x,
3406 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3407 ; CM-NEXT: BFE_INT T31.X, T23.Z, 0.0, literal.x,
3408 ; CM-NEXT: BFE_INT T30.Y, T0.W, 0.0, literal.x,
3409 ; CM-NEXT: BFE_INT T23.Z, T19.Y, 0.0, literal.x,
3410 ; CM-NEXT: BFE_INT * T22.W, T5.Y, 0.0, literal.x, BS:VEC_120/SCL_212
3411 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3412 ; CM-NEXT: BFE_INT T23.X, T19.X, 0.0, literal.x,
3413 ; CM-NEXT: BFE_INT T22.Y, T4.Z, 0.0, literal.x,
3414 ; CM-NEXT: BFE_INT T32.Z, T19.W, 0.0, literal.x,
3415 ; CM-NEXT: BFE_INT * T31.W, T4.Y, 0.0, literal.x,
3416 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3417 ; CM-NEXT: BFE_INT T32.X, T19.Z, 0.0, literal.x,
3418 ; CM-NEXT: BFE_INT T31.Y, T3.Z, 0.0, literal.x, BS:VEC_120/SCL_212
3419 ; CM-NEXT: BFE_INT T19.Z, T20.Y, 0.0, literal.x,
3420 ; CM-NEXT: BFE_INT * T23.W, T3.Y, 0.0, literal.x, BS:VEC_120/SCL_212
3421 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3422 ; CM-NEXT: BFE_INT T19.X, T20.X, 0.0, literal.x,
3423 ; CM-NEXT: BFE_INT T23.Y, T2.Z, 0.0, literal.x,
3424 ; CM-NEXT: BFE_INT T33.Z, T20.W, 0.0, literal.x,
3425 ; CM-NEXT: BFE_INT * T32.W, T2.Y, 0.0, literal.x,
3426 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3427 ; CM-NEXT: BFE_INT T33.X, T20.Z, 0.0, literal.x,
3428 ; CM-NEXT: BFE_INT T32.Y, T1.Z, 0.0, literal.x, BS:VEC_120/SCL_212
3429 ; CM-NEXT: LSHR T1.Z, T20.X, literal.x,
3430 ; CM-NEXT: BFE_INT * T19.W, T1.Y, 0.0, literal.x,
3431 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3432 ; CM-NEXT: LSHR T20.X, KC0[2].Y, literal.x,
3433 ; CM-NEXT: BFE_INT T19.Y, PV.Z, 0.0, literal.y,
3434 ; CM-NEXT: ADD_INT T1.Z, KC0[2].Y, literal.y,
3435 ; CM-NEXT: BFE_INT * T33.W, T0.Z, 0.0, literal.y,
3436 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
3437 ; CM-NEXT: LSHR T34.X, PV.Z, literal.x,
3438 ; CM-NEXT: BFE_INT * T33.Y, T0.Y, 0.0, literal.y,
3439 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; IR under test: a single wide vector load, a sext to <32 x i32>, and a single
; wide vector store.
3440 %load = load <32 x i16>, ptr addrspace(1) %in
3441 %ext = sext <32 x i16> %load to <32 x i32>
3442 store <32 x i32> %ext, ptr addrspace(1) %out
3446 define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
3447 ; GCN-NOHSA-SI-LABEL: global_zextload_v64i16_to_v64i32:
3448 ; GCN-NOHSA-SI: ; %bb.0:
3449 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
3450 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
3451 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, -1
3452 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s15, 0xe8f000
3453 ; GCN-NOHSA-SI-NEXT: s_add_u32 s12, s12, s9
3454 ; GCN-NOHSA-SI-NEXT: s_addc_u32 s13, s13, 0
3455 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
3456 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
3457 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
3458 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2
3459 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3
3460 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
3461 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6
3462 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7
3463 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0
3464 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:16
3465 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:32
3466 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:48
3467 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[27:30], off, s[8:11], 0 offset:64
3468 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[31:34], off, s[8:11], 0 offset:80
3469 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[35:38], off, s[8:11], 0 offset:96
3470 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[39:42], off, s[8:11], 0 offset:112
3471 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(7)
3472 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v18, 16, v15
3473 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v16, 16, v14
3474 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v21, 16, v13
3475 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v12
3476 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(6)
3477 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v26, 16, v11
3478 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v24, 16, v10
3479 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v17, 0xffff, v15
3480 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v15, 0xffff, v14
3481 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v15, off, s[12:15], 0 ; 4-byte Folded Spill
3482 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
3483 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v16, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
3484 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v17, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill
3485 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v18, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill
3486 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v20, 0xffff, v13
3487 ; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
3488 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, 0xffff, v12
3489 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v18, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill
3490 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
3491 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v19, off, s[12:15], 0 offset:20 ; 4-byte Folded Spill
3492 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v20, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill
3493 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v21, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill
3494 ; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(2)
3495 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v9
3496 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v8
3497 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v25, 0xffff, v11
3498 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v23, 0xffff, v10
3499 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, 0xffff, v9
3500 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v8
3501 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v15, 16, v7
3502 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v13, 16, v6
3503 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v46, 16, v5
3504 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v44, 16, v4
3505 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v14, 0xffff, v7
3506 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v12, 0xffff, v6
3507 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v45, 0xffff, v5
3508 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v43, 0xffff, v4
3509 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v3
3510 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v2
3511 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v50, 16, v1
3512 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v48, 16, v0
3513 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v10, 0xffff, v3
3514 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v2
3515 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v49, 0xffff, v1
3516 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v47, 0xffff, v0
3517 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v54, 16, v30
3518 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v52, 16, v29
3519 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v58, 16, v28
3520 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v56, 16, v27
3521 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v53, 0xffff, v30
3522 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v51, 0xffff, v29
3523 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v57, 0xffff, v28
3524 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v55, 0xffff, v27
3525 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v30, 16, v34
3526 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v28, 16, v33
3527 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v62, 16, v32
3528 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v60, 16, v31
3529 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v29, 0xffff, v34
3530 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v27, 0xffff, v33
3531 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v61, 0xffff, v32
3532 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v59, 0xffff, v31
3533 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v34, 16, v38
3534 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v32, 16, v37
3535 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v36
3536 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v35
3537 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v33, 0xffff, v38
3538 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v31, 0xffff, v37
3539 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v6, 0xffff, v36
3540 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v35
3541 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v38, 16, v42
3542 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v36, 16, v41
3543 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v40
3544 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v39
3545 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v37, 0xffff, v42
3546 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v35, 0xffff, v41
3547 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, 0xffff, v40
3548 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v39
3549 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4
3550 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5
3551 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
3552 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[35:38], off, s[0:3], 0 offset:240
3553 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192
3554 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[31:34], off, s[0:3], 0 offset:208
3555 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[59:62], off, s[0:3], 0 offset:160
3556 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:176
3557 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[55:58], off, s[0:3], 0 offset:128
3558 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[51:54], off, s[0:3], 0 offset:144
3559 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[47:50], off, s[0:3], 0 offset:96
3560 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
3561 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[43:46], off, s[0:3], 0 offset:64
3562 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:80
3563 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32
3564 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:48
3565 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload
3566 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload
3567 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload
3568 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload
3569 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
3570 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
3571 ; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
3572 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
3573 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
3574 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
3575 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
3576 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
3577 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3578 ; GCN-NOHSA-SI-NEXT: s_endpgm
3580 ; GCN-HSA-LABEL: global_zextload_v64i16_to_v64i32:
3582 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
3583 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
3584 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
3585 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
3586 ; GCN-HSA-NEXT: flat_load_dwordx4 v[24:27], v[0:1]
3587 ; GCN-HSA-NEXT: s_add_u32 s4, s2, 16
3588 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0
3589 ; GCN-HSA-NEXT: s_add_u32 s6, s2, 32
3590 ; GCN-HSA-NEXT: s_addc_u32 s7, s3, 0
3591 ; GCN-HSA-NEXT: s_add_u32 s8, s2, 48
3592 ; GCN-HSA-NEXT: s_addc_u32 s9, s3, 0
3593 ; GCN-HSA-NEXT: v_mov_b32_e32 v17, s9
3594 ; GCN-HSA-NEXT: s_add_u32 s10, s2, 64
3595 ; GCN-HSA-NEXT: v_mov_b32_e32 v16, s8
3596 ; GCN-HSA-NEXT: flat_load_dwordx4 v[20:23], v[16:17]
3597 ; GCN-HSA-NEXT: s_addc_u32 s11, s3, 0
3598 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10
3599 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s11
3600 ; GCN-HSA-NEXT: s_add_u32 s10, s2, 0x50
3601 ; GCN-HSA-NEXT: s_addc_u32 s11, s3, 0
3602 ; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10
3603 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11
3604 ; GCN-HSA-NEXT: s_add_u32 s10, s2, 0x60
3605 ; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
3606 ; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[4:5]
3607 ; GCN-HSA-NEXT: s_addc_u32 s11, s3, 0
3608 ; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10
3609 ; GCN-HSA-NEXT: s_add_u32 s2, s2, 0x70
3610 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11
3611 ; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0
3612 ; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
3613 ; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3
3614 ; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2
3615 ; GCN-HSA-NEXT: flat_load_dwordx4 v[28:31], v[12:13]
3616 ; GCN-HSA-NEXT: v_mov_b32_e32 v13, s5
3617 ; GCN-HSA-NEXT: v_mov_b32_e32 v15, s7
3618 ; GCN-HSA-NEXT: v_mov_b32_e32 v12, s4
3619 ; GCN-HSA-NEXT: v_mov_b32_e32 v14, s6
3620 ; GCN-HSA-NEXT: flat_load_dwordx4 v[16:19], v[12:13]
3621 ; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[14:15]
3622 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
3623 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
3624 ; GCN-HSA-NEXT: v_mov_b32_e32 v37, s1
3625 ; GCN-HSA-NEXT: v_mov_b32_e32 v36, s0
3626 ; GCN-HSA-NEXT: s_waitcnt vmcnt(7)
3627 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v35, 16, v25
3628 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v33, 16, v24
3629 ; GCN-HSA-NEXT: v_and_b32_e32 v34, 0xffff, v25
3630 ; GCN-HSA-NEXT: v_and_b32_e32 v32, 0xffff, v24
3631 ; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3
3632 ; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2
3633 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0
3634 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
3635 ; GCN-HSA-NEXT: s_add_u32 s4, s0, 0xf0
3636 ; GCN-HSA-NEXT: flat_store_dwordx4 v[36:37], v[32:35]
3637 ; GCN-HSA-NEXT: v_mov_b32_e32 v37, s3
3638 ; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0
3639 ; GCN-HSA-NEXT: v_mov_b32_e32 v36, s2
3640 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0
3641 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
3642 ; GCN-HSA-NEXT: s_add_u32 s6, s0, 0xd0
3643 ; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0
3644 ; GCN-HSA-NEXT: s_add_u32 s8, s0, 0xa0
3645 ; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0
3646 ; GCN-HSA-NEXT: s_add_u32 s10, s0, 0xb0
3647 ; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0
3648 ; GCN-HSA-NEXT: s_add_u32 s12, s0, 0x80
3649 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v35, 16, v27
3650 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v33, 16, v26
3651 ; GCN-HSA-NEXT: v_and_b32_e32 v34, 0xffff, v27
3652 ; GCN-HSA-NEXT: v_and_b32_e32 v32, 0xffff, v26
3653 ; GCN-HSA-NEXT: s_addc_u32 s13, s1, 0
3654 ; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[32:35]
3655 ; GCN-HSA-NEXT: s_waitcnt vmcnt(7)
3656 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v1
3657 ; GCN-HSA-NEXT: v_mov_b32_e32 v33, s13
3658 ; GCN-HSA-NEXT: v_mov_b32_e32 v32, s12
3659 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v0
3660 ; GCN-HSA-NEXT: v_and_b32_e32 v26, 0xffff, v1
3661 ; GCN-HSA-NEXT: v_and_b32_e32 v24, 0xffff, v0
3662 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8
3663 ; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[24:27]
3664 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9
3665 ; GCN-HSA-NEXT: s_waitcnt vmcnt(7)
3666 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v9
3667 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v8
3668 ; GCN-HSA-NEXT: v_and_b32_e32 v26, 0xffff, v9
3669 ; GCN-HSA-NEXT: v_and_b32_e32 v24, 0xffff, v8
3670 ; GCN-HSA-NEXT: v_mov_b32_e32 v8, s10
3671 ; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[24:27]
3672 ; GCN-HSA-NEXT: v_mov_b32_e32 v9, s11
3673 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v11
3674 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v10
3675 ; GCN-HSA-NEXT: v_and_b32_e32 v26, 0xffff, v11
3676 ; GCN-HSA-NEXT: v_and_b32_e32 v24, 0xffff, v10
3677 ; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[24:27]
3678 ; GCN-HSA-NEXT: s_waitcnt vmcnt(8)
3679 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v5
3680 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v4
3681 ; GCN-HSA-NEXT: v_and_b32_e32 v10, 0xffff, v5
3682 ; GCN-HSA-NEXT: v_and_b32_e32 v8, 0xffff, v4
3683 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
3684 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
3685 ; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
3686 ; GCN-HSA-NEXT: v_mov_b32_e32 v33, s7
3687 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
3688 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v7
3689 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v6
3690 ; GCN-HSA-NEXT: v_and_b32_e32 v26, 0xffff, v7
3691 ; GCN-HSA-NEXT: v_and_b32_e32 v24, 0xffff, v6
3692 ; GCN-HSA-NEXT: v_mov_b32_e32 v32, s6
3693 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[8:11]
3694 ; GCN-HSA-NEXT: s_waitcnt vmcnt(8)
3695 ; GCN-HSA-NEXT: v_and_b32_e32 v7, 0xffff, v28
3696 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v10, 16, v29
3697 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v8, 16, v28
3698 ; GCN-HSA-NEXT: v_and_b32_e32 v9, 0xffff, v29
3699 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90
3700 ; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[24:27]
3701 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
3702 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v31
3703 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v30
3704 ; GCN-HSA-NEXT: v_and_b32_e32 v26, 0xffff, v31
3705 ; GCN-HSA-NEXT: v_and_b32_e32 v24, 0xffff, v30
3706 ; GCN-HSA-NEXT: flat_store_dwordx4 v[36:37], v[7:10]
3707 ; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[24:27]
3708 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
3709 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
3710 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60
3711 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v6, 16, v3
3712 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v2
3713 ; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v3
3714 ; GCN-HSA-NEXT: v_and_b32_e32 v3, 0xffff, v2
3715 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
3716 ; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
3717 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v20
3718 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
3719 ; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
3720 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70
3721 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
3722 ; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3
3723 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v21
3724 ; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v21
3725 ; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v20
3726 ; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2
3727 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 64
3728 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3729 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v23
3730 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v22
3731 ; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v23
3732 ; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v22
3733 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
3734 ; GCN-HSA-NEXT: s_waitcnt vmcnt(12)
3735 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v18
3736 ; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7]
3737 ; GCN-HSA-NEXT: s_waitcnt vmcnt(12)
3738 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v15
3739 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v17
3740 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v16
3741 ; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v18
3742 ; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v17
3743 ; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v16
3744 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v13
3745 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v16, 16, v12
3746 ; GCN-HSA-NEXT: v_and_b32_e32 v10, 0xffff, v15
3747 ; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v13
3748 ; GCN-HSA-NEXT: v_and_b32_e32 v15, 0xffff, v12
3749 ; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3
3750 ; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2
3751 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50
3752 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
3753 ; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[15:18]
3754 ; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3
3755 ; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2
3756 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 32
3757 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v14
3758 ; GCN-HSA-NEXT: v_and_b32_e32 v8, 0xffff, v14
3759 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
3760 ; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[8:11]
3761 ; GCN-HSA-NEXT: s_add_u32 s0, s0, 48
3762 ; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3
3763 ; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2
3764 ; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0
3765 ; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7]
3766 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v19
3767 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
3768 ; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v19
3769 ; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
3770 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3771 ; GCN-HSA-NEXT: s_endpgm
3773 ; GCN-NOHSA-VI-LABEL: global_zextload_v64i16_to_v64i32:
3774 ; GCN-NOHSA-VI: ; %bb.0:
3775 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
3776 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0
3777 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1
3778 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s90, -1
3779 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s91, 0xe80000
3780 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
3781 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
3782 ; GCN-NOHSA-VI-NEXT: s_add_u32 s88, s88, s9
3783 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2
3784 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
3785 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
3786 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
3787 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3
3788 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0
3789 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:16
3790 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:32
3791 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:48
3792 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[27:30], off, s[8:11], 0 offset:64
3793 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[31:34], off, s[8:11], 0 offset:80
3794 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[35:38], off, s[8:11], 0 offset:96
3795 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[39:42], off, s[8:11], 0 offset:112
3796 ; GCN-NOHSA-VI-NEXT: s_addc_u32 s89, s89, 0
3797 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
3798 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
3799 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(7)
3800 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v18, 16, v15
3801 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v16, 16, v14
3802 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v17, 0xffff, v15
3803 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v15, 0xffff, v14
3804 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v15, off, s[88:91], 0 ; 4-byte Folded Spill
3805 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
3806 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v16, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill
3807 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v17, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill
3808 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v18, off, s[88:91], 0 offset:12 ; 4-byte Folded Spill
3809 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v21, 16, v13
3810 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v19, 16, v12
3811 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v20, 0xffff, v13
3812 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v18, 0xffff, v12
3813 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v18, off, s[88:91], 0 offset:16 ; 4-byte Folded Spill
3814 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
3815 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v19, off, s[88:91], 0 offset:20 ; 4-byte Folded Spill
3816 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v20, off, s[88:91], 0 offset:24 ; 4-byte Folded Spill
3817 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v21, off, s[88:91], 0 offset:28 ; 4-byte Folded Spill
3818 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v26, 16, v11
3819 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v24, 16, v10
3820 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v19, 16, v9
3821 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v17, 16, v8
3822 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v25, 0xffff, v11
3823 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v23, 0xffff, v10
3824 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v18, 0xffff, v9
3825 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, 0xffff, v8
3826 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v3
3827 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v2
3828 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v50, 16, v1
3829 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v48, 16, v0
3830 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, 0xffff, v3
3831 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, 0xffff, v2
3832 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v49, 0xffff, v1
3833 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v47, 0xffff, v0
3834 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v40
3835 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v39
3836 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v40
3837 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v39
3838 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v7
3839 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v13, 16, v6
3840 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v46, 16, v5
3841 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v44, 16, v4
3842 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v14, 0xffff, v7
3843 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, 0xffff, v6
3844 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v45, 0xffff, v5
3845 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v43, 0xffff, v4
3846 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v54, 16, v30
3847 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v52, 16, v29
3848 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v58, 16, v28
3849 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v56, 16, v27
3850 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v53, 0xffff, v30
3851 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v51, 0xffff, v29
3852 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v57, 0xffff, v28
3853 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v55, 0xffff, v27
3854 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v30, 16, v34
3855 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v28, 16, v33
3856 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v62, 16, v32
3857 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v60, 16, v31
3858 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v29, 0xffff, v34
3859 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v27, 0xffff, v33
3860 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v61, 0xffff, v32
3861 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v59, 0xffff, v31
3862 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v34, 16, v38
3863 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v32, 16, v37
3864 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v36
3865 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v35
3866 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v33, 0xffff, v38
3867 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v31, 0xffff, v37
3868 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v6, 0xffff, v36
3869 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, 0xffff, v35
3870 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v38, 16, v42
3871 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v36, 16, v41
3872 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v37, 0xffff, v42
3873 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v35, 0xffff, v41
3874 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
3875 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[35:38], off, s[0:3], 0 offset:240
3876 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192
3877 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[31:34], off, s[0:3], 0 offset:208
3878 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[59:62], off, s[0:3], 0 offset:160
3879 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:176
3880 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[55:58], off, s[0:3], 0 offset:128
3881 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[51:54], off, s[0:3], 0 offset:144
3882 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[47:50], off, s[0:3], 0 offset:96
3883 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
3884 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[43:46], off, s[0:3], 0 offset:64
3885 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:80
3886 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32
3887 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:48
3888 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[88:91], 0 offset:16 ; 4-byte Folded Reload
3889 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:20 ; 4-byte Folded Reload
3890 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:24 ; 4-byte Folded Reload
3891 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v3, off, s[88:91], 0 offset:28 ; 4-byte Folded Reload
3892 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
3893 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
3894 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[88:91], 0 ; 4-byte Folded Reload
3895 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload
3896 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:8 ; 4-byte Folded Reload
3897 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v3, off, s[88:91], 0 offset:12 ; 4-byte Folded Reload
3898 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
3899 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3900 ; GCN-NOHSA-VI-NEXT: s_endpgm
3902 ; EG-LABEL: global_zextload_v64i16_to_v64i32:
3904 ; EG-NEXT: ALU 0, @38, KC0[CB0:0-32], KC1[]
3905 ; EG-NEXT: TEX 3 @22
3906 ; EG-NEXT: ALU 56, @39, KC0[CB0:0-32], KC1[]
3907 ; EG-NEXT: TEX 3 @30
3908 ; EG-NEXT: ALU 87, @96, KC0[CB0:0-32], KC1[]
3909 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T66.X, 0
3910 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T65.X, 0
3911 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T64.X, 0
3912 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T62.X, 0
3913 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T51.XYZW, T61.X, 0
3914 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T59.X, 0
3915 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T52.XYZW, T58.X, 0
3916 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T56.X, 0
3917 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T55.X, 0
3918 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T53.X, 0
3919 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T48.X, 0
3920 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T47.X, 0
3921 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T46.X, 0
3922 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T44.X, 0
3923 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T43.X, 0
3924 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T41.X, 1
3926 ; EG-NEXT: Fetch clause starting at 22:
3927 ; EG-NEXT: VTX_READ_128 T36.XYZW, T37.X, 0, #1
3928 ; EG-NEXT: VTX_READ_128 T38.XYZW, T37.X, 48, #1
3929 ; EG-NEXT: VTX_READ_128 T39.XYZW, T37.X, 32, #1
3930 ; EG-NEXT: VTX_READ_128 T40.XYZW, T37.X, 16, #1
3931 ; EG-NEXT: Fetch clause starting at 30:
3932 ; EG-NEXT: VTX_READ_128 T49.XYZW, T37.X, 112, #1
3933 ; EG-NEXT: VTX_READ_128 T50.XYZW, T37.X, 96, #1
3934 ; EG-NEXT: VTX_READ_128 T51.XYZW, T37.X, 80, #1
3935 ; EG-NEXT: VTX_READ_128 T52.XYZW, T37.X, 64, #1
3936 ; EG-NEXT: ALU clause starting at 38:
3937 ; EG-NEXT: MOV * T37.X, KC0[2].Z,
3938 ; EG-NEXT: ALU clause starting at 39:
3939 ; EG-NEXT: LSHR * T35.W, T36.W, literal.x,
3940 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3941 ; EG-NEXT: AND_INT * T35.Z, T36.W, literal.x,
3942 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
3943 ; EG-NEXT: LSHR T35.Y, T36.Z, literal.x,
3944 ; EG-NEXT: LSHR * T36.W, T36.Y, literal.x,
3945 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3946 ; EG-NEXT: AND_INT T35.X, T36.Z, literal.x,
3947 ; EG-NEXT: AND_INT T36.Z, T36.Y, literal.x,
3948 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
3949 ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
3950 ; EG-NEXT: LSHR T41.X, PV.W, literal.x,
3951 ; EG-NEXT: LSHR T36.Y, T36.X, literal.y,
3952 ; EG-NEXT: LSHR T42.W, T40.W, literal.y,
3953 ; EG-NEXT: AND_INT * T36.X, T36.X, literal.z,
3954 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
3955 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
3956 ; EG-NEXT: AND_INT * T42.Z, T40.W, literal.x,
3957 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
3958 ; EG-NEXT: LSHR T43.X, KC0[2].Y, literal.x,
3959 ; EG-NEXT: LSHR T42.Y, T40.Z, literal.y,
3960 ; EG-NEXT: LSHR T40.W, T40.Y, literal.y,
3961 ; EG-NEXT: AND_INT * T42.X, T40.Z, literal.z,
3962 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
3963 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
3964 ; EG-NEXT: AND_INT T40.Z, T40.Y, literal.x,
3965 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
3966 ; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44)
3967 ; EG-NEXT: LSHR T44.X, PV.W, literal.x,
3968 ; EG-NEXT: LSHR T40.Y, T40.X, literal.y,
3969 ; EG-NEXT: LSHR T45.W, T39.W, literal.y,
3970 ; EG-NEXT: AND_INT * T40.X, T40.X, literal.z,
3971 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
3972 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
3973 ; EG-NEXT: AND_INT T45.Z, T39.W, literal.x,
3974 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
3975 ; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44)
3976 ; EG-NEXT: LSHR T46.X, PV.W, literal.x,
3977 ; EG-NEXT: LSHR T45.Y, T39.Z, literal.y,
3978 ; EG-NEXT: LSHR T39.W, T39.Y, literal.y,
3979 ; EG-NEXT: AND_INT * T45.X, T39.Z, literal.z,
3980 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
3981 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
3982 ; EG-NEXT: AND_INT T39.Z, T39.Y, literal.x,
3983 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
3984 ; EG-NEXT: 65535(9.183409e-41), 80(1.121039e-43)
3985 ; EG-NEXT: LSHR T47.X, PV.W, literal.x,
3986 ; EG-NEXT: LSHR T39.Y, T39.X, literal.y,
3987 ; EG-NEXT: AND_INT * T39.X, T39.X, literal.z,
3988 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
3989 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
3990 ; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.x,
3991 ; EG-NEXT: LSHR * T37.W, T38.W, literal.y,
3992 ; EG-NEXT: 64(8.968310e-44), 16(2.242078e-44)
3993 ; EG-NEXT: LSHR T48.X, PV.W, literal.x,
3994 ; EG-NEXT: AND_INT * T37.Z, T38.W, literal.y,
3995 ; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
3996 ; EG-NEXT: ALU clause starting at 96:
3997 ; EG-NEXT: LSHR T37.Y, T38.Z, literal.x,
3998 ; EG-NEXT: LSHR * T38.W, T38.Y, literal.x,
3999 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4000 ; EG-NEXT: AND_INT T37.X, T38.Z, literal.x,
4001 ; EG-NEXT: AND_INT T38.Z, T38.Y, literal.x,
4002 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4003 ; EG-NEXT: 65535(9.183409e-41), 112(1.569454e-43)
4004 ; EG-NEXT: LSHR T53.X, PV.W, literal.x,
4005 ; EG-NEXT: LSHR T38.Y, T38.X, literal.y,
4006 ; EG-NEXT: LSHR T54.W, T52.W, literal.y,
4007 ; EG-NEXT: AND_INT * T38.X, T38.X, literal.z,
4008 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4009 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
4010 ; EG-NEXT: AND_INT T54.Z, T52.W, literal.x,
4011 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4012 ; EG-NEXT: 65535(9.183409e-41), 96(1.345247e-43)
4013 ; EG-NEXT: LSHR T55.X, PV.W, literal.x,
4014 ; EG-NEXT: LSHR T54.Y, T52.Z, literal.y,
4015 ; EG-NEXT: LSHR T52.W, T52.Y, literal.y,
4016 ; EG-NEXT: AND_INT * T54.X, T52.Z, literal.z,
4017 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4018 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
4019 ; EG-NEXT: AND_INT T52.Z, T52.Y, literal.x,
4020 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4021 ; EG-NEXT: 65535(9.183409e-41), 144(2.017870e-43)
4022 ; EG-NEXT: LSHR T56.X, PV.W, literal.x,
4023 ; EG-NEXT: LSHR T52.Y, T52.X, literal.y,
4024 ; EG-NEXT: LSHR T57.W, T51.W, literal.y,
4025 ; EG-NEXT: AND_INT * T52.X, T52.X, literal.z,
4026 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4027 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
4028 ; EG-NEXT: AND_INT T57.Z, T51.W, literal.x,
4029 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4030 ; EG-NEXT: 65535(9.183409e-41), 128(1.793662e-43)
4031 ; EG-NEXT: LSHR T58.X, PV.W, literal.x,
4032 ; EG-NEXT: LSHR T57.Y, T51.Z, literal.y,
4033 ; EG-NEXT: LSHR T51.W, T51.Y, literal.y,
4034 ; EG-NEXT: AND_INT * T57.X, T51.Z, literal.z,
4035 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4036 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
4037 ; EG-NEXT: AND_INT T51.Z, T51.Y, literal.x,
4038 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4039 ; EG-NEXT: 65535(9.183409e-41), 176(2.466285e-43)
4040 ; EG-NEXT: LSHR T59.X, PV.W, literal.x,
4041 ; EG-NEXT: LSHR T51.Y, T51.X, literal.y,
4042 ; EG-NEXT: LSHR T60.W, T50.W, literal.y,
4043 ; EG-NEXT: AND_INT * T51.X, T51.X, literal.z,
4044 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4045 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
4046 ; EG-NEXT: AND_INT T60.Z, T50.W, literal.x,
4047 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4048 ; EG-NEXT: 65535(9.183409e-41), 160(2.242078e-43)
4049 ; EG-NEXT: LSHR T61.X, PV.W, literal.x,
4050 ; EG-NEXT: LSHR T60.Y, T50.Z, literal.y,
4051 ; EG-NEXT: LSHR T50.W, T50.Y, literal.y,
4052 ; EG-NEXT: AND_INT * T60.X, T50.Z, literal.z,
4053 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4054 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
4055 ; EG-NEXT: AND_INT T50.Z, T50.Y, literal.x,
4056 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4057 ; EG-NEXT: 65535(9.183409e-41), 208(2.914701e-43)
4058 ; EG-NEXT: LSHR T62.X, PV.W, literal.x,
4059 ; EG-NEXT: LSHR T50.Y, T50.X, literal.y,
4060 ; EG-NEXT: LSHR T63.W, T49.W, literal.y,
4061 ; EG-NEXT: AND_INT * T50.X, T50.X, literal.z,
4062 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4063 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
4064 ; EG-NEXT: AND_INT T63.Z, T49.W, literal.x,
4065 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4066 ; EG-NEXT: 65535(9.183409e-41), 192(2.690493e-43)
4067 ; EG-NEXT: LSHR T64.X, PV.W, literal.x,
4068 ; EG-NEXT: LSHR T63.Y, T49.Z, literal.y,
4069 ; EG-NEXT: LSHR T49.W, T49.Y, literal.y,
4070 ; EG-NEXT: AND_INT * T63.X, T49.Z, literal.z,
4071 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4072 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
4073 ; EG-NEXT: AND_INT T49.Z, T49.Y, literal.x,
4074 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4075 ; EG-NEXT: 65535(9.183409e-41), 240(3.363116e-43)
4076 ; EG-NEXT: LSHR T65.X, PV.W, literal.x,
4077 ; EG-NEXT: LSHR T49.Y, T49.X, literal.y,
4078 ; EG-NEXT: AND_INT * T49.X, T49.X, literal.z,
4079 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4080 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
4081 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
4082 ; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00)
4083 ; EG-NEXT: LSHR * T66.X, PV.W, literal.x,
4084 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4086 ; CM-LABEL: global_zextload_v64i16_to_v64i32:
4088 ; CM-NEXT: ALU 0, @38, KC0[CB0:0-32], KC1[]
4089 ; CM-NEXT: TEX 3 @22
4090 ; CM-NEXT: ALU 50, @39, KC0[CB0:0-32], KC1[]
4091 ; CM-NEXT: TEX 3 @30
4092 ; CM-NEXT: ALU 78, @90, KC0[CB0:0-32], KC1[]
4093 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T65, T66.X
4094 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T63, T48.X
4095 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T62, T64.X
4096 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T60, T49.X
4097 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T59, T61.X
4098 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T57, T50.X
4099 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T56, T58.X
4100 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T54, T51.X
4101 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T53, T55.X
4102 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T35, T37.X
4103 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T47, T52.X
4104 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T45, T38.X
4105 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T44, T46.X
4106 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T42, T39.X
4107 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T41, T43.X
4108 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T40, T36.X
4110 ; CM-NEXT: Fetch clause starting at 22:
4111 ; CM-NEXT: VTX_READ_128 T36.XYZW, T35.X, 112, #1
4112 ; CM-NEXT: VTX_READ_128 T37.XYZW, T35.X, 64, #1
4113 ; CM-NEXT: VTX_READ_128 T38.XYZW, T35.X, 80, #1
4114 ; CM-NEXT: VTX_READ_128 T39.XYZW, T35.X, 96, #1
4115 ; CM-NEXT: Fetch clause starting at 30:
4116 ; CM-NEXT: VTX_READ_128 T48.XYZW, T35.X, 0, #1
4117 ; CM-NEXT: VTX_READ_128 T49.XYZW, T35.X, 16, #1
4118 ; CM-NEXT: VTX_READ_128 T50.XYZW, T35.X, 32, #1
4119 ; CM-NEXT: VTX_READ_128 T51.XYZW, T35.X, 48, #1
4120 ; CM-NEXT: ALU clause starting at 38:
4121 ; CM-NEXT: MOV * T35.X, KC0[2].Z,
4122 ; CM-NEXT: ALU clause starting at 39:
4123 ; CM-NEXT: LSHR * T40.W, T36.Y, literal.x,
4124 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4125 ; CM-NEXT: AND_INT * T40.Z, T36.Y, literal.x,
4126 ; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
4127 ; CM-NEXT: LSHR T40.Y, T36.X, literal.x,
4128 ; CM-NEXT: LSHR * T41.W, T36.W, literal.x,
4129 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4130 ; CM-NEXT: AND_INT T40.X, T36.X, literal.x,
4131 ; CM-NEXT: AND_INT T41.Z, T36.W, literal.x,
4132 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4133 ; CM-NEXT: 65535(9.183409e-41), 224(3.138909e-43)
4134 ; CM-NEXT: LSHR T36.X, PV.W, literal.x,
4135 ; CM-NEXT: LSHR T41.Y, T36.Z, literal.y,
4136 ; CM-NEXT: LSHR * T42.W, T39.Y, literal.y,
4137 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4138 ; CM-NEXT: AND_INT T41.X, T36.Z, literal.x,
4139 ; CM-NEXT: AND_INT T42.Z, T39.Y, literal.x,
4140 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4141 ; CM-NEXT: 65535(9.183409e-41), 240(3.363116e-43)
4142 ; CM-NEXT: LSHR T43.X, PV.W, literal.x,
4143 ; CM-NEXT: LSHR T42.Y, T39.X, literal.y,
4144 ; CM-NEXT: LSHR * T44.W, T39.W, literal.y,
4145 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4146 ; CM-NEXT: AND_INT T42.X, T39.X, literal.x,
4147 ; CM-NEXT: AND_INT T44.Z, T39.W, literal.x,
4148 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4149 ; CM-NEXT: 65535(9.183409e-41), 192(2.690493e-43)
4150 ; CM-NEXT: LSHR T39.X, PV.W, literal.x,
4151 ; CM-NEXT: LSHR T44.Y, T39.Z, literal.y,
4152 ; CM-NEXT: LSHR * T45.W, T38.Y, literal.y,
4153 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4154 ; CM-NEXT: AND_INT T44.X, T39.Z, literal.x,
4155 ; CM-NEXT: AND_INT T45.Z, T38.Y, literal.x,
4156 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4157 ; CM-NEXT: 65535(9.183409e-41), 208(2.914701e-43)
4158 ; CM-NEXT: LSHR T46.X, PV.W, literal.x,
4159 ; CM-NEXT: LSHR T45.Y, T38.X, literal.y,
4160 ; CM-NEXT: LSHR * T47.W, T38.W, literal.y,
4161 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4162 ; CM-NEXT: AND_INT T45.X, T38.X, literal.x,
4163 ; CM-NEXT: AND_INT T47.Z, T38.W, literal.x,
4164 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4165 ; CM-NEXT: 65535(9.183409e-41), 160(2.242078e-43)
4166 ; CM-NEXT: LSHR T38.X, PV.W, literal.x,
4167 ; CM-NEXT: LSHR T47.Y, T38.Z, literal.y,
4168 ; CM-NEXT: LSHR * T35.W, T37.Y, literal.y,
4169 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4170 ; CM-NEXT: AND_INT T47.X, T38.Z, literal.x,
4171 ; CM-NEXT: AND_INT T35.Z, T37.Y, literal.x,
4172 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4173 ; CM-NEXT: 65535(9.183409e-41), 176(2.466285e-43)
4174 ; CM-NEXT: ALU clause starting at 90:
4175 ; CM-NEXT: LSHR T52.X, T0.W, literal.x,
4176 ; CM-NEXT: LSHR T35.Y, T37.X, literal.y,
4177 ; CM-NEXT: LSHR * T53.W, T37.W, literal.y, BS:VEC_120/SCL_212
4178 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4179 ; CM-NEXT: AND_INT T35.X, T37.X, literal.x,
4180 ; CM-NEXT: AND_INT T53.Z, T37.W, literal.x,
4181 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4182 ; CM-NEXT: 65535(9.183409e-41), 128(1.793662e-43)
4183 ; CM-NEXT: LSHR T37.X, PV.W, literal.x,
4184 ; CM-NEXT: LSHR T53.Y, T37.Z, literal.y,
4185 ; CM-NEXT: LSHR * T54.W, T51.Y, literal.y,
4186 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4187 ; CM-NEXT: AND_INT T53.X, T37.Z, literal.x,
4188 ; CM-NEXT: AND_INT T54.Z, T51.Y, literal.x,
4189 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4190 ; CM-NEXT: 65535(9.183409e-41), 144(2.017870e-43)
4191 ; CM-NEXT: LSHR T55.X, PV.W, literal.x,
4192 ; CM-NEXT: LSHR T54.Y, T51.X, literal.y,
4193 ; CM-NEXT: LSHR * T56.W, T51.W, literal.y,
4194 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4195 ; CM-NEXT: AND_INT T54.X, T51.X, literal.x,
4196 ; CM-NEXT: AND_INT T56.Z, T51.W, literal.x,
4197 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4198 ; CM-NEXT: 65535(9.183409e-41), 96(1.345247e-43)
4199 ; CM-NEXT: LSHR T51.X, PV.W, literal.x,
4200 ; CM-NEXT: LSHR T56.Y, T51.Z, literal.y,
4201 ; CM-NEXT: LSHR * T57.W, T50.Y, literal.y,
4202 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4203 ; CM-NEXT: AND_INT T56.X, T51.Z, literal.x,
4204 ; CM-NEXT: AND_INT T57.Z, T50.Y, literal.x,
4205 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4206 ; CM-NEXT: 65535(9.183409e-41), 112(1.569454e-43)
4207 ; CM-NEXT: LSHR T58.X, PV.W, literal.x,
4208 ; CM-NEXT: LSHR T57.Y, T50.X, literal.y,
4209 ; CM-NEXT: LSHR * T59.W, T50.W, literal.y,
4210 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4211 ; CM-NEXT: AND_INT T57.X, T50.X, literal.x,
4212 ; CM-NEXT: AND_INT T59.Z, T50.W, literal.x,
4213 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4214 ; CM-NEXT: 65535(9.183409e-41), 64(8.968310e-44)
4215 ; CM-NEXT: LSHR T50.X, PV.W, literal.x,
4216 ; CM-NEXT: LSHR T59.Y, T50.Z, literal.y,
4217 ; CM-NEXT: LSHR * T60.W, T49.Y, literal.y,
4218 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4219 ; CM-NEXT: AND_INT T59.X, T50.Z, literal.x,
4220 ; CM-NEXT: AND_INT T60.Z, T49.Y, literal.x,
4221 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4222 ; CM-NEXT: 65535(9.183409e-41), 80(1.121039e-43)
4223 ; CM-NEXT: LSHR T61.X, PV.W, literal.x,
4224 ; CM-NEXT: LSHR T60.Y, T49.X, literal.y,
4225 ; CM-NEXT: LSHR * T62.W, T49.W, literal.y,
4226 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4227 ; CM-NEXT: AND_INT T60.X, T49.X, literal.x,
4228 ; CM-NEXT: AND_INT T62.Z, T49.W, literal.x,
4229 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4230 ; CM-NEXT: 65535(9.183409e-41), 32(4.484155e-44)
4231 ; CM-NEXT: LSHR T49.X, PV.W, literal.x,
4232 ; CM-NEXT: LSHR T62.Y, T49.Z, literal.y,
4233 ; CM-NEXT: LSHR * T63.W, T48.Y, literal.y,
4234 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4235 ; CM-NEXT: AND_INT T62.X, T49.Z, literal.x,
4236 ; CM-NEXT: AND_INT T63.Z, T48.Y, literal.x,
4237 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4238 ; CM-NEXT: 65535(9.183409e-41), 48(6.726233e-44)
4239 ; CM-NEXT: LSHR T64.X, PV.W, literal.x,
4240 ; CM-NEXT: LSHR T63.Y, T48.X, literal.y,
4241 ; CM-NEXT: LSHR * T65.W, T48.W, literal.y,
4242 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4243 ; CM-NEXT: AND_INT T63.X, T48.X, literal.x,
4244 ; CM-NEXT: AND_INT * T65.Z, T48.W, literal.x,
4245 ; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
4246 ; CM-NEXT: LSHR T48.X, KC0[2].Y, literal.x,
4247 ; CM-NEXT: LSHR * T65.Y, T48.Z, literal.y,
4248 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4249 ; CM-NEXT: AND_INT T65.X, T48.Z, literal.x,
4250 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4251 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
4252 ; CM-NEXT: LSHR * T66.X, PV.W, literal.x,
4253 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4254 %load = load <64 x i16>, ptr addrspace(1) %in
4255 %ext = zext <64 x i16> %load to <64 x i32>
4256 store <64 x i32> %ext, ptr addrspace(1) %out
4260 define amdgpu_kernel void @global_sextload_v64i16_to_v64i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
4261 ; GCN-NOHSA-SI-LABEL: global_sextload_v64i16_to_v64i32:
4262 ; GCN-NOHSA-SI: ; %bb.0:
4263 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
4264 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
4265 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, -1
4266 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s15, 0xe8f000
4267 ; GCN-NOHSA-SI-NEXT: s_add_u32 s12, s12, s9
4268 ; GCN-NOHSA-SI-NEXT: s_addc_u32 s13, s13, 0
4269 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
4270 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
4271 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
4272 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
4273 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4
4274 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5
4275 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s6
4276 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s7
4277 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, s2
4278 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, s3
4279 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:112
4280 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:96
4281 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:80
4282 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:64
4283 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0
4284 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[32:35], off, s[4:7], 0 offset:16
4285 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[36:39], off, s[4:7], 0 offset:32
4286 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[40:43], off, s[4:7], 0 offset:48
4287 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(3)
4288 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v11
4289 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v10
4290 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v11, 0, 16
4291 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v10, 0, 16
4292 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
4293 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
4294 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
4295 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill
4296 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill
4297 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 16, v9
4298 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 16, v8
4299 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v9, 0, 16
4300 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v8, 0, 16
4301 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 16, v35
4302 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 16, v34
4303 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v35, 0, 16
4304 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v34, 0, 16
4305 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v15, 16, v33
4306 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v13, 16, v32
4307 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v14, v33, 0, 16
4308 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v12, v32, 0, 16
4309 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v35, 16, v39
4310 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v33, 16, v38
4311 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v34, v39, 0, 16
4312 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v32, v38, 0, 16
4313 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v47, 16, v37
4314 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v45, 16, v36
4315 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v46, v37, 0, 16
4316 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v44, v36, 0, 16
4317 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v39, 16, v43
4318 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v37, 16, v42
4319 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v38, v43, 0, 16
4320 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v36, v42, 0, 16
4321 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v51, 16, v41
4322 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v49, 16, v40
4323 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v50, v41, 0, 16
4324 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v48, v40, 0, 16
4325 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v43, 16, v31
4326 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v41, 16, v30
4327 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v42, v31, 0, 16
4328 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v40, v30, 0, 16
4329 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v55, 16, v29
4330 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v53, 16, v28
4331 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v54, v29, 0, 16
4332 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v52, v28, 0, 16
4333 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v30, 16, v27
4334 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v28, 16, v26
4335 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v29, v27, 0, 16
4336 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v27, v26, 0, 16
4337 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v59, 16, v25
4338 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v57, 16, v24
4339 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v58, v25, 0, 16
4340 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v56, v24, 0, 16
4341 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v26, 16, v23
4342 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 16, v22
4343 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v25, v23, 0, 16
4344 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v22, 0, 16
4345 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v63, 16, v21
4346 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v61, 16, v20
4347 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v62, v21, 0, 16
4348 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v60, v20, 0, 16
4349 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v22, 16, v19
4350 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v20, 16, v18
4351 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v21, v19, 0, 16
4352 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v19, v18, 0, 16
4353 ; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
4354 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v17
4355 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v16
4356 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v17, 0, 16
4357 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v16, 0, 16
4358 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
4359 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:240
4360 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[60:63], off, s[0:3], 0 offset:192
4361 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:208
4362 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:160
4363 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:176
4364 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:128
4365 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:144
4366 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:96
4367 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:112
4368 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:64
4369 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:80
4370 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32
4371 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48
4372 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
4373 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
4374 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
4375 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
4376 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
4377 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
4378 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
4379 ; GCN-NOHSA-SI-NEXT: s_endpgm
4381 ; GCN-HSA-LABEL: global_sextload_v64i16_to_v64i32:
4383 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
4384 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
4385 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
4386 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
4387 ; GCN-HSA-NEXT: flat_load_dwordx4 v[28:31], v[0:1]
4388 ; GCN-HSA-NEXT: s_add_u32 s4, s2, 0x70
4389 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0
4390 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
4391 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
4392 ; GCN-HSA-NEXT: flat_load_dwordx4 v[20:23], v[0:1]
4393 ; GCN-HSA-NEXT: s_add_u32 s4, s2, 0x60
4394 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0
4395 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
4396 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
4397 ; GCN-HSA-NEXT: s_add_u32 s4, s2, 0x50
4398 ; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[0:1]
4399 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0
4400 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
4401 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
4402 ; GCN-HSA-NEXT: s_add_u32 s4, s2, 64
4403 ; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
4404 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0
4405 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
4406 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
4407 ; GCN-HSA-NEXT: s_add_u32 s4, s2, 48
4408 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0
4409 ; GCN-HSA-NEXT: s_add_u32 s6, s2, 32
4410 ; GCN-HSA-NEXT: s_addc_u32 s7, s3, 0
4411 ; GCN-HSA-NEXT: s_add_u32 s2, s2, 16
4412 ; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
4413 ; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0
4414 ; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3
4415 ; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2
4416 ; GCN-HSA-NEXT: flat_load_dwordx4 v[24:27], v[8:9]
4417 ; GCN-HSA-NEXT: v_mov_b32_e32 v9, s5
4418 ; GCN-HSA-NEXT: v_mov_b32_e32 v8, s4
4419 ; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
4420 ; GCN-HSA-NEXT: v_mov_b32_e32 v17, s7
4421 ; GCN-HSA-NEXT: v_mov_b32_e32 v16, s6
4422 ; GCN-HSA-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
4423 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
4424 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
4425 ; GCN-HSA-NEXT: v_mov_b32_e32 v37, s1
4426 ; GCN-HSA-NEXT: v_mov_b32_e32 v36, s0
4427 ; GCN-HSA-NEXT: s_waitcnt vmcnt(7)
4428 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v35, 16, v29
4429 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v33, 16, v28
4430 ; GCN-HSA-NEXT: v_bfe_i32 v34, v29, 0, 16
4431 ; GCN-HSA-NEXT: v_bfe_i32 v32, v28, 0, 16
4432 ; GCN-HSA-NEXT: v_mov_b32_e32 v29, s3
4433 ; GCN-HSA-NEXT: v_mov_b32_e32 v28, s2
4434 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0
4435 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
4436 ; GCN-HSA-NEXT: flat_store_dwordx4 v[36:37], v[32:35]
4437 ; GCN-HSA-NEXT: v_mov_b32_e32 v36, s3
4438 ; GCN-HSA-NEXT: v_mov_b32_e32 v35, s2
4439 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0
4440 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v34, 16, v31
4441 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v32, 16, v30
4442 ; GCN-HSA-NEXT: v_bfe_i32 v33, v31, 0, 16
4443 ; GCN-HSA-NEXT: v_bfe_i32 v31, v30, 0, 16
4444 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
4445 ; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[31:34]
4446 ; GCN-HSA-NEXT: s_waitcnt vmcnt(8)
4447 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v29, 16, v20
4448 ; GCN-HSA-NEXT: v_mov_b32_e32 v33, s3
4449 ; GCN-HSA-NEXT: v_mov_b32_e32 v32, s2
4450 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0
4451 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
4452 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v31, 16, v21
4453 ; GCN-HSA-NEXT: v_bfe_i32 v30, v21, 0, 16
4454 ; GCN-HSA-NEXT: v_bfe_i32 v28, v20, 0, 16
4455 ; GCN-HSA-NEXT: flat_store_dwordx4 v[35:36], v[28:31]
4456 ; GCN-HSA-NEXT: v_mov_b32_e32 v35, s3
4457 ; GCN-HSA-NEXT: v_mov_b32_e32 v34, s2
4458 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xd0
4459 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
4460 ; GCN-HSA-NEXT: v_mov_b32_e32 v37, s3
4461 ; GCN-HSA-NEXT: v_mov_b32_e32 v36, s2
4462 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0
4463 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v31, 16, v23
4464 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v29, 16, v22
4465 ; GCN-HSA-NEXT: v_bfe_i32 v30, v23, 0, 16
4466 ; GCN-HSA-NEXT: v_bfe_i32 v28, v22, 0, 16
4467 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
4468 ; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[28:31]
4469 ; GCN-HSA-NEXT: v_mov_b32_e32 v33, s3
4470 ; GCN-HSA-NEXT: v_mov_b32_e32 v32, s2
4471 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xb0
4472 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
4473 ; GCN-HSA-NEXT: v_mov_b32_e32 v39, s3
4474 ; GCN-HSA-NEXT: s_waitcnt vmcnt(9)
4475 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v23, 16, v13
4476 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v21, 16, v12
4477 ; GCN-HSA-NEXT: v_bfe_i32 v22, v13, 0, 16
4478 ; GCN-HSA-NEXT: v_bfe_i32 v20, v12, 0, 16
4479 ; GCN-HSA-NEXT: v_mov_b32_e32 v38, s2
4480 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80
4481 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v31, 16, v15
4482 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v29, 16, v14
4483 ; GCN-HSA-NEXT: v_bfe_i32 v30, v15, 0, 16
4484 ; GCN-HSA-NEXT: v_bfe_i32 v28, v14, 0, 16
4485 ; GCN-HSA-NEXT: flat_store_dwordx4 v[34:35], v[20:23]
4486 ; GCN-HSA-NEXT: flat_store_dwordx4 v[36:37], v[28:31]
4487 ; GCN-HSA-NEXT: s_waitcnt vmcnt(10)
4488 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 16, v5
4489 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 16, v4
4490 ; GCN-HSA-NEXT: v_bfe_i32 v14, v5, 0, 16
4491 ; GCN-HSA-NEXT: v_bfe_i32 v12, v4, 0, 16
4492 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v23, 16, v7
4493 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v21, 16, v6
4494 ; GCN-HSA-NEXT: v_bfe_i32 v22, v7, 0, 16
4495 ; GCN-HSA-NEXT: v_bfe_i32 v20, v6, 0, 16
4496 ; GCN-HSA-NEXT: s_waitcnt vmcnt(9)
4497 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 16, v1
4498 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 16, v0
4499 ; GCN-HSA-NEXT: v_bfe_i32 v6, v1, 0, 16
4500 ; GCN-HSA-NEXT: v_bfe_i32 v4, v0, 0, 16
4501 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
4502 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
4503 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
4504 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90
4505 ; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[12:15]
4506 ; GCN-HSA-NEXT: flat_store_dwordx4 v[38:39], v[20:23]
4507 ; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
4508 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
4509 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
4510 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
4511 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60
4512 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v23, 16, v3
4513 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v21, 16, v2
4514 ; GCN-HSA-NEXT: v_bfe_i32 v22, v3, 0, 16
4515 ; GCN-HSA-NEXT: v_bfe_i32 v20, v2, 0, 16
4516 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
4517 ; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[20:23]
4518 ; GCN-HSA-NEXT: s_waitcnt vmcnt(11)
4519 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v9
4520 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v8
4521 ; GCN-HSA-NEXT: v_bfe_i32 v2, v9, 0, 16
4522 ; GCN-HSA-NEXT: v_bfe_i32 v0, v8, 0, 16
4523 ; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3
4524 ; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2
4525 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70
4526 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
4527 ; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3]
4528 ; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3
4529 ; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2
4530 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 64
4531 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 16, v24
4532 ; GCN-HSA-NEXT: v_bfe_i32 v12, v24, 0, 16
4533 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v24, 16, v11
4534 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v22, 16, v10
4535 ; GCN-HSA-NEXT: v_bfe_i32 v23, v11, 0, 16
4536 ; GCN-HSA-NEXT: v_bfe_i32 v21, v10, 0, 16
4537 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
4538 ; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[21:24]
4539 ; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3
4540 ; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2
4541 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50
4542 ; GCN-HSA-NEXT: s_waitcnt vmcnt(12)
4543 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v19
4544 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v18
4545 ; GCN-HSA-NEXT: v_bfe_i32 v2, v19, 0, 16
4546 ; GCN-HSA-NEXT: v_bfe_i32 v0, v18, 0, 16
4547 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v20, 16, v17
4548 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v18, 16, v16
4549 ; GCN-HSA-NEXT: v_bfe_i32 v19, v17, 0, 16
4550 ; GCN-HSA-NEXT: v_bfe_i32 v17, v16, 0, 16
4551 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
4552 ; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[17:20]
4553 ; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3
4554 ; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2
4555 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 32
4556 ; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3]
4557 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
4558 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
4559 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 16, v25
4560 ; GCN-HSA-NEXT: v_bfe_i32 v14, v25, 0, 16
4561 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
4562 ; GCN-HSA-NEXT: s_add_u32 s0, s0, 48
4563 ; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[12:15]
4564 ; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0
4565 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
4566 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 16, v27
4567 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 16, v26
4568 ; GCN-HSA-NEXT: v_bfe_i32 v6, v27, 0, 16
4569 ; GCN-HSA-NEXT: v_bfe_i32 v4, v26, 0, 16
4570 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
4571 ; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
4572 ; GCN-HSA-NEXT: s_endpgm
4574 ; GCN-NOHSA-VI-LABEL: global_sextload_v64i16_to_v64i32:
4575 ; GCN-NOHSA-VI: ; %bb.0:
4576 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
4577 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0
4578 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1
4579 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s90, -1
4580 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s91, 0xe80000
4581 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
4582 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
4583 ; GCN-NOHSA-VI-NEXT: s_add_u32 s88, s88, s9
4584 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2
4585 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
4586 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
4587 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
4588 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3
4589 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0
4590 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:16
4591 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:32
4592 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:48
4593 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[23:26], off, s[8:11], 0 offset:64
4594 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[27:30], off, s[8:11], 0 offset:80
4595 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[31:34], off, s[8:11], 0 offset:96
4596 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[35:38], off, s[8:11], 0 offset:112
4597 ; GCN-NOHSA-VI-NEXT: s_addc_u32 s89, s89, 0
4598 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
4599 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
4600 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(7)
4601 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v18, 16, v15
4602 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 16, v14
4603 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v17, v15, 0, 16
4604 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v14, 0, 16
4605 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v15, off, s[88:91], 0 ; 4-byte Folded Spill
4606 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
4607 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v16, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill
4608 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v17, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill
4609 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v18, off, s[88:91], 0 offset:12 ; 4-byte Folded Spill
4610 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 16, v13
4611 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 16, v12
4612 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v13, 0, 16
4613 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v12, 0, 16
4614 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v13, off, s[88:91], 0 offset:16 ; 4-byte Folded Spill
4615 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
4616 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v14, off, s[88:91], 0 offset:20 ; 4-byte Folded Spill
4617 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v15, off, s[88:91], 0 offset:24 ; 4-byte Folded Spill
4618 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v16, off, s[88:91], 0 offset:28 ; 4-byte Folded Spill
4619 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 16, v11
4620 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 16, v10
4621 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v11, 0, 16
4622 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v10, 0, 16
4623 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v42, 16, v9
4624 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v40, 16, v8
4625 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v41, v9, 0, 16
4626 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v39, v8, 0, 16
4627 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 16, v3
4628 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 16, v2
4629 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v3, 0, 16
4630 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v2, 0, 16
4631 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v50, 16, v1
4632 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v48, 16, v0
4633 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v49, v1, 0, 16
4634 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v47, v0, 0, 16
4635 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v36
4636 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v35
4637 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v36, 0, 16
4638 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v35, 0, 16
4639 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 16, v7
4640 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 16, v6
4641 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v7, 0, 16
4642 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v6, 0, 16
4643 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v46, 16, v5
4644 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v44, 16, v4
4645 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v45, v5, 0, 16
4646 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v43, v4, 0, 16
4647 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v54, 16, v26
4648 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v52, 16, v25
4649 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v53, v26, 0, 16
4650 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v51, v25, 0, 16
4651 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v58, 16, v24
4652 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v56, 16, v23
4653 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v57, v24, 0, 16
4654 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v55, v23, 0, 16
4655 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v26, 16, v30
4656 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v24, 16, v29
4657 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v25, v30, 0, 16
4658 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v23, v29, 0, 16
4659 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v62, 16, v28
4660 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v60, 16, v27
4661 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v61, v28, 0, 16
4662 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v59, v27, 0, 16
4663 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v30, 16, v34
4664 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v28, 16, v33
4665 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v29, v34, 0, 16
4666 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v27, v33, 0, 16
4667 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 16, v32
4668 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 16, v31
4669 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v32, 0, 16
4670 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v31, 0, 16
4671 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v34, 16, v38
4672 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v32, 16, v37
4673 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v33, v38, 0, 16
4674 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v31, v37, 0, 16
4675 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
4676 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[31:34], off, s[0:3], 0 offset:240
4677 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192
4678 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:208
4679 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[59:62], off, s[0:3], 0 offset:160
4680 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:176
4681 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[55:58], off, s[0:3], 0 offset:128
4682 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[51:54], off, s[0:3], 0 offset:144
4683 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[47:50], off, s[0:3], 0 offset:96
4684 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
4685 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[43:46], off, s[0:3], 0 offset:64
4686 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:80
4687 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[39:42], off, s[0:3], 0 offset:32
4688 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
4689 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[88:91], 0 offset:16 ; 4-byte Folded Reload
4690 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:20 ; 4-byte Folded Reload
4691 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:24 ; 4-byte Folded Reload
4692 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v3, off, s[88:91], 0 offset:28 ; 4-byte Folded Reload
4693 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
4694 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4695 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[88:91], 0 ; 4-byte Folded Reload
4696 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload
4697 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:8 ; 4-byte Folded Reload
4698 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v3, off, s[88:91], 0 offset:12 ; 4-byte Folded Reload
4699 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
4700 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
4701 ; GCN-NOHSA-VI-NEXT: s_endpgm
4703 ; EG-LABEL: global_sextload_v64i16_to_v64i32:
4705 ; EG-NEXT: ALU 18, @38, KC0[CB0:0-32], KC1[]
4706 ; EG-NEXT: TEX 7 @22
4707 ; EG-NEXT: ALU 75, @57, KC0[CB0:0-32], KC1[]
4708 ; EG-NEXT: ALU 71, @133, KC0[CB0:0-32], KC1[]
4709 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T41.X, 0
4710 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T66.X, 0
4711 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T56.X, 0
4712 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T64.XYZW, T55.X, 0
4713 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T54.X, 0
4714 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T53.X, 0
4715 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T52.X, 0
4716 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T51.X, 0
4717 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T50.X, 0
4718 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T61.XYZW, T49.X, 0
4719 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T40.X, 0
4720 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T39.X, 0
4721 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T38.X, 0
4722 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T37.X, 0
4723 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T58.XYZW, T36.X, 0
4724 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T35.X, 1
4727 ; EG-NEXT: Fetch clause starting at 22:
4728 ; EG-NEXT: VTX_READ_128 T42.XYZW, T41.X, 16, #1
4729 ; EG-NEXT: VTX_READ_128 T43.XYZW, T41.X, 32, #1
4730 ; EG-NEXT: VTX_READ_128 T44.XYZW, T41.X, 0, #1
4731 ; EG-NEXT: VTX_READ_128 T45.XYZW, T41.X, 48, #1
4732 ; EG-NEXT: VTX_READ_128 T46.XYZW, T41.X, 64, #1
4733 ; EG-NEXT: VTX_READ_128 T47.XYZW, T41.X, 80, #1
4734 ; EG-NEXT: VTX_READ_128 T48.XYZW, T41.X, 96, #1
4735 ; EG-NEXT: VTX_READ_128 T41.XYZW, T41.X, 112, #1
4736 ; EG-NEXT: ALU clause starting at 38:
4737 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
4738 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4739 ; EG-NEXT: LSHR T35.X, PV.W, literal.x,
4740 ; EG-NEXT: LSHR * T36.X, KC0[2].Y, literal.x,
4741 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4742 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
4743 ; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
4744 ; EG-NEXT: LSHR T37.X, PV.W, literal.x,
4745 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4746 ; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
4747 ; EG-NEXT: LSHR T38.X, PV.W, literal.x,
4748 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4749 ; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43)
4750 ; EG-NEXT: LSHR T39.X, PV.W, literal.x,
4751 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4752 ; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44)
4753 ; EG-NEXT: LSHR T40.X, PV.W, literal.x,
4754 ; EG-NEXT: MOV * T41.X, KC0[2].Z,
4755 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4756 ; EG-NEXT: ALU clause starting at 57:
4757 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
4758 ; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00)
4759 ; EG-NEXT: LSHR T49.X, PV.W, literal.x,
4760 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4761 ; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43)
4762 ; EG-NEXT: LSHR T50.X, PV.W, literal.x,
4763 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4764 ; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43)
4765 ; EG-NEXT: LSHR T51.X, PV.W, literal.x,
4766 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4767 ; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43)
4768 ; EG-NEXT: LSHR T52.X, PV.W, literal.x,
4769 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4770 ; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43)
4771 ; EG-NEXT: LSHR T53.X, PV.W, literal.x,
4772 ; EG-NEXT: LSHR T0.Y, T41.Y, literal.y,
4773 ; EG-NEXT: LSHR T0.Z, T41.W, literal.y,
4774 ; EG-NEXT: LSHR T0.W, T48.Y, literal.y, BS:VEC_120/SCL_212
4775 ; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
4776 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4777 ; EG-NEXT: 160(2.242078e-43), 0(0.000000e+00)
4778 ; EG-NEXT: LSHR T54.X, PS, literal.x,
4779 ; EG-NEXT: LSHR T1.Y, T48.W, literal.y,
4780 ; EG-NEXT: LSHR T1.Z, T47.Y, literal.y,
4781 ; EG-NEXT: LSHR T1.W, T47.W, literal.y, BS:VEC_120/SCL_212
4782 ; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.z,
4783 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4784 ; EG-NEXT: 208(2.914701e-43), 0(0.000000e+00)
4785 ; EG-NEXT: LSHR T55.X, PS, literal.x,
4786 ; EG-NEXT: LSHR T2.Y, T46.Y, literal.y,
4787 ; EG-NEXT: LSHR T2.Z, T46.W, literal.y,
4788 ; EG-NEXT: LSHR T2.W, T45.Y, literal.y, BS:VEC_120/SCL_212
4789 ; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.z,
4790 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4791 ; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00)
4792 ; EG-NEXT: LSHR T56.X, PS, literal.x,
4793 ; EG-NEXT: LSHR T3.Y, T45.W, literal.y,
4794 ; EG-NEXT: BFE_INT T57.Z, T44.W, 0.0, literal.y, BS:VEC_120/SCL_212
4795 ; EG-NEXT: LSHR T3.W, T43.Y, literal.y,
4796 ; EG-NEXT: LSHR * T4.W, T43.W, literal.y,
4797 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4798 ; EG-NEXT: BFE_INT T57.X, T44.Z, 0.0, literal.x,
4799 ; EG-NEXT: LSHR T4.Y, T42.Y, literal.x,
4800 ; EG-NEXT: BFE_INT T58.Z, T44.Y, 0.0, literal.x, BS:VEC_120/SCL_212
4801 ; EG-NEXT: LSHR T5.W, T42.W, literal.x,
4802 ; EG-NEXT: LSHR * T6.W, T44.W, literal.x,
4803 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4804 ; EG-NEXT: BFE_INT T58.X, T44.X, 0.0, literal.x,
4805 ; EG-NEXT: LSHR T5.Y, T44.Y, literal.x,
4806 ; EG-NEXT: BFE_INT T59.Z, T42.W, 0.0, literal.x,
4807 ; EG-NEXT: BFE_INT T57.W, PS, 0.0, literal.x,
4808 ; EG-NEXT: LSHR * T6.W, T44.Z, literal.x,
4809 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4810 ; EG-NEXT: BFE_INT T59.X, T42.Z, 0.0, literal.x,
4811 ; EG-NEXT: BFE_INT T57.Y, PS, 0.0, literal.x,
4812 ; EG-NEXT: BFE_INT T44.Z, T42.Y, 0.0, literal.x,
4813 ; EG-NEXT: BFE_INT T58.W, PV.Y, 0.0, literal.x,
4814 ; EG-NEXT: LSHR * T6.W, T44.X, literal.x,
4815 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4816 ; EG-NEXT: BFE_INT T44.X, T42.X, 0.0, literal.x,
4817 ; EG-NEXT: BFE_INT T58.Y, PS, 0.0, literal.x,
4818 ; EG-NEXT: BFE_INT T60.Z, T43.W, 0.0, literal.x,
4819 ; EG-NEXT: BFE_INT T59.W, T5.W, 0.0, literal.x, BS:VEC_120/SCL_212
4820 ; EG-NEXT: LSHR * T5.W, T42.Z, literal.x,
4821 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4822 ; EG-NEXT: BFE_INT T60.X, T43.Z, 0.0, literal.x,
4823 ; EG-NEXT: BFE_INT T59.Y, PS, 0.0, literal.x,
4824 ; EG-NEXT: BFE_INT T42.Z, T43.Y, 0.0, literal.x,
4825 ; EG-NEXT: BFE_INT T44.W, T4.Y, 0.0, literal.x, BS:VEC_120/SCL_212
4826 ; EG-NEXT: LSHR * T5.W, T42.X, literal.x,
4827 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4828 ; EG-NEXT: BFE_INT T42.X, T43.X, 0.0, literal.x,
4829 ; EG-NEXT: BFE_INT T44.Y, PS, 0.0, literal.x,
4830 ; EG-NEXT: BFE_INT T61.Z, T45.W, 0.0, literal.x,
4831 ; EG-NEXT: BFE_INT * T60.W, T4.W, 0.0, literal.x, BS:VEC_120/SCL_212
4832 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4833 ; EG-NEXT: ALU clause starting at 133:
4834 ; EG-NEXT: LSHR * T4.W, T43.Z, literal.x,
4835 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4836 ; EG-NEXT: BFE_INT T61.X, T45.Z, 0.0, literal.x,
4837 ; EG-NEXT: BFE_INT T60.Y, PV.W, 0.0, literal.x,
4838 ; EG-NEXT: BFE_INT T43.Z, T45.Y, 0.0, literal.x,
4839 ; EG-NEXT: BFE_INT T42.W, T3.W, 0.0, literal.x,
4840 ; EG-NEXT: LSHR * T3.W, T43.X, literal.x,
4841 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4842 ; EG-NEXT: BFE_INT T43.X, T45.X, 0.0, literal.x,
4843 ; EG-NEXT: BFE_INT T42.Y, PS, 0.0, literal.x,
4844 ; EG-NEXT: BFE_INT T62.Z, T46.W, 0.0, literal.x,
4845 ; EG-NEXT: BFE_INT T61.W, T3.Y, 0.0, literal.x,
4846 ; EG-NEXT: LSHR * T3.W, T45.Z, literal.x,
4847 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4848 ; EG-NEXT: BFE_INT T62.X, T46.Z, 0.0, literal.x,
4849 ; EG-NEXT: BFE_INT T61.Y, PS, 0.0, literal.x,
4850 ; EG-NEXT: BFE_INT T45.Z, T46.Y, 0.0, literal.x,
4851 ; EG-NEXT: BFE_INT T43.W, T2.W, 0.0, literal.x,
4852 ; EG-NEXT: LSHR * T2.W, T45.X, literal.x,
4853 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4854 ; EG-NEXT: BFE_INT T45.X, T46.X, 0.0, literal.x,
4855 ; EG-NEXT: BFE_INT T43.Y, PS, 0.0, literal.x,
4856 ; EG-NEXT: BFE_INT T63.Z, T47.W, 0.0, literal.x,
4857 ; EG-NEXT: BFE_INT T62.W, T2.Z, 0.0, literal.x,
4858 ; EG-NEXT: LSHR * T2.W, T46.Z, literal.x,
4859 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4860 ; EG-NEXT: BFE_INT T63.X, T47.Z, 0.0, literal.x,
4861 ; EG-NEXT: BFE_INT T62.Y, PS, 0.0, literal.x,
4862 ; EG-NEXT: BFE_INT T46.Z, T47.Y, 0.0, literal.x,
4863 ; EG-NEXT: BFE_INT T45.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212
4864 ; EG-NEXT: LSHR * T2.W, T46.X, literal.x,
4865 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4866 ; EG-NEXT: BFE_INT T46.X, T47.X, 0.0, literal.x,
4867 ; EG-NEXT: BFE_INT T45.Y, PS, 0.0, literal.x,
4868 ; EG-NEXT: BFE_INT T64.Z, T48.W, 0.0, literal.x,
4869 ; EG-NEXT: BFE_INT T63.W, T1.W, 0.0, literal.x, BS:VEC_120/SCL_212
4870 ; EG-NEXT: LSHR * T1.W, T47.Z, literal.x,
4871 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4872 ; EG-NEXT: BFE_INT T64.X, T48.Z, 0.0, literal.x,
4873 ; EG-NEXT: BFE_INT T63.Y, PS, 0.0, literal.x,
4874 ; EG-NEXT: BFE_INT T47.Z, T48.Y, 0.0, literal.x,
4875 ; EG-NEXT: BFE_INT T46.W, T1.Z, 0.0, literal.x, BS:VEC_120/SCL_212
4876 ; EG-NEXT: LSHR * T1.W, T47.X, literal.x,
4877 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4878 ; EG-NEXT: BFE_INT T47.X, T48.X, 0.0, literal.x,
4879 ; EG-NEXT: BFE_INT T46.Y, PS, 0.0, literal.x,
4880 ; EG-NEXT: BFE_INT T65.Z, T41.W, 0.0, literal.x,
4881 ; EG-NEXT: BFE_INT T64.W, T1.Y, 0.0, literal.x,
4882 ; EG-NEXT: LSHR * T1.W, T48.Z, literal.x,
4883 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4884 ; EG-NEXT: BFE_INT T65.X, T41.Z, 0.0, literal.x,
4885 ; EG-NEXT: BFE_INT T64.Y, PS, 0.0, literal.x,
4886 ; EG-NEXT: BFE_INT T48.Z, T41.Y, 0.0, literal.x,
4887 ; EG-NEXT: BFE_INT T47.W, T0.W, 0.0, literal.x,
4888 ; EG-NEXT: LSHR * T0.W, T48.X, literal.x,
4889 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4890 ; EG-NEXT: BFE_INT T48.X, T41.X, 0.0, literal.x,
4891 ; EG-NEXT: BFE_INT T47.Y, PS, 0.0, literal.x,
4892 ; EG-NEXT: LSHR T1.Z, T41.Z, literal.x,
4893 ; EG-NEXT: BFE_INT T65.W, T0.Z, 0.0, literal.x, BS:VEC_120/SCL_212
4894 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4895 ; EG-NEXT: 16(2.242078e-44), 240(3.363116e-43)
4896 ; EG-NEXT: LSHR T66.X, PS, literal.x,
4897 ; EG-NEXT: BFE_INT T65.Y, PV.Z, 0.0, literal.y,
4898 ; EG-NEXT: LSHR T0.Z, T41.X, literal.y,
4899 ; EG-NEXT: BFE_INT T48.W, T0.Y, 0.0, literal.y,
4900 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
4901 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4902 ; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00)
4903 ; EG-NEXT: LSHR T41.X, PS, literal.x,
4904 ; EG-NEXT: BFE_INT * T48.Y, PV.Z, 0.0, literal.y,
4905 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4907 ; CM-LABEL: global_sextload_v64i16_to_v64i32:
4909 ; CM-NEXT: ALU 0, @40, KC0[CB0:0-32], KC1[]
4910 ; CM-NEXT: TEX 1 @24
4911 ; CM-NEXT: ALU 15, @41, KC0[CB0:0-32], KC1[]
4912 ; CM-NEXT: TEX 5 @28
4913 ; CM-NEXT: ALU 82, @57, KC0[CB0:0-32], KC1[]
4914 ; CM-NEXT: ALU 72, @140, KC0[CB0:0-32], KC1[]
4915 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T65, T66.X
4916 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T36, T35.X
4917 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T64, T56.X
4918 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T37, T55.X
4919 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T63, T54.X
4920 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T45, T53.X
4921 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T62, T52.X
4922 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T44, T51.X
4923 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T61, T50.X
4924 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T43, T49.X
4925 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T60, T48.X
4926 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T42, T47.X
4927 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T59, T46.X
4928 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T41, T40.X
4929 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T58, T39.X
4930 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T57, T38.X
4933 ; CM-NEXT: Fetch clause starting at 24:
4934 ; CM-NEXT: VTX_READ_128 T36.XYZW, T37.X, 16, #1
4935 ; CM-NEXT: VTX_READ_128 T35.XYZW, T37.X, 0, #1
4936 ; CM-NEXT: Fetch clause starting at 28:
4937 ; CM-NEXT: VTX_READ_128 T41.XYZW, T37.X, 112, #1
4938 ; CM-NEXT: VTX_READ_128 T42.XYZW, T37.X, 96, #1
4939 ; CM-NEXT: VTX_READ_128 T43.XYZW, T37.X, 80, #1
4940 ; CM-NEXT: VTX_READ_128 T44.XYZW, T37.X, 64, #1
4941 ; CM-NEXT: VTX_READ_128 T45.XYZW, T37.X, 48, #1
4942 ; CM-NEXT: VTX_READ_128 T37.XYZW, T37.X, 32, #1
4943 ; CM-NEXT: ALU clause starting at 40:
4944 ; CM-NEXT: MOV * T37.X, KC0[2].Z,
4945 ; CM-NEXT: ALU clause starting at 41:
4946 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
4947 ; CM-NEXT: 224(3.138909e-43), 0(0.000000e+00)
4948 ; CM-NEXT: LSHR T38.X, PV.W, literal.x,
4949 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
4950 ; CM-NEXT: 2(2.802597e-45), 240(3.363116e-43)
4951 ; CM-NEXT: LSHR T39.X, PV.W, literal.x,
4952 ; CM-NEXT: LSHR T0.Y, T35.Z, literal.y,
4953 ; CM-NEXT: LSHR T0.Z, T35.W, literal.y,
4954 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
4955 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4956 ; CM-NEXT: 192(2.690493e-43), 0(0.000000e+00)
4957 ; CM-NEXT: LSHR T40.X, PV.W, literal.x,
4958 ; CM-NEXT: LSHR T1.Y, T35.Y, literal.y,
4959 ; CM-NEXT: LSHR T1.Z, T36.Z, literal.y,
4960 ; CM-NEXT: LSHR * T0.W, T36.W, literal.y,
4961 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4962 ; CM-NEXT: ALU clause starting at 57:
4963 ; CM-NEXT: LSHR T2.Z, T36.X, literal.x,
4964 ; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
4965 ; CM-NEXT: 16(2.242078e-44), 208(2.914701e-43)
4966 ; CM-NEXT: LSHR T46.X, PV.W, literal.x,
4967 ; CM-NEXT: LSHR T2.Y, T36.Y, literal.y,
4968 ; CM-NEXT: LSHR T3.Z, T37.Z, literal.y,
4969 ; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
4970 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4971 ; CM-NEXT: 160(2.242078e-43), 0(0.000000e+00)
4972 ; CM-NEXT: LSHR T47.X, PV.W, literal.x,
4973 ; CM-NEXT: LSHR T3.Y, T37.W, literal.y,
4974 ; CM-NEXT: LSHR T4.Z, T37.X, literal.y,
4975 ; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
4976 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4977 ; CM-NEXT: 176(2.466285e-43), 0(0.000000e+00)
4978 ; CM-NEXT: LSHR T48.X, PV.W, literal.x,
4979 ; CM-NEXT: LSHR T4.Y, T37.Y, literal.y,
4980 ; CM-NEXT: LSHR T5.Z, T45.Z, literal.y,
4981 ; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
4982 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4983 ; CM-NEXT: 128(1.793662e-43), 0(0.000000e+00)
4984 ; CM-NEXT: LSHR T49.X, PV.W, literal.x,
4985 ; CM-NEXT: LSHR T5.Y, T45.W, literal.y,
4986 ; CM-NEXT: LSHR T6.Z, T45.X, literal.y,
4987 ; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
4988 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4989 ; CM-NEXT: 144(2.017870e-43), 0(0.000000e+00)
4990 ; CM-NEXT: LSHR T50.X, PV.W, literal.x,
4991 ; CM-NEXT: LSHR T6.Y, T45.Y, literal.y,
4992 ; CM-NEXT: LSHR T7.Z, T44.Z, literal.y,
4993 ; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
4994 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
4995 ; CM-NEXT: 96(1.345247e-43), 0(0.000000e+00)
4996 ; CM-NEXT: LSHR T51.X, PV.W, literal.x,
4997 ; CM-NEXT: LSHR T7.Y, T44.W, literal.y,
4998 ; CM-NEXT: LSHR T8.Z, T44.X, literal.y,
4999 ; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
5000 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
5001 ; CM-NEXT: 112(1.569454e-43), 0(0.000000e+00)
5002 ; CM-NEXT: LSHR T52.X, PV.W, literal.x,
5003 ; CM-NEXT: LSHR T8.Y, T44.Y, literal.y,
5004 ; CM-NEXT: LSHR T9.Z, T43.Z, literal.y,
5005 ; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
5006 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
5007 ; CM-NEXT: 64(8.968310e-44), 0(0.000000e+00)
5008 ; CM-NEXT: LSHR T53.X, PV.W, literal.x,
5009 ; CM-NEXT: LSHR T9.Y, T43.W, literal.y,
5010 ; CM-NEXT: LSHR T10.Z, T43.X, literal.y,
5011 ; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
5012 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
5013 ; CM-NEXT: 80(1.121039e-43), 0(0.000000e+00)
5014 ; CM-NEXT: LSHR T54.X, PV.W, literal.x,
5015 ; CM-NEXT: LSHR T10.Y, T43.Y, literal.y,
5016 ; CM-NEXT: LSHR T11.Z, T42.Z, literal.y,
5017 ; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
5018 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
5019 ; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00)
5020 ; CM-NEXT: LSHR T55.X, PV.W, literal.x,
5021 ; CM-NEXT: LSHR T11.Y, T42.W, literal.y,
5022 ; CM-NEXT: LSHR T12.Z, T42.X, literal.y,
5023 ; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
5024 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
5025 ; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00)
5026 ; CM-NEXT: LSHR T56.X, PV.W, literal.x,
5027 ; CM-NEXT: LSHR T12.Y, T42.Y, literal.y,
5028 ; CM-NEXT: BFE_INT T57.Z, T41.Y, 0.0, literal.y, BS:VEC_120/SCL_212
5029 ; CM-NEXT: LSHR * T1.W, T41.Z, literal.y,
5030 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
5031 ; CM-NEXT: BFE_INT T57.X, T41.X, 0.0, literal.x,
5032 ; CM-NEXT: LSHR T13.Y, T41.W, literal.x,
5033 ; CM-NEXT: BFE_INT T58.Z, T41.W, 0.0, literal.x,
5034 ; CM-NEXT: LSHR * T2.W, T41.Y, literal.x,
5035 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5036 ; CM-NEXT: BFE_INT T58.X, T41.Z, 0.0, literal.x,
5037 ; CM-NEXT: LSHR T14.Y, T41.X, literal.x,
5038 ; CM-NEXT: BFE_INT T41.Z, T42.Y, 0.0, literal.x,
5039 ; CM-NEXT: BFE_INT * T57.W, PV.W, 0.0, literal.x,
5040 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5041 ; CM-NEXT: BFE_INT T41.X, T42.X, 0.0, literal.x,
5042 ; CM-NEXT: BFE_INT T57.Y, PV.Y, 0.0, literal.x,
5043 ; CM-NEXT: BFE_INT T59.Z, T42.W, 0.0, literal.x,
5044 ; CM-NEXT: BFE_INT * T58.W, T13.Y, 0.0, literal.x,
5045 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5046 ; CM-NEXT: ALU clause starting at 140:
5047 ; CM-NEXT: BFE_INT T59.X, T42.Z, 0.0, literal.x,
5048 ; CM-NEXT: BFE_INT T58.Y, T1.W, 0.0, literal.x,
5049 ; CM-NEXT: BFE_INT T42.Z, T43.Y, 0.0, literal.x,
5050 ; CM-NEXT: BFE_INT * T41.W, T12.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5051 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5052 ; CM-NEXT: BFE_INT T42.X, T43.X, 0.0, literal.x,
5053 ; CM-NEXT: BFE_INT T41.Y, T12.Z, 0.0, literal.x,
5054 ; CM-NEXT: BFE_INT T60.Z, T43.W, 0.0, literal.x,
5055 ; CM-NEXT: BFE_INT * T59.W, T11.Y, 0.0, literal.x,
5056 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5057 ; CM-NEXT: BFE_INT T60.X, T43.Z, 0.0, literal.x,
5058 ; CM-NEXT: BFE_INT T59.Y, T11.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5059 ; CM-NEXT: BFE_INT T43.Z, T44.Y, 0.0, literal.x,
5060 ; CM-NEXT: BFE_INT * T42.W, T10.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5061 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5062 ; CM-NEXT: BFE_INT T43.X, T44.X, 0.0, literal.x,
5063 ; CM-NEXT: BFE_INT T42.Y, T10.Z, 0.0, literal.x,
5064 ; CM-NEXT: BFE_INT T61.Z, T44.W, 0.0, literal.x,
5065 ; CM-NEXT: BFE_INT * T60.W, T9.Y, 0.0, literal.x,
5066 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5067 ; CM-NEXT: BFE_INT T61.X, T44.Z, 0.0, literal.x,
5068 ; CM-NEXT: BFE_INT T60.Y, T9.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5069 ; CM-NEXT: BFE_INT T44.Z, T45.Y, 0.0, literal.x,
5070 ; CM-NEXT: BFE_INT * T43.W, T8.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5071 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5072 ; CM-NEXT: BFE_INT T44.X, T45.X, 0.0, literal.x,
5073 ; CM-NEXT: BFE_INT T43.Y, T8.Z, 0.0, literal.x,
5074 ; CM-NEXT: BFE_INT T62.Z, T45.W, 0.0, literal.x,
5075 ; CM-NEXT: BFE_INT * T61.W, T7.Y, 0.0, literal.x,
5076 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5077 ; CM-NEXT: BFE_INT T62.X, T45.Z, 0.0, literal.x,
5078 ; CM-NEXT: BFE_INT T61.Y, T7.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5079 ; CM-NEXT: BFE_INT T45.Z, T37.Y, 0.0, literal.x,
5080 ; CM-NEXT: BFE_INT * T44.W, T6.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5081 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5082 ; CM-NEXT: BFE_INT T45.X, T37.X, 0.0, literal.x,
5083 ; CM-NEXT: BFE_INT T44.Y, T6.Z, 0.0, literal.x,
5084 ; CM-NEXT: BFE_INT T63.Z, T37.W, 0.0, literal.x,
5085 ; CM-NEXT: BFE_INT * T62.W, T5.Y, 0.0, literal.x,
5086 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5087 ; CM-NEXT: BFE_INT T63.X, T37.Z, 0.0, literal.x,
5088 ; CM-NEXT: BFE_INT T62.Y, T5.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5089 ; CM-NEXT: BFE_INT T37.Z, T36.Y, 0.0, literal.x,
5090 ; CM-NEXT: BFE_INT * T45.W, T4.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5091 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5092 ; CM-NEXT: BFE_INT T37.X, T36.X, 0.0, literal.x,
5093 ; CM-NEXT: BFE_INT T45.Y, T4.Z, 0.0, literal.x,
5094 ; CM-NEXT: BFE_INT T64.Z, T36.W, 0.0, literal.x,
5095 ; CM-NEXT: BFE_INT * T63.W, T3.Y, 0.0, literal.x,
5096 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5097 ; CM-NEXT: BFE_INT T64.X, T36.Z, 0.0, literal.x,
5098 ; CM-NEXT: BFE_INT T63.Y, T3.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5099 ; CM-NEXT: BFE_INT T36.Z, T35.Y, 0.0, literal.x,
5100 ; CM-NEXT: BFE_INT * T37.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5101 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5102 ; CM-NEXT: BFE_INT T36.X, T35.X, 0.0, literal.x,
5103 ; CM-NEXT: BFE_INT T37.Y, T2.Z, 0.0, literal.x,
5104 ; CM-NEXT: BFE_INT T65.Z, T35.W, 0.0, literal.x,
5105 ; CM-NEXT: BFE_INT * T64.W, T0.W, 0.0, literal.x, BS:VEC_120/SCL_212
5106 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5107 ; CM-NEXT: BFE_INT T65.X, T35.Z, 0.0, literal.x,
5108 ; CM-NEXT: BFE_INT T64.Y, T1.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5109 ; CM-NEXT: LSHR T1.Z, T35.X, literal.x,
5110 ; CM-NEXT: BFE_INT * T36.W, T1.Y, 0.0, literal.x,
5111 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5112 ; CM-NEXT: LSHR T35.X, KC0[2].Y, literal.x,
5113 ; CM-NEXT: BFE_INT T36.Y, PV.Z, 0.0, literal.y,
5114 ; CM-NEXT: ADD_INT T1.Z, KC0[2].Y, literal.y,
5115 ; CM-NEXT: BFE_INT * T65.W, T0.Z, 0.0, literal.y,
5116 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
5117 ; CM-NEXT: LSHR T66.X, PV.Z, literal.x,
5118 ; CM-NEXT: BFE_INT * T65.Y, T0.Y, 0.0, literal.y,
5119 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
5120 %load = load <64 x i16>, ptr addrspace(1) %in
5121 %ext = sext <64 x i16> %load to <64 x i32>
5122 store <64 x i32> %ext, ptr addrspace(1) %out
; Zero-extending scalar load: reads one i16 from %in, widens it to i64 with
; zext, and stores the 64-bit result to %out.
; The expected code loads the value with an unsigned 16-bit load and fills the
; high dword with a constant zero (v_mov_b32 v1, 0 in the amdgcn runs,
; MOV T0.Y, 0.0 in the r600 runs). The tonga run additionally expects a
; v_and_b32 with 0xffff on the loaded value before the store.
5126 define amdgpu_kernel void @global_zextload_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
5127 ; GCN-NOHSA-SI-LABEL: global_zextload_i16_to_i64:
5128 ; GCN-NOHSA-SI: ; %bb.0:
5129 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
5130 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
5131 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
5132 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
5133 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
5134 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
5135 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
5136 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
5137 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
5138 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
5139 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
5140 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0
5141 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
5142 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5143 ; GCN-NOHSA-SI-NEXT: s_endpgm
5145 ; GCN-HSA-LABEL: global_zextload_i16_to_i64:
5147 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
5148 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
5149 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
5150 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
5151 ; GCN-HSA-NEXT: flat_load_ushort v0, v[0:1]
5152 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0
5153 ; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1
5154 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0
5155 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
5156 ; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
5157 ; GCN-HSA-NEXT: s_endpgm
5159 ; GCN-NOHSA-VI-LABEL: global_zextload_i16_to_i64:
5160 ; GCN-NOHSA-VI: ; %bb.0:
5161 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
5162 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
5163 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
5164 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
5165 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
5166 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
5167 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
5168 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
5169 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
5170 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
5171 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
5172 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0
5173 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
5174 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0
5175 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5176 ; GCN-NOHSA-VI-NEXT: s_endpgm
5178 ; EG-LABEL: global_zextload_i16_to_i64:
5180 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
5182 ; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
5183 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5186 ; EG-NEXT: Fetch clause starting at 6:
5187 ; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
5188 ; EG-NEXT: ALU clause starting at 8:
5189 ; EG-NEXT: MOV * T0.X, KC0[2].Z,
5190 ; EG-NEXT: ALU clause starting at 9:
5191 ; EG-NEXT: MOV * T0.Y, 0.0,
5192 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
5193 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
5195 ; CM-LABEL: global_zextload_i16_to_i64:
5197 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
5199 ; CM-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
5200 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
5203 ; CM-NEXT: Fetch clause starting at 6:
5204 ; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
5205 ; CM-NEXT: ALU clause starting at 8:
5206 ; CM-NEXT: MOV * T0.X, KC0[2].Z,
5207 ; CM-NEXT: ALU clause starting at 9:
5208 ; CM-NEXT: MOV * T0.Y, 0.0,
5209 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
5210 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; IR under test: i16 global load, zext to i64, i64 global store.
5211 %a = load i16, ptr addrspace(1) %in
5212 %ext = zext i16 %a to i64
5213 store i64 %ext, ptr addrspace(1) %out
5217 ; FIXME: Need to optimize this sequence to avoid extra bfe:
5218 ; t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64
5219 ; t31: i64 = any_extend t28
5220 ; t33: i64 = sign_extend_inreg t31, ValueType:ch:i16
5222 ; TODO: These could be expanded earlier using ASHR 15
; Sign-extending scalar load: reads one i16 from %in, widens it to i64 with
; sext, and stores the 64-bit result to %out.
; In every run the expected high dword is the sign-extended low dword shifted
; right arithmetically by 31. The default amdgcn and kaveri runs expect a
; signed 16-bit load; the tonga run expects an unsigned 16-bit load followed
; by v_bfe_i32 (offset 0, width 16); the r600 runs expect VTX_READ_16
; followed by BFE_INT with a literal 16.
5223 define amdgpu_kernel void @global_sextload_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
5224 ; GCN-NOHSA-SI-LABEL: global_sextload_i16_to_i64:
5225 ; GCN-NOHSA-SI: ; %bb.0:
5226 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
5227 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
5228 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
5229 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
5230 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
5231 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
5232 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
5233 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
5234 ; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0
5235 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
5236 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
5237 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
5238 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
5239 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5240 ; GCN-NOHSA-SI-NEXT: s_endpgm
5242 ; GCN-HSA-LABEL: global_sextload_i16_to_i64:
5244 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
5245 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
5246 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
5247 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
5248 ; GCN-HSA-NEXT: flat_load_sshort v0, v[0:1]
5249 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0
5250 ; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1
5251 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
5252 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0
5253 ; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
5254 ; GCN-HSA-NEXT: s_endpgm
5256 ; GCN-NOHSA-VI-LABEL: global_sextload_i16_to_i64:
5257 ; GCN-NOHSA-VI: ; %bb.0:
5258 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
5259 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
5260 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
5261 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
5262 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
5263 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
5264 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
5265 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
5266 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
5267 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
5268 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
5269 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
5270 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16
5271 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
5272 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5273 ; GCN-NOHSA-VI-NEXT: s_endpgm
5275 ; EG-LABEL: global_sextload_i16_to_i64:
5277 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
5279 ; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
5280 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5283 ; EG-NEXT: Fetch clause starting at 6:
5284 ; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
5285 ; EG-NEXT: ALU clause starting at 8:
5286 ; EG-NEXT: MOV * T0.X, KC0[2].Z,
5287 ; EG-NEXT: ALU clause starting at 9:
5288 ; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x,
5289 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
5290 ; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
5291 ; EG-NEXT: ASHR * T0.Y, PV.X, literal.x,
5292 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
5294 ; CM-LABEL: global_sextload_i16_to_i64:
5296 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
5298 ; CM-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
5299 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
5302 ; CM-NEXT: Fetch clause starting at 6:
5303 ; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
5304 ; CM-NEXT: ALU clause starting at 8:
5305 ; CM-NEXT: MOV * T0.X, KC0[2].Z,
5306 ; CM-NEXT: ALU clause starting at 9:
5307 ; CM-NEXT: BFE_INT * T0.X, T0.X, 0.0, literal.x,
5308 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5309 ; CM-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
5310 ; CM-NEXT: ASHR * T0.Y, PV.X, literal.y,
5311 ; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44)
; IR under test: i16 global load, sext to i64, i64 global store.
5312 %a = load i16, ptr addrspace(1) %in
5313 %ext = sext i16 %a to i64
5314 store i64 %ext, ptr addrspace(1) %out
; Zero-extending load of a single-element vector: reads <1 x i16> from %in,
; widens it to <1 x i64> with zext, and stores the result to %out.
; The expected code loads the element with an unsigned 16-bit load and fills
; the high dword with a constant zero (v_mov_b32 v1, 0 in the amdgcn runs,
; MOV T0.Y, 0.0 in the r600 runs). The tonga run additionally expects a
; v_and_b32 with 0xffff on the loaded value before the store.
5318 define amdgpu_kernel void @global_zextload_v1i16_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
5319 ; GCN-NOHSA-SI-LABEL: global_zextload_v1i16_to_v1i64:
5320 ; GCN-NOHSA-SI: ; %bb.0:
5321 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
5322 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
5323 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
5324 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
5325 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
5326 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
5327 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
5328 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
5329 ; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
5330 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
5331 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
5332 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0
5333 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
5334 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5335 ; GCN-NOHSA-SI-NEXT: s_endpgm
5337 ; GCN-HSA-LABEL: global_zextload_v1i16_to_v1i64:
5339 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
5340 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
5341 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
5342 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
5343 ; GCN-HSA-NEXT: flat_load_ushort v0, v[0:1]
5344 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0
5345 ; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1
5346 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0
5347 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
5348 ; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
5349 ; GCN-HSA-NEXT: s_endpgm
5351 ; GCN-NOHSA-VI-LABEL: global_zextload_v1i16_to_v1i64:
5352 ; GCN-NOHSA-VI: ; %bb.0:
5353 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
5354 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
5355 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
5356 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
5357 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
5358 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
5359 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
5360 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
5361 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
5362 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
5363 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
5364 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0
5365 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
5366 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0
5367 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5368 ; GCN-NOHSA-VI-NEXT: s_endpgm
5370 ; EG-LABEL: global_zextload_v1i16_to_v1i64:
5372 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
5374 ; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
5375 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5378 ; EG-NEXT: Fetch clause starting at 6:
5379 ; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
5380 ; EG-NEXT: ALU clause starting at 8:
5381 ; EG-NEXT: MOV * T0.X, KC0[2].Z,
5382 ; EG-NEXT: ALU clause starting at 9:
5383 ; EG-NEXT: MOV * T0.Y, 0.0,
5384 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
5385 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
5387 ; CM-LABEL: global_zextload_v1i16_to_v1i64:
5389 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
5391 ; CM-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
5392 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
5395 ; CM-NEXT: Fetch clause starting at 6:
5396 ; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
5397 ; CM-NEXT: ALU clause starting at 8:
5398 ; CM-NEXT: MOV * T0.X, KC0[2].Z,
5399 ; CM-NEXT: ALU clause starting at 9:
5400 ; CM-NEXT: MOV * T0.Y, 0.0,
5401 ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
5402 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; IR under test: <1 x i16> global load, zext to <1 x i64>, global store.
5403 %load = load <1 x i16>, ptr addrspace(1) %in
5404 %ext = zext <1 x i16> %load to <1 x i64>
5405 store <1 x i64> %ext, ptr addrspace(1) %out
5409 ; TODO: These could be expanded earlier using ASHR 15
; Kernel under test - loads a <1 x i16> through the addrspace(1) pointer %in,
; sign-extends it to <1 x i64> and stores the result through %out.
; In the expectations below the SI and HSA targets use a signed 16-bit load
; (buffer_load_sshort / flat_load_sshort), while the VI target loads with
; buffer_load_ushort and sign-extends afterwards with v_bfe_i32. All three
; GCN targets form the upper dword with an arithmetic shift right by 31.
; The EG and CM targets read 16 bits, then use BFE_INT followed by ASHR 31.
5410 define amdgpu_kernel void @global_sextload_v1i16_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
5411 ; GCN-NOHSA-SI-LABEL: global_sextload_v1i16_to_v1i64:
5412 ; GCN-NOHSA-SI: ; %bb.0:
5413 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
5414 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
5415 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
5416 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
5417 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
5418 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
5419 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
5420 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
5421 ; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0
5422 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
5423 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
5424 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
5425 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
5426 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5427 ; GCN-NOHSA-SI-NEXT: s_endpgm
5429 ; GCN-HSA-LABEL: global_sextload_v1i16_to_v1i64:
5431 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
5432 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
5433 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
5434 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
5435 ; GCN-HSA-NEXT: flat_load_sshort v0, v[0:1]
5436 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0
5437 ; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1
5438 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
5439 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0
5440 ; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
5441 ; GCN-HSA-NEXT: s_endpgm
5443 ; GCN-NOHSA-VI-LABEL: global_sextload_v1i16_to_v1i64:
5444 ; GCN-NOHSA-VI: ; %bb.0:
5445 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
5446 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
5447 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
5448 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
5449 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
5450 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
5451 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
5452 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
5453 ; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
5454 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
5455 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
5456 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
5457 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16
5458 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
5459 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5460 ; GCN-NOHSA-VI-NEXT: s_endpgm
5462 ; EG-LABEL: global_sextload_v1i16_to_v1i64:
5464 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
5466 ; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
5467 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5470 ; EG-NEXT: Fetch clause starting at 6:
5471 ; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
5472 ; EG-NEXT: ALU clause starting at 8:
5473 ; EG-NEXT: MOV * T0.X, KC0[2].Z,
5474 ; EG-NEXT: ALU clause starting at 9:
5475 ; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x,
5476 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
5477 ; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
5478 ; EG-NEXT: ASHR * T0.Y, PV.X, literal.x,
5479 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
5481 ; CM-LABEL: global_sextload_v1i16_to_v1i64:
5483 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
5485 ; CM-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
5486 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
5489 ; CM-NEXT: Fetch clause starting at 6:
5490 ; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
5491 ; CM-NEXT: ALU clause starting at 8:
5492 ; CM-NEXT: MOV * T0.X, KC0[2].Z,
5493 ; CM-NEXT: ALU clause starting at 9:
5494 ; CM-NEXT: BFE_INT * T0.X, T0.X, 0.0, literal.x,
5495 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5496 ; CM-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
5497 ; CM-NEXT: ASHR * T0.Y, PV.X, literal.y,
5498 ; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44)
5499 %load = load <1 x i16>, ptr addrspace(1) %in
5500 %ext = sext <1 x i16> %load to <1 x i64>
5501 store <1 x i64> %ext, ptr addrspace(1) %out
; Kernel under test - loads a <2 x i16> through the addrspace(1) pointer %in,
; zero-extends it to <2 x i64> and stores the result through %out.
; In the GCN expectations both i16 lanes arrive in one dword load. The low
; lane is isolated with an AND against 0xffff and the high lane with a
; logical shift right by 16. The upper dword of each i64 is a zero moved
; into v1 and copied to v3, and the result leaves in a single dwordx4 store.
; The EG and CM expectations follow the same shape using LSHR, AND_INT and
; MOV of 0.0 into the Y and W channels.
5505 define amdgpu_kernel void @global_zextload_v2i16_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
5506 ; GCN-NOHSA-SI-LABEL: global_zextload_v2i16_to_v2i64:
5507 ; GCN-NOHSA-SI: ; %bb.0:
5508 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
5509 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
5510 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
5511 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
5512 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
5513 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
5514 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
5515 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
5516 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
5517 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0
5518 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
5519 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
5520 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
5521 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0
5522 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
5523 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1
5524 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
5525 ; GCN-NOHSA-SI-NEXT: s_endpgm
5527 ; GCN-HSA-LABEL: global_zextload_v2i16_to_v2i64:
5529 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
5530 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
5531 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
5532 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
5533 ; GCN-HSA-NEXT: flat_load_dword v0, v[0:1]
5534 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0
5535 ; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
5536 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
5537 ; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1
5538 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
5539 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v0
5540 ; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v0
5541 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
5542 ; GCN-HSA-NEXT: s_endpgm
5544 ; GCN-NOHSA-VI-LABEL: global_zextload_v2i16_to_v2i64:
5545 ; GCN-NOHSA-VI: ; %bb.0:
5546 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
5547 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
5548 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
5549 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
5550 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
5551 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
5552 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
5553 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
5554 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
5555 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0
5556 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
5557 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
5558 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1
5559 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
5560 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v0
5561 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0
5562 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
5563 ; GCN-NOHSA-VI-NEXT: s_endpgm
5565 ; EG-LABEL: global_zextload_v2i16_to_v2i64:
5567 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
5569 ; EG-NEXT: ALU 6, @9, KC0[CB0:0-32], KC1[]
5570 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1
5573 ; EG-NEXT: Fetch clause starting at 6:
5574 ; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
5575 ; EG-NEXT: ALU clause starting at 8:
5576 ; EG-NEXT: MOV * T4.X, KC0[2].Z,
5577 ; EG-NEXT: ALU clause starting at 9:
5578 ; EG-NEXT: LSHR * T4.Z, T4.X, literal.x,
5579 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5580 ; EG-NEXT: AND_INT T4.X, T4.X, literal.x,
5581 ; EG-NEXT: MOV T4.Y, 0.0,
5582 ; EG-NEXT: MOV T4.W, 0.0,
5583 ; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y,
5584 ; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45)
5586 ; CM-LABEL: global_zextload_v2i16_to_v2i64:
5588 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
5590 ; CM-NEXT: ALU 7, @9, KC0[CB0:0-32], KC1[]
5591 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4, T5.X
5594 ; CM-NEXT: Fetch clause starting at 6:
5595 ; CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
5596 ; CM-NEXT: ALU clause starting at 8:
5597 ; CM-NEXT: MOV * T4.X, KC0[2].Z,
5598 ; CM-NEXT: ALU clause starting at 9:
5599 ; CM-NEXT: LSHR * T4.Z, T4.X, literal.x,
5600 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5601 ; CM-NEXT: AND_INT T4.X, T4.X, literal.x,
5602 ; CM-NEXT: MOV T4.Y, 0.0,
5603 ; CM-NEXT: MOV * T4.W, 0.0,
5604 ; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
5605 ; CM-NEXT: LSHR * T5.X, KC0[2].Y, literal.x,
5606 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
5607 %load = load <2 x i16>, ptr addrspace(1) %in
5608 %ext = zext <2 x i16> %load to <2 x i64>
5609 store <2 x i64> %ext, ptr addrspace(1) %out
; Kernel under test - loads a <2 x i16> through the addrspace(1) pointer %in,
; sign-extends it to <2 x i64> and stores the result through %out.
; In the GCN expectations both i16 lanes arrive in one dword load. The high
; lane is first moved down with a logical shift right by 16, each lane is
; sign-extended from 16 bits with v_bfe_i32, and the upper dword of each i64
; comes from an arithmetic shift right by 31 of the extended value.
; The EG and CM expectations derive the high lane directly with ASHR by 16
; and by 31 on the loaded dword, and the low lane with BFE_INT plus ASHR 31.
5613 define amdgpu_kernel void @global_sextload_v2i16_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
5614 ; GCN-NOHSA-SI-LABEL: global_sextload_v2i16_to_v2i64:
5615 ; GCN-NOHSA-SI: ; %bb.0:
5616 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
5617 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
5618 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
5619 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
5620 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
5621 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
5622 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
5623 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
5624 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
5625 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
5626 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
5627 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
5628 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0
5629 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v0, 0, 16
5630 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
5631 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v2, 0, 16
5632 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 31, v2
5633 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
5634 ; GCN-NOHSA-SI-NEXT: s_endpgm
5636 ; GCN-HSA-LABEL: global_sextload_v2i16_to_v2i64:
5638 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
5639 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
5640 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
5641 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
5642 ; GCN-HSA-NEXT: flat_load_dword v0, v[0:1]
5643 ; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
5644 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
5645 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
5646 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v0
5647 ; GCN-HSA-NEXT: v_bfe_i32 v0, v0, 0, 16
5648 ; GCN-HSA-NEXT: v_bfe_i32 v2, v2, 0, 16
5649 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0
5650 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2
5651 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
5652 ; GCN-HSA-NEXT: s_endpgm
5654 ; GCN-NOHSA-VI-LABEL: global_sextload_v2i16_to_v2i64:
5655 ; GCN-NOHSA-VI: ; %bb.0:
5656 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
5657 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
5658 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
5659 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
5660 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
5661 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
5662 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
5663 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
5664 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v1, off, s[8:11], 0
5665 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
5666 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
5667 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
5668 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v1
5669 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v1, 0, 16
5670 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v2, 0, 16
5671 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
5672 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2
5673 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
5674 ; GCN-NOHSA-VI-NEXT: s_endpgm
5676 ; EG-LABEL: global_sextload_v2i16_to_v2i64:
5678 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
5680 ; EG-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[]
5681 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1
5684 ; EG-NEXT: Fetch clause starting at 6:
5685 ; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
5686 ; EG-NEXT: ALU clause starting at 8:
5687 ; EG-NEXT: MOV * T4.X, KC0[2].Z,
5688 ; EG-NEXT: ALU clause starting at 9:
5689 ; EG-NEXT: ASHR * T4.W, T4.X, literal.x,
5690 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
5691 ; EG-NEXT: ASHR * T4.Z, T4.X, literal.x,
5692 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5693 ; EG-NEXT: BFE_INT T4.X, T4.X, 0.0, literal.x,
5694 ; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y,
5695 ; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
5696 ; EG-NEXT: ASHR * T4.Y, PV.X, literal.x,
5697 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
5699 ; CM-LABEL: global_sextload_v2i16_to_v2i64:
5701 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
5703 ; CM-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[]
5704 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4, T5.X
5707 ; CM-NEXT: Fetch clause starting at 6:
5708 ; CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
5709 ; CM-NEXT: ALU clause starting at 8:
5710 ; CM-NEXT: MOV * T4.X, KC0[2].Z,
5711 ; CM-NEXT: ALU clause starting at 9:
5712 ; CM-NEXT: ASHR * T4.W, T4.X, literal.x,
5713 ; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00)
5714 ; CM-NEXT: ASHR * T4.Z, T4.X, literal.x,
5715 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5716 ; CM-NEXT: BFE_INT * T4.X, T4.X, 0.0, literal.x,
5717 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5718 ; CM-NEXT: LSHR T5.X, KC0[2].Y, literal.x,
5719 ; CM-NEXT: ASHR * T4.Y, PV.X, literal.y,
5720 ; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44)
5721 %load = load <2 x i16>, ptr addrspace(1) %in
5722 %ext = sext <2 x i16> %load to <2 x i64>
5723 store <2 x i64> %ext, ptr addrspace(1) %out
; Kernel under test - loads a <4 x i16> through the addrspace(1) pointer %in,
; zero-extends it to <4 x i64> and stores the result through %out.
; In the GCN expectations the four lanes arrive in one dwordx2 load. Each
; dword is split with an AND against 0xffff and a logical shift right by 16,
; and a single zero register is copied into the upper dword of every i64.
; The 32-byte result is written as two dwordx4 stores, one at the base
; address and one 16 bytes above it (an offset of 16 on the buffer store, or
; an s_add_u32 / s_addc_u32 pair on the HSA target).
; The EG and CM expectations likewise issue two stores, with the second
; address formed by ADD_INT of 16.
5727 define amdgpu_kernel void @global_zextload_v4i16_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
5728 ; GCN-NOHSA-SI-LABEL: global_zextload_v4i16_to_v4i64:
5729 ; GCN-NOHSA-SI: ; %bb.0:
5730 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
5731 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
5732 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
5733 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
5734 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
5735 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
5736 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
5737 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
5738 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[8:9], off, s[8:11], 0
5739 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0
5740 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1
5741 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v1
5742 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, v1
5743 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
5744 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
5745 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
5746 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v9
5747 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v8
5748 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v8
5749 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v9
5750 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
5751 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
5752 ; GCN-NOHSA-SI-NEXT: s_endpgm
5754 ; GCN-HSA-LABEL: global_zextload_v4i16_to_v4i64:
5756 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
5757 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
5758 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
5759 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
5760 ; GCN-HSA-NEXT: flat_load_dwordx2 v[8:9], v[0:1]
5761 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
5762 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
5763 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0
5764 ; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3
5765 ; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1
5766 ; GCN-HSA-NEXT: v_mov_b32_e32 v11, s1
5767 ; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2
5768 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, v1
5769 ; GCN-HSA-NEXT: v_mov_b32_e32 v7, v1
5770 ; GCN-HSA-NEXT: v_mov_b32_e32 v10, s0
5771 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
5772 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v9
5773 ; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v9
5774 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v6, 16, v8
5775 ; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v8
5776 ; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[0:3]
5777 ; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7]
5778 ; GCN-HSA-NEXT: s_endpgm
5780 ; GCN-NOHSA-VI-LABEL: global_zextload_v4i16_to_v4i64:
5781 ; GCN-NOHSA-VI: ; %bb.0:
5782 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
5783 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
5784 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
5785 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
5786 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
5787 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
5788 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
5789 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
5790 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[8:9], off, s[8:11], 0
5791 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0
5792 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1
5793 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
5794 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
5795 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, v1
5796 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, v1
5797 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
5798 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v9
5799 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v9
5800 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v6, 16, v8
5801 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, 0xffff, v8
5802 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
5803 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
5804 ; GCN-NOHSA-VI-NEXT: s_endpgm
5806 ; EG-LABEL: global_zextload_v4i16_to_v4i64:
5808 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
5810 ; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[]
5811 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T8.X, 0
5812 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T7.X, 1
5814 ; EG-NEXT: Fetch clause starting at 6:
5815 ; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
5816 ; EG-NEXT: ALU clause starting at 8:
5817 ; EG-NEXT: MOV * T5.X, KC0[2].Z,
5818 ; EG-NEXT: ALU clause starting at 9:
5819 ; EG-NEXT: LSHR * T6.Z, T5.Y, literal.x,
5820 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5821 ; EG-NEXT: AND_INT T6.X, T5.Y, literal.x,
5822 ; EG-NEXT: MOV T6.Y, 0.0,
5823 ; EG-NEXT: LSHR T5.Z, T5.X, literal.y,
5824 ; EG-NEXT: AND_INT * T5.X, T5.X, literal.x,
5825 ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
5826 ; EG-NEXT: MOV T5.Y, 0.0,
5827 ; EG-NEXT: MOV T6.W, 0.0,
5828 ; EG-NEXT: MOV * T5.W, 0.0,
5829 ; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
5830 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
5831 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
5832 ; EG-NEXT: LSHR * T8.X, PV.W, literal.x,
5833 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
5835 ; CM-LABEL: global_zextload_v4i16_to_v4i64:
5837 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
5839 ; CM-NEXT: ALU 16, @9, KC0[CB0:0-32], KC1[]
5840 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6, T8.X
5841 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T7.X
5843 ; CM-NEXT: Fetch clause starting at 6:
5844 ; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
5845 ; CM-NEXT: ALU clause starting at 8:
5846 ; CM-NEXT: MOV * T5.X, KC0[2].Z,
5847 ; CM-NEXT: ALU clause starting at 9:
5848 ; CM-NEXT: LSHR * T6.Z, T5.X, literal.x,
5849 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5850 ; CM-NEXT: AND_INT T6.X, T5.X, literal.x,
5851 ; CM-NEXT: MOV T6.Y, 0.0,
5852 ; CM-NEXT: LSHR * T5.Z, T5.Y, literal.y,
5853 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
5854 ; CM-NEXT: AND_INT T5.X, T5.Y, literal.x,
5855 ; CM-NEXT: MOV T5.Y, 0.0,
5856 ; CM-NEXT: MOV * T6.W, 0.0,
5857 ; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
5858 ; CM-NEXT: MOV * T5.W, 0.0,
5859 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
5860 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5861 ; CM-NEXT: LSHR * T7.X, PV.W, literal.x,
5862 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
5863 ; CM-NEXT: LSHR * T8.X, KC0[2].Y, literal.x,
5864 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
5865 %load = load <4 x i16>, ptr addrspace(1) %in
5866 %ext = zext <4 x i16> %load to <4 x i64>
5867 store <4 x i64> %ext, ptr addrspace(1) %out
; Kernel under test - loads a <4 x i16> through the addrspace(1) pointer %in,
; sign-extends it to <4 x i64> and stores the result through %out.
; In the GCN expectations the four lanes arrive in one dwordx2 load and the
; 32-byte result is written as two dwordx4 stores (base address and base
; plus 16). The SI and HSA targets produce the topmost lane with a single
; 64-bit arithmetic shift right by 48 (v_ashr_i64), whereas the VI target
; handles every lane with v_bfe_i32 followed by an arithmetic shift right
; by 31 for the upper dword.
; The EG and CM expectations use ASHR by 16 and by 31 for the odd lanes and
; BFE_INT plus ASHR 31 for the even lanes.
5871 define amdgpu_kernel void @global_sextload_v4i16_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
5872 ; GCN-NOHSA-SI-LABEL: global_sextload_v4i16_to_v4i64:
5873 ; GCN-NOHSA-SI: ; %bb.0:
5874 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
5875 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
5876 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
5877 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
5878 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
5879 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
5880 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
5881 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
5882 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[1:2], off, s[8:11], 0
5883 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
5884 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
5885 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
5886 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v2
5887 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v1
5888 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v1, 0, 16
5889 ; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[6:7], v[1:2], 48
5890 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v3, 0, 16
5891 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
5892 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v5, 0, 16
5893 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 31, v4
5894 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 31, v2
5895 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
5896 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
5897 ; GCN-NOHSA-SI-NEXT: s_endpgm
5899 ; GCN-HSA-LABEL: global_sextload_v4i16_to_v4i64:
5901 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
5902 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
5903 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
5904 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
5905 ; GCN-HSA-NEXT: flat_load_dwordx2 v[1:2], v[0:1]
5906 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
5907 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
5908 ; GCN-HSA-NEXT: v_mov_b32_e32 v11, s3
5909 ; GCN-HSA-NEXT: v_mov_b32_e32 v9, s1
5910 ; GCN-HSA-NEXT: v_mov_b32_e32 v10, s2
5911 ; GCN-HSA-NEXT: v_mov_b32_e32 v8, s0
5912 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
5913 ; GCN-HSA-NEXT: v_mov_b32_e32 v3, v2
5914 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v1
5915 ; GCN-HSA-NEXT: v_ashr_i64 v[6:7], v[1:2], 48
5916 ; GCN-HSA-NEXT: v_bfe_i32 v2, v4, 0, 16
5917 ; GCN-HSA-NEXT: v_bfe_i32 v4, v3, 0, 16
5918 ; GCN-HSA-NEXT: v_bfe_i32 v0, v1, 0, 16
5919 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4
5920 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0
5921 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2
5922 ; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7]
5923 ; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3]
5924 ; GCN-HSA-NEXT: s_endpgm
5926 ; GCN-NOHSA-VI-LABEL: global_sextload_v4i16_to_v4i64:
5927 ; GCN-NOHSA-VI: ; %bb.0:
5928 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
5929 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
5930 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
5931 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
5932 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
5933 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
5934 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
5935 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
5936 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[1:2], off, s[8:11], 0
5937 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
5938 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
5939 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
5940 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, v2
5941 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v2
5942 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v1
5943 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v4, 0, 16
5944 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v5, 0, 16
5945 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v1, 0, 16
5946 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v3, 0, 16
5947 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 31, v4
5948 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6
5949 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
5950 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2
5951 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
5952 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
5953 ; GCN-NOHSA-VI-NEXT: s_endpgm
5955 ; EG-LABEL: global_sextload_v4i16_to_v4i64:
5957 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
5959 ; EG-NEXT: ALU 16, @9, KC0[CB0:0-32], KC1[]
5960 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 0
5961 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
5963 ; EG-NEXT: Fetch clause starting at 6:
5964 ; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
5965 ; EG-NEXT: ALU clause starting at 8:
5966 ; EG-NEXT: MOV * T5.X, KC0[2].Z,
5967 ; EG-NEXT: ALU clause starting at 9:
5968 ; EG-NEXT: ASHR * T5.W, T5.X, literal.x,
5969 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
5970 ; EG-NEXT: LSHR T6.X, KC0[2].Y, literal.x,
5971 ; EG-NEXT: ASHR T5.Z, T5.X, literal.y,
5972 ; EG-NEXT: ASHR * T7.W, T5.Y, literal.z,
5973 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
5974 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
5975 ; EG-NEXT: BFE_INT T5.X, T5.X, 0.0, literal.x,
5976 ; EG-NEXT: ASHR * T7.Z, T5.Y, literal.x,
5977 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
5978 ; EG-NEXT: BFE_INT T7.X, T5.Y, 0.0, literal.x,
5979 ; EG-NEXT: ASHR T5.Y, PV.X, literal.y,
5980 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
5981 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
5982 ; EG-NEXT: LSHR T8.X, PV.W, literal.x,
5983 ; EG-NEXT: ASHR * T7.Y, PV.X, literal.y,
5984 ; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44)
5986 ; CM-LABEL: global_sextload_v4i16_to_v4i64:
5988 ; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
5990 ; CM-NEXT: ALU 16, @9, KC0[CB0:0-32], KC1[]
5991 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T8.X
5992 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6, T7.X
5994 ; CM-NEXT: Fetch clause starting at 6:
5995 ; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
5996 ; CM-NEXT: ALU clause starting at 8:
5997 ; CM-NEXT: MOV * T5.X, KC0[2].Z,
5998 ; CM-NEXT: ALU clause starting at 9:
5999 ; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.x,
6000 ; CM-NEXT: ASHR * T6.W, T5.Y, literal.y,
6001 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
6002 ; CM-NEXT: LSHR T7.X, PV.Z, literal.x,
6003 ; CM-NEXT: ASHR T6.Z, T5.Y, literal.y,
6004 ; CM-NEXT: ASHR * T5.W, T5.X, literal.z,
6005 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
6006 ; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00)
6007 ; CM-NEXT: BFE_INT T6.X, T5.Y, 0.0, literal.x,
6008 ; CM-NEXT: ASHR * T5.Z, T5.X, literal.x,
6009 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
6010 ; CM-NEXT: BFE_INT T5.X, T5.X, 0.0, literal.x,
6011 ; CM-NEXT: ASHR * T6.Y, PV.X, literal.y,
6012 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
6013 ; CM-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
6014 ; CM-NEXT: ASHR * T5.Y, PV.X, literal.y,
6015 ; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44)
6016 %load = load <4 x i16>, ptr addrspace(1) %in
6017 %ext = sext <4 x i16> %load to <4 x i64>
6018 store <4 x i64> %ext, ptr addrspace(1) %out
6022 define amdgpu_kernel void @global_zextload_v8i16_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
6023 ; GCN-NOHSA-SI-LABEL: global_zextload_v8i16_to_v8i64:
6024 ; GCN-NOHSA-SI: ; %bb.0:
6025 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
6026 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
6027 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
6028 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2
6029 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3
6030 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
6031 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6
6032 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7
6033 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
6034 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, 0
6035 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, v4
6036 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, v4
6037 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, v4
6038 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, v4
6039 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, v4
6040 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, v4
6041 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, v4
6042 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4
6043 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5
6044 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
6045 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v1
6046 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v3
6047 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v13, 16, v2
6048 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v0
6049 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v15, 0xffff, v0
6050 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v11, 0xffff, v2
6051 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v7, 0xffff, v1
6052 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v3, 0xffff, v3
6053 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:48
6054 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:16
6055 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:32
6056 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0
6057 ; GCN-NOHSA-SI-NEXT: s_endpgm
6059 ; GCN-HSA-LABEL: global_zextload_v8i16_to_v8i64:
6061 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
6062 ; GCN-HSA-NEXT: v_mov_b32_e32 v4, 0
6063 ; GCN-HSA-NEXT: v_mov_b32_e32 v6, v4
6064 ; GCN-HSA-NEXT: v_mov_b32_e32 v8, v4
6065 ; GCN-HSA-NEXT: v_mov_b32_e32 v10, v4
6066 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
6067 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
6068 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
6069 ; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
6070 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
6071 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
6072 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s3
6073 ; GCN-HSA-NEXT: v_mov_b32_e32 v21, s2
6074 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
6075 ; GCN-HSA-NEXT: v_mov_b32_e32 v20, s1
6076 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
6077 ; GCN-HSA-NEXT: v_mov_b32_e32 v19, s0
6078 ; GCN-HSA-NEXT: s_add_u32 s0, s0, 32
6079 ; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0
6080 ; GCN-HSA-NEXT: v_mov_b32_e32 v24, s3
6081 ; GCN-HSA-NEXT: v_mov_b32_e32 v26, s1
6082 ; GCN-HSA-NEXT: v_mov_b32_e32 v12, v4
6083 ; GCN-HSA-NEXT: v_mov_b32_e32 v14, v4
6084 ; GCN-HSA-NEXT: v_mov_b32_e32 v16, v4
6085 ; GCN-HSA-NEXT: v_mov_b32_e32 v18, v4
6086 ; GCN-HSA-NEXT: v_mov_b32_e32 v23, s2
6087 ; GCN-HSA-NEXT: v_mov_b32_e32 v25, s0
6088 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
6089 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v3
6090 ; GCN-HSA-NEXT: v_and_b32_e32 v3, 0xffff, v3
6091 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v1
6092 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v13, 16, v2
6093 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v0
6094 ; GCN-HSA-NEXT: v_and_b32_e32 v15, 0xffff, v0
6095 ; GCN-HSA-NEXT: v_and_b32_e32 v11, 0xffff, v2
6096 ; GCN-HSA-NEXT: v_and_b32_e32 v7, 0xffff, v1
6097 ; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[3:6]
6098 ; GCN-HSA-NEXT: flat_store_dwordx4 v[23:24], v[7:10]
6099 ; GCN-HSA-NEXT: flat_store_dwordx4 v[25:26], v[11:14]
6100 ; GCN-HSA-NEXT: flat_store_dwordx4 v[19:20], v[15:18]
6101 ; GCN-HSA-NEXT: s_endpgm
6103 ; GCN-NOHSA-VI-LABEL: global_zextload_v8i16_to_v8i64:
6104 ; GCN-NOHSA-VI: ; %bb.0:
6105 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
6106 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
6107 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
6108 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2
6109 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3
6110 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
6111 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
6112 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
6113 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
6114 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, 0
6115 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, v4
6116 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
6117 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
6118 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v8, v4
6119 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v10, v4
6120 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v12, v4
6121 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v14, v4
6122 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v16, v4
6123 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v18, v4
6124 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
6125 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v3
6126 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v3, 0xffff, v3
6127 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v17, 16, v0
6128 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v15, 0xffff, v0
6129 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v13, 16, v1
6130 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v11, 0xffff, v1
6131 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v2
6132 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v7, 0xffff, v2
6133 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:48
6134 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:32
6135 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:16
6136 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0
6137 ; GCN-NOHSA-VI-NEXT: s_endpgm
6139 ; EG-LABEL: global_zextload_v8i16_to_v8i64:
6141 ; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
6143 ; EG-NEXT: ALU 30, @11, KC0[CB0:0-32], KC1[]
6144 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T14.X, 0
6145 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T13.X, 0
6146 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T12.X, 0
6147 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 1
6149 ; EG-NEXT: Fetch clause starting at 8:
6150 ; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
6151 ; EG-NEXT: ALU clause starting at 10:
6152 ; EG-NEXT: MOV * T7.X, KC0[2].Z,
6153 ; EG-NEXT: ALU clause starting at 11:
6154 ; EG-NEXT: LSHR * T8.Z, T7.W, literal.x,
6155 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
6156 ; EG-NEXT: AND_INT T8.X, T7.W, literal.x,
6157 ; EG-NEXT: MOV T8.Y, 0.0,
6158 ; EG-NEXT: LSHR T9.Z, T7.Z, literal.y,
6159 ; EG-NEXT: AND_INT * T9.X, T7.Z, literal.x,
6160 ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
6161 ; EG-NEXT: MOV T9.Y, 0.0,
6162 ; EG-NEXT: LSHR * T10.Z, T7.Y, literal.x,
6163 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
6164 ; EG-NEXT: AND_INT T10.X, T7.Y, literal.x,
6165 ; EG-NEXT: MOV T10.Y, 0.0,
6166 ; EG-NEXT: LSHR T7.Z, T7.X, literal.y,
6167 ; EG-NEXT: AND_INT * T7.X, T7.X, literal.x,
6168 ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
6169 ; EG-NEXT: MOV T7.Y, 0.0,
6170 ; EG-NEXT: MOV T8.W, 0.0,
6171 ; EG-NEXT: MOV * T9.W, 0.0,
6172 ; EG-NEXT: MOV T10.W, 0.0,
6173 ; EG-NEXT: MOV * T7.W, 0.0,
6174 ; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
6175 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6176 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
6177 ; EG-NEXT: LSHR T12.X, PV.W, literal.x,
6178 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6179 ; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
6180 ; EG-NEXT: LSHR T13.X, PV.W, literal.x,
6181 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6182 ; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44)
6183 ; EG-NEXT: LSHR * T14.X, PV.W, literal.x,
6184 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
6186 ; CM-LABEL: global_zextload_v8i16_to_v8i64:
6188 ; CM-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
6190 ; CM-NEXT: ALU 32, @11, KC0[CB0:0-32], KC1[]
6191 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T14.X
6192 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T9, T13.X
6193 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T10, T12.X
6194 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T11.X
6196 ; CM-NEXT: Fetch clause starting at 8:
6197 ; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
6198 ; CM-NEXT: ALU clause starting at 10:
6199 ; CM-NEXT: MOV * T7.X, KC0[2].Z,
6200 ; CM-NEXT: ALU clause starting at 11:
6201 ; CM-NEXT: LSHR * T8.Z, T7.X, literal.x,
6202 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
6203 ; CM-NEXT: AND_INT T8.X, T7.X, literal.x,
6204 ; CM-NEXT: MOV T8.Y, 0.0,
6205 ; CM-NEXT: LSHR * T9.Z, T7.Y, literal.y,
6206 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
6207 ; CM-NEXT: AND_INT T9.X, T7.Y, literal.x,
6208 ; CM-NEXT: MOV T9.Y, 0.0,
6209 ; CM-NEXT: LSHR * T10.Z, T7.Z, literal.y,
6210 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
6211 ; CM-NEXT: AND_INT T10.X, T7.Z, literal.x,
6212 ; CM-NEXT: MOV T10.Y, 0.0,
6213 ; CM-NEXT: LSHR * T7.Z, T7.W, literal.y,
6214 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
6215 ; CM-NEXT: AND_INT T7.X, T7.W, literal.x,
6216 ; CM-NEXT: MOV T7.Y, 0.0,
6217 ; CM-NEXT: MOV * T8.W, 0.0,
6218 ; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
6219 ; CM-NEXT: MOV * T9.W, 0.0,
6220 ; CM-NEXT: MOV * T10.W, 0.0,
6221 ; CM-NEXT: MOV * T7.W, 0.0,
6222 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
6223 ; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00)
6224 ; CM-NEXT: LSHR T11.X, PV.W, literal.x,
6225 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6226 ; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44)
6227 ; CM-NEXT: LSHR T12.X, PV.W, literal.x,
6228 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6229 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
6230 ; CM-NEXT: LSHR * T13.X, PV.W, literal.x,
6231 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
6232 ; CM-NEXT: LSHR * T14.X, KC0[2].Y, literal.x,
6233 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
6234 %load = load <8 x i16>, ptr addrspace(1) %in
6235 %ext = zext <8 x i16> %load to <8 x i64>
6236 store <8 x i64> %ext, ptr addrspace(1) %out
; Sign-extending global load test, <8 x i16> -> <8 x i64>.
; The kernel loads eight i16 values from %in, sign-extends every lane to i64
; and stores the resulting 64-byte vector to %out.
; One group of expected-output lines follows per check prefix declared in the
; RUN lines at the top of the file. On the three GCN configurations the
; expected code is a single dwordx4 load followed by four dwordx4 stores
; (offsets 0, 16, 32 and 48 from %out).
; These assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
6240 define amdgpu_kernel void @global_sextload_v8i16_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
6241 ; GCN-NOHSA-SI-LABEL: global_sextload_v8i16_to_v8i64:
6242 ; GCN-NOHSA-SI: ; %bb.0:
6243 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
6244 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
6245 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
6246 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2
6247 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3
6248 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
6249 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6
6250 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7
6251 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
6252 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4
6253 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5
6254 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
6255 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v3
6256 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v2
6257 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v0
6258 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v0, 0, 16
6259 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v2, 0, 16
6260 ; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[14:15], v[0:1], 48
6261 ; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[2:3], v[2:3], 48
6262 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v12, v1, 0, 16
6263 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v5, 0, 16
6264 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 31, v4
6265 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 31, v8
6266 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v6, 0, 16
6267 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v7, 0, 16
6268 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v13, 31, v12
6269 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
6270 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 31, v6
6271 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 31, v10
6272 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6273 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
6274 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
6275 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
6276 ; GCN-NOHSA-SI-NEXT: s_endpgm
6278 ; GCN-HSA-LABEL: global_sextload_v8i16_to_v8i64:
6280 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
6281 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
6282 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
6283 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
6284 ; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
6285 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
6286 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
6287 ; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3
6288 ; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2
6289 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
6290 ; GCN-HSA-NEXT: v_mov_b32_e32 v17, s1
6291 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
6292 ; GCN-HSA-NEXT: v_mov_b32_e32 v16, s0
6293 ; GCN-HSA-NEXT: s_add_u32 s0, s0, 32
6294 ; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3
6295 ; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0
6296 ; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2
6297 ; GCN-HSA-NEXT: v_mov_b32_e32 v23, s1
6298 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s0
6299 ; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
6300 ; GCN-HSA-NEXT: v_mov_b32_e32 v7, v3
6301 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v10, 16, v2
6302 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v6, 16, v0
6303 ; GCN-HSA-NEXT: v_ashr_i64 v[14:15], v[0:1], 48
6304 ; GCN-HSA-NEXT: v_bfe_i32 v12, v1, 0, 16
6305 ; GCN-HSA-NEXT: v_bfe_i32 v4, v0, 0, 16
6306 ; GCN-HSA-NEXT: v_bfe_i32 v8, v2, 0, 16
6307 ; GCN-HSA-NEXT: v_ashr_i64 v[2:3], v[2:3], 48
6308 ; GCN-HSA-NEXT: v_bfe_i32 v6, v6, 0, 16
6309 ; GCN-HSA-NEXT: v_bfe_i32 v10, v10, 0, 16
6310 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 31, v12
6311 ; GCN-HSA-NEXT: v_bfe_i32 v0, v7, 0, 16
6312 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4
6313 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8
6314 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v6
6315 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v10
6316 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0
6317 ; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[12:15]
6318 ; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[0:3]
6319 ; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[8:11]
6320 ; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[4:7]
6321 ; GCN-HSA-NEXT: s_endpgm
6323 ; GCN-NOHSA-VI-LABEL: global_sextload_v8i16_to_v8i64:
6324 ; GCN-NOHSA-VI: ; %bb.0:
6325 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
6326 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
6327 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
6328 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2
6329 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3
6330 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
6331 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
6332 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
6333 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
6334 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
6335 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
6336 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
6337 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, v3
6338 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
6339 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v6, 16, v0
6340 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v1
6341 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v10, 16, v2
6342 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v11, 0, 16
6343 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v3, 0, 16
6344 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v0, 0, 16
6345 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v1, 0, 16
6346 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v2, 0, 16
6347 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v6, 0, 16
6348 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v7, 0, 16
6349 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v10, 0, 16
6350 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 31, v12
6351 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 31, v14
6352 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 31, v4
6353 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
6354 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 31, v8
6355 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6
6356 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2
6357 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 31, v10
6358 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48
6359 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
6360 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
6361 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
6362 ; GCN-NOHSA-VI-NEXT: s_endpgm
6364 ; EG-LABEL: global_sextload_v8i16_to_v8i64:
6366 ; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
6368 ; EG-NEXT: ALU 33, @11, KC0[CB0:0-32], KC1[]
6369 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T7.X, 0
6370 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 0
6371 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T9.X, 0
6372 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T8.X, 1
6374 ; EG-NEXT: Fetch clause starting at 8:
6375 ; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
6376 ; EG-NEXT: ALU clause starting at 10:
6377 ; EG-NEXT: MOV * T7.X, KC0[2].Z,
6378 ; EG-NEXT: ALU clause starting at 11:
6379 ; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
6380 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6381 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
6382 ; EG-NEXT: LSHR T9.X, PV.W, literal.x,
6383 ; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y,
6384 ; EG-NEXT: ASHR * T10.W, T7.X, literal.z,
6385 ; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
6386 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
6387 ; EG-NEXT: LSHR T11.X, PV.W, literal.x,
6388 ; EG-NEXT: ASHR T10.Z, T7.X, literal.y,
6389 ; EG-NEXT: ASHR * T12.W, T7.Y, literal.z,
6390 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
6391 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
6392 ; EG-NEXT: BFE_INT T10.X, T7.X, 0.0, literal.x,
6393 ; EG-NEXT: ASHR T12.Z, T7.Y, literal.x,
6394 ; EG-NEXT: ASHR * T13.W, T7.Z, literal.y,
6395 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
6396 ; EG-NEXT: BFE_INT T12.X, T7.Y, 0.0, literal.x,
6397 ; EG-NEXT: ASHR T10.Y, PV.X, literal.y,
6398 ; EG-NEXT: ASHR T13.Z, T7.Z, literal.x,
6399 ; EG-NEXT: ASHR * T14.W, T7.W, literal.y,
6400 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
6401 ; EG-NEXT: BFE_INT T13.X, T7.Z, 0.0, literal.x,
6402 ; EG-NEXT: ASHR T12.Y, PV.X, literal.y,
6403 ; EG-NEXT: ASHR * T14.Z, T7.W, literal.x,
6404 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
6405 ; EG-NEXT: BFE_INT T14.X, T7.W, 0.0, literal.x,
6406 ; EG-NEXT: ASHR T13.Y, PV.X, literal.y,
6407 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
6408 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
6409 ; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
6410 ; EG-NEXT: LSHR T7.X, PV.W, literal.x,
6411 ; EG-NEXT: ASHR * T14.Y, PV.X, literal.y,
6412 ; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44)
6414 ; CM-LABEL: global_sextload_v8i16_to_v8i64:
6416 ; CM-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
6418 ; CM-NEXT: ALU 33, @11, KC0[CB0:0-32], KC1[]
6419 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T14.X
6420 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T13, T11.X
6421 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T9.X
6422 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T10, T8.X
6424 ; CM-NEXT: Fetch clause starting at 8:
6425 ; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
6426 ; CM-NEXT: ALU clause starting at 10:
6427 ; CM-NEXT: MOV * T7.X, KC0[2].Z,
6428 ; CM-NEXT: ALU clause starting at 11:
6429 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
6430 ; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00)
6431 ; CM-NEXT: LSHR T8.X, PV.W, literal.x,
6432 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6433 ; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44)
6434 ; CM-NEXT: LSHR T9.X, PV.W, literal.x,
6435 ; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y,
6436 ; CM-NEXT: ASHR * T10.W, T7.W, literal.z,
6437 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
6438 ; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00)
6439 ; CM-NEXT: LSHR T11.X, PV.Z, literal.x,
6440 ; CM-NEXT: ASHR T10.Z, T7.W, literal.y,
6441 ; CM-NEXT: ASHR * T12.W, T7.Z, literal.z,
6442 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
6443 ; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00)
6444 ; CM-NEXT: BFE_INT T10.X, T7.W, 0.0, literal.x,
6445 ; CM-NEXT: ASHR T12.Z, T7.Z, literal.x,
6446 ; CM-NEXT: ASHR * T13.W, T7.Y, literal.y,
6447 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
6448 ; CM-NEXT: BFE_INT T12.X, T7.Z, 0.0, literal.x,
6449 ; CM-NEXT: ASHR T10.Y, PV.X, literal.y,
6450 ; CM-NEXT: ASHR T13.Z, T7.Y, literal.x,
6451 ; CM-NEXT: ASHR * T7.W, T7.X, literal.y,
6452 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
6453 ; CM-NEXT: BFE_INT T13.X, T7.Y, 0.0, literal.x,
6454 ; CM-NEXT: ASHR T12.Y, PV.X, literal.y,
6455 ; CM-NEXT: ASHR * T7.Z, T7.X, literal.x,
6456 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
6457 ; CM-NEXT: BFE_INT T7.X, T7.X, 0.0, literal.x,
6458 ; CM-NEXT: ASHR * T13.Y, PV.X, literal.y,
6459 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
6460 ; CM-NEXT: LSHR T14.X, KC0[2].Y, literal.x,
6461 ; CM-NEXT: ASHR * T7.Y, PV.X, literal.y,
6462 ; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44)
; IR under test: one vector load, a lane-wise sext to i64, one vector store.
6463 %load = load <8 x i16>, ptr addrspace(1) %in
6464 %ext = sext <8 x i16> %load to <8 x i64>
6465 store <8 x i64> %ext, ptr addrspace(1) %out
; Zero-extending global load test, <16 x i16> -> <16 x i64>.
; The kernel loads sixteen i16 values from %in, zero-extends every lane to
; i64 and stores the resulting 128-byte vector to %out.
; One group of expected-output lines follows per check prefix declared in the
; RUN lines at the top of the file. On the three GCN configurations the
; expected code reads the source with two dwordx4 loads (the second one 16
; bytes after the first) and writes the result with eight dwordx4 stores
; covering offsets 0 through 112 from %out.
; These assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
6469 define amdgpu_kernel void @global_zextload_v16i16_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
6470 ; GCN-NOHSA-SI-LABEL: global_zextload_v16i16_to_v16i64:
6471 ; GCN-NOHSA-SI: ; %bb.0:
6472 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
6473 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
6474 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
6475 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2
6476 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3
6477 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
6478 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6
6479 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7
6480 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
6481 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
6482 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1)
6483 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v10, 16, v1
6484 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v14, 16, v2
6485 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v18, 16, v0
6486 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v0
6487 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v12, 0xffff, v2
6488 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v1
6489 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v3
6490 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v3
6491 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, 0
6492 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
6493 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v22, 16, v5
6494 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v26, 16, v6
6495 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v24, 0xffff, v6
6496 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v4
6497 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v4
6498 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v30, 16, v7
6499 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v28, 0xffff, v7
6500 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v20, 0xffff, v5
6501 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, v21
6502 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v29, v21
6503 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v31, v21
6504 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, v21
6505 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v21
6506 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, v21
6507 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, v21
6508 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v21
6509 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, v21
6510 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v25, v21
6511 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v27, v21
6512 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, v21
6513 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, v21
6514 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, v21
6515 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, v21
6516 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4
6517 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5
6518 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:80
6519 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:112
6520 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6521 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
6522 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
6523 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:96
6524 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32
6525 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0
6526 ; GCN-NOHSA-SI-NEXT: s_endpgm
6528 ; GCN-HSA-LABEL: global_zextload_v16i16_to_v16i64:
6530 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
6531 ; GCN-HSA-NEXT: v_mov_b32_e32 v8, 0
6532 ; GCN-HSA-NEXT: v_mov_b32_e32 v12, v8
6533 ; GCN-HSA-NEXT: v_mov_b32_e32 v14, v8
6534 ; GCN-HSA-NEXT: v_mov_b32_e32 v15, v8
6535 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
6536 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
6537 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
6538 ; GCN-HSA-NEXT: s_add_u32 s2, s2, 16
6539 ; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
6540 ; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0
6541 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
6542 ; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
6543 ; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
6544 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
6545 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
6546 ; GCN-HSA-NEXT: s_add_u32 s4, s0, 16
6547 ; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0
6548 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s5
6549 ; GCN-HSA-NEXT: v_mov_b32_e32 v21, s4
6550 ; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x50
6551 ; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0
6552 ; GCN-HSA-NEXT: v_mov_b32_e32 v17, v8
6553 ; GCN-HSA-NEXT: v_mov_b32_e32 v18, v8
6554 ; GCN-HSA-NEXT: v_mov_b32_e32 v20, v8
6555 ; GCN-HSA-NEXT: v_mov_b32_e32 v10, v8
6556 ; GCN-HSA-NEXT: v_mov_b32_e32 v23, v8
6557 ; GCN-HSA-NEXT: s_waitcnt vmcnt(1)
6558 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v13, 16, v1
6559 ; GCN-HSA-NEXT: v_and_b32_e32 v11, 0xffff, v1
6560 ; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[11:14]
6561 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s5
6562 ; GCN-HSA-NEXT: v_mov_b32_e32 v21, s4
6563 ; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x70
6564 ; GCN-HSA-NEXT: s_waitcnt vmcnt(1)
6565 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v16, 16, v5
6566 ; GCN-HSA-NEXT: v_and_b32_e32 v14, 0xffff, v5
6567 ; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0
6568 ; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[14:17]
6569 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v7
6570 ; GCN-HSA-NEXT: v_mov_b32_e32 v15, s5
6571 ; GCN-HSA-NEXT: v_mov_b32_e32 v14, s4
6572 ; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v7
6573 ; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[17:20]
6574 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v3
6575 ; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3
6576 ; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2
6577 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 32
6578 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
6579 ; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3
6580 ; GCN-HSA-NEXT: v_and_b32_e32 v7, 0xffff, v3
6581 ; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2
6582 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 64
6583 ; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[7:10]
6584 ; GCN-HSA-NEXT: v_mov_b32_e32 v19, s1
6585 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
6586 ; GCN-HSA-NEXT: v_mov_b32_e32 v18, s0
6587 ; GCN-HSA-NEXT: s_add_u32 s0, s0, 0x60
6588 ; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0
6589 ; GCN-HSA-NEXT: v_mov_b32_e32 v11, v8
6590 ; GCN-HSA-NEXT: v_mov_b32_e32 v13, v8
6591 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v16, 16, v4
6592 ; GCN-HSA-NEXT: v_and_b32_e32 v14, 0xffff, v4
6593 ; GCN-HSA-NEXT: v_mov_b32_e32 v4, s3
6594 ; GCN-HSA-NEXT: v_mov_b32_e32 v27, s1
6595 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v12, 16, v2
6596 ; GCN-HSA-NEXT: v_and_b32_e32 v10, 0xffff, v2
6597 ; GCN-HSA-NEXT: v_mov_b32_e32 v21, v8
6598 ; GCN-HSA-NEXT: v_mov_b32_e32 v15, v8
6599 ; GCN-HSA-NEXT: v_mov_b32_e32 v17, v8
6600 ; GCN-HSA-NEXT: v_mov_b32_e32 v7, v8
6601 ; GCN-HSA-NEXT: v_mov_b32_e32 v9, v8
6602 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v8, 16, v6
6603 ; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v6
6604 ; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2
6605 ; GCN-HSA-NEXT: v_mov_b32_e32 v26, s0
6606 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v22, 16, v0
6607 ; GCN-HSA-NEXT: v_and_b32_e32 v20, 0xffff, v0
6608 ; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[10:13]
6609 ; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[20:23]
6610 ; GCN-HSA-NEXT: flat_store_dwordx4 v[3:4], v[14:17]
6611 ; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[6:9]
6612 ; GCN-HSA-NEXT: s_endpgm
6614 ; GCN-NOHSA-VI-LABEL: global_zextload_v16i16_to_v16i64:
6615 ; GCN-NOHSA-VI: ; %bb.0:
6616 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
6617 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
6618 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
6619 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2
6620 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3
6621 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
6622 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
6623 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
6624 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
6625 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
6626 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v29, 0
6627 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v31, v29
6628 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
6629 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
6630 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v25, v29
6631 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v27, v29
6632 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v21, v29
6633 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v23, v29
6634 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v17, v29
6635 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v19, v29
6636 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v13, v29
6637 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v15, v29
6638 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v9, v29
6639 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, v29
6640 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1)
6641 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v10, 16, v0
6642 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
6643 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v30, 16, v5
6644 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v28, 0xffff, v5
6645 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, 0xffff, v0
6646 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v14, 16, v1
6647 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, 0xffff, v1
6648 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v18, 16, v2
6649 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, 0xffff, v2
6650 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v3
6651 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v3
6652 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v22, 16, v6
6653 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v20, 0xffff, v6
6654 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v26, 16, v7
6655 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v24, 0xffff, v7
6656 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v6, 16, v4
6657 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, 0xffff, v4
6658 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, v29
6659 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, v29
6660 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, v29
6661 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v29
6662 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:80
6663 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
6664 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:112
6665 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:96
6666 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6667 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32
6668 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
6669 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0
6670 ; GCN-NOHSA-VI-NEXT: s_endpgm
6672 ; EG-LABEL: global_zextload_v16i16_to_v16i64:
6674 ; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[]
6675 ; EG-NEXT: TEX 1 @12
6676 ; EG-NEXT: ALU 62, @17, KC0[CB0:0-32], KC1[]
6677 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T26.X, 0
6678 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T25.X, 0
6679 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T24.X, 0
6680 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T23.X, 0
6681 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T22.X, 0
6682 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T21.X, 0
6683 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T20.X, 0
6684 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 1
6686 ; EG-NEXT: Fetch clause starting at 12:
6687 ; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
6688 ; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
6689 ; EG-NEXT: ALU clause starting at 16:
6690 ; EG-NEXT: MOV * T11.X, KC0[2].Z,
6691 ; EG-NEXT: ALU clause starting at 17:
6692 ; EG-NEXT: LSHR * T13.Z, T12.W, literal.x,
6693 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
6694 ; EG-NEXT: AND_INT T13.X, T12.W, literal.x,
6695 ; EG-NEXT: MOV T13.Y, 0.0,
6696 ; EG-NEXT: LSHR T14.Z, T12.Z, literal.y,
6697 ; EG-NEXT: AND_INT * T14.X, T12.Z, literal.x,
6698 ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
6699 ; EG-NEXT: MOV T14.Y, 0.0,
6700 ; EG-NEXT: LSHR * T15.Z, T12.Y, literal.x,
6701 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
6702 ; EG-NEXT: AND_INT T15.X, T12.Y, literal.x,
6703 ; EG-NEXT: MOV T15.Y, 0.0,
6704 ; EG-NEXT: LSHR T12.Z, T12.X, literal.y,
6705 ; EG-NEXT: AND_INT * T12.X, T12.X, literal.x,
6706 ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
6707 ; EG-NEXT: MOV T12.Y, 0.0,
6708 ; EG-NEXT: LSHR * T16.Z, T11.W, literal.x,
6709 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
6710 ; EG-NEXT: AND_INT T16.X, T11.W, literal.x,
6711 ; EG-NEXT: MOV T16.Y, 0.0,
6712 ; EG-NEXT: LSHR T17.Z, T11.Z, literal.y,
6713 ; EG-NEXT: AND_INT * T17.X, T11.Z, literal.x,
6714 ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
6715 ; EG-NEXT: MOV T17.Y, 0.0,
6716 ; EG-NEXT: LSHR * T18.Z, T11.Y, literal.x,
6717 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
6718 ; EG-NEXT: AND_INT T18.X, T11.Y, literal.x,
6719 ; EG-NEXT: MOV T18.Y, 0.0,
6720 ; EG-NEXT: LSHR T11.Z, T11.X, literal.y,
6721 ; EG-NEXT: AND_INT * T11.X, T11.X, literal.x,
6722 ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
6723 ; EG-NEXT: MOV T11.Y, 0.0,
6724 ; EG-NEXT: MOV T13.W, 0.0,
6725 ; EG-NEXT: MOV * T14.W, 0.0,
6726 ; EG-NEXT: MOV T15.W, 0.0,
6727 ; EG-NEXT: MOV * T12.W, 0.0,
6728 ; EG-NEXT: MOV T16.W, 0.0,
6729 ; EG-NEXT: MOV * T17.W, 0.0,
6730 ; EG-NEXT: MOV T18.W, 0.0,
6731 ; EG-NEXT: MOV * T11.W, 0.0,
6732 ; EG-NEXT: LSHR T19.X, KC0[2].Y, literal.x,
6733 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6734 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
6735 ; EG-NEXT: LSHR T20.X, PV.W, literal.x,
6736 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6737 ; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
6738 ; EG-NEXT: LSHR T21.X, PV.W, literal.x,
6739 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6740 ; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44)
6741 ; EG-NEXT: LSHR T22.X, PV.W, literal.x,
6742 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6743 ; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44)
6744 ; EG-NEXT: LSHR T23.X, PV.W, literal.x,
6745 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6746 ; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43)
6747 ; EG-NEXT: LSHR T24.X, PV.W, literal.x,
6748 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6749 ; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43)
6750 ; EG-NEXT: LSHR T25.X, PV.W, literal.x,
6751 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6752 ; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43)
6753 ; EG-NEXT: LSHR * T26.X, PV.W, literal.x,
6754 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
6756 ; CM-LABEL: global_zextload_v16i16_to_v16i64:
6758 ; CM-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[]
6759 ; CM-NEXT: TEX 1 @12
6760 ; CM-NEXT: ALU 64, @17, KC0[CB0:0-32], KC1[]
6761 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T13, T26.X
6762 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T14, T25.X
6763 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T15, T24.X
6764 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T23.X
6765 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T16, T22.X
6766 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T17, T21.X
6767 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T18, T20.X
6768 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T11, T19.X
6770 ; CM-NEXT: Fetch clause starting at 12:
6771 ; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1
6772 ; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1
6773 ; CM-NEXT: ALU clause starting at 16:
6774 ; CM-NEXT: MOV * T11.X, KC0[2].Z,
6775 ; CM-NEXT: ALU clause starting at 17:
6776 ; CM-NEXT: LSHR * T13.Z, T12.X, literal.x,
6777 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
6778 ; CM-NEXT: AND_INT T13.X, T12.X, literal.x,
6779 ; CM-NEXT: MOV T13.Y, 0.0,
6780 ; CM-NEXT: LSHR * T14.Z, T12.Y, literal.y,
6781 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
6782 ; CM-NEXT: AND_INT T14.X, T12.Y, literal.x,
6783 ; CM-NEXT: MOV T14.Y, 0.0,
6784 ; CM-NEXT: LSHR * T15.Z, T12.Z, literal.y,
6785 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
6786 ; CM-NEXT: AND_INT T15.X, T12.Z, literal.x,
6787 ; CM-NEXT: MOV T15.Y, 0.0,
6788 ; CM-NEXT: LSHR * T12.Z, T12.W, literal.y,
6789 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
6790 ; CM-NEXT: AND_INT T12.X, T12.W, literal.x,
6791 ; CM-NEXT: MOV T12.Y, 0.0,
6792 ; CM-NEXT: LSHR * T16.Z, T11.X, literal.y,
6793 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
6794 ; CM-NEXT: AND_INT T16.X, T11.X, literal.x,
6795 ; CM-NEXT: MOV T16.Y, 0.0,
6796 ; CM-NEXT: LSHR * T17.Z, T11.Y, literal.y,
6797 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
6798 ; CM-NEXT: AND_INT T17.X, T11.Y, literal.x,
6799 ; CM-NEXT: MOV T17.Y, 0.0,
6800 ; CM-NEXT: LSHR * T18.Z, T11.Z, literal.y,
6801 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
6802 ; CM-NEXT: AND_INT T18.X, T11.Z, literal.x,
6803 ; CM-NEXT: MOV T18.Y, 0.0,
6804 ; CM-NEXT: LSHR * T11.Z, T11.W, literal.y,
6805 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
6806 ; CM-NEXT: AND_INT T11.X, T11.W, literal.x,
6807 ; CM-NEXT: MOV T11.Y, 0.0,
6808 ; CM-NEXT: MOV * T13.W, 0.0,
6809 ; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
6810 ; CM-NEXT: MOV * T14.W, 0.0,
6811 ; CM-NEXT: MOV * T15.W, 0.0,
6812 ; CM-NEXT: MOV * T12.W, 0.0,
6813 ; CM-NEXT: MOV * T16.W, 0.0,
6814 ; CM-NEXT: MOV * T17.W, 0.0,
6815 ; CM-NEXT: MOV * T18.W, 0.0,
6816 ; CM-NEXT: MOV * T11.W, 0.0,
6817 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
6818 ; CM-NEXT: 112(1.569454e-43), 0(0.000000e+00)
6819 ; CM-NEXT: LSHR T19.X, PV.W, literal.x,
6820 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6821 ; CM-NEXT: 2(2.802597e-45), 96(1.345247e-43)
6822 ; CM-NEXT: LSHR T20.X, PV.W, literal.x,
6823 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6824 ; CM-NEXT: 2(2.802597e-45), 80(1.121039e-43)
6825 ; CM-NEXT: LSHR T21.X, PV.W, literal.x,
6826 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6827 ; CM-NEXT: 2(2.802597e-45), 64(8.968310e-44)
6828 ; CM-NEXT: LSHR T22.X, PV.W, literal.x,
6829 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6830 ; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44)
6831 ; CM-NEXT: LSHR T23.X, PV.W, literal.x,
6832 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6833 ; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44)
6834 ; CM-NEXT: LSHR T24.X, PV.W, literal.x,
6835 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
6836 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
6837 ; CM-NEXT: LSHR * T25.X, PV.W, literal.x,
6838 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
6839 ; CM-NEXT: LSHR * T26.X, KC0[2].Y, literal.x,
6840 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; IR under test: one vector load, a lane-wise zext to i64, one vector store.
6841 %load = load <16 x i16>, ptr addrspace(1) %in
6842 %ext = zext <16 x i16> %load to <16 x i64>
6843 store <16 x i64> %ext, ptr addrspace(1) %out
; Sign-extending vector load test: loads <16 x i16> from %in, sign-extends
; every element to i64, and stores the resulting <16 x i64> to %out. Both
; pointers are in addrspace(1).
;
; The assertions below are autogenerated (see the NOTE on the first line of
; this file, utils/update_llc_test_checks.py) - regenerate them with that
; script instead of editing them by hand. There is one group of assertions
; per RUN line; all RUN lines pass -amdgpu-scalarize-global-loads=false:
;   prefix GCN-NOHSA-SI  <=>  -mtriple=amdgcn
;   prefix GCN-HSA       <=>  -mtriple=amdgcn--amdhsa -mcpu=kaveri
;   prefix GCN-NOHSA-VI  <=>  -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global
;   prefix EG            <=>  -mtriple=r600 -mcpu=redwood
;   prefix CM            <=>  -mtriple=r600 -mcpu=cayman
; Every group expects the source to be fetched with two 128-bit loads
; (dwordx4 / VTX_READ_128) and the widened result to be written with eight
; stores.
; NOTE(review): attribute group #0 is defined outside this function - confirm
; its contents before relying on it.
6847 define amdgpu_kernel void @global_sextload_v16i16_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
6848 ; GCN-NOHSA-SI-LABEL: global_sextload_v16i16_to_v16i64:
6849 ; GCN-NOHSA-SI: ; %bb.0:
6850 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
6851 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
6852 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
6853 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2
6854 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3
6855 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
6856 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6
6857 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7
6858 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
6859 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
6860 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4
6861 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5
6862 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
6863 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, v7
6864 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, v3
6865 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v4
6866 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v13, 16, v2
6867 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v14, 16, v0
6868 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v0, 0, 16
6869 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v12, v2, 0, 16
6870 ; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[18:19], v[0:1], 48
6871 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v16, v1, 0, 16
6872 ; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[2:3], v[2:3], 48
6873 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v10, 0, 16
6874 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v20, v4, 0, 16
6875 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v14, 0, 16
6876 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v14, v13, 0, 16
6877 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v11, 0, 16
6878 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v9, 0, 16
6879 ; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[25:26], v[6:7], 48
6880 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 31, v23
6881 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:112
6882 ; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
6883 ; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[26:27], v[4:5], 48
6884 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v24, v5, 0, 16
6885 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v6
6886 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v6, 0, 16
6887 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v1, 0, 16
6888 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 31, v8
6889 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v13, 31, v12
6890 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v21, 31, v20
6891 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 31, v4
6892 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 31, v16
6893 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
6894 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 31, v24
6895 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 31, v10
6896 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v15, 31, v14
6897 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22
6898 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 31, v6
6899 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:80
6900 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6901 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
6902 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:96
6903 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64
6904 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32
6905 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0
6906 ; GCN-NOHSA-SI-NEXT: s_endpgm
6908 ; GCN-HSA-LABEL: global_sextload_v16i16_to_v16i64:
6910 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
6911 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
6912 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
6913 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
6914 ; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
6915 ; GCN-HSA-NEXT: s_add_u32 s2, s2, 16
6916 ; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0
6917 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
6918 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
6919 ; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
6920 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
6921 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
6922 ; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3
6923 ; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2
6924 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
6925 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
6926 ; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3
6927 ; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2
6928 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70
6929 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
6930 ; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3
6931 ; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2
6932 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50
6933 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
6934 ; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3
6935 ; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2
6936 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 32
6937 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
6938 ; GCN-HSA-NEXT: v_mov_b32_e32 v23, s3
6939 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s2
6940 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60
6941 ; GCN-HSA-NEXT: v_mov_b32_e32 v13, s1
6942 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
6943 ; GCN-HSA-NEXT: v_mov_b32_e32 v12, s0
6944 ; GCN-HSA-NEXT: s_add_u32 s0, s0, 64
6945 ; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0
6946 ; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3
6947 ; GCN-HSA-NEXT: v_mov_b32_e32 v27, s1
6948 ; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2
6949 ; GCN-HSA-NEXT: v_mov_b32_e32 v26, s0
6950 ; GCN-HSA-NEXT: s_waitcnt vmcnt(1)
6951 ; GCN-HSA-NEXT: v_ashr_i64 v[10:11], v[4:5], 48
6952 ; GCN-HSA-NEXT: v_bfe_i32 v8, v5, 0, 16
6953 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8
6954 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, v7
6955 ; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11]
6956 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v16, 16, v6
6957 ; GCN-HSA-NEXT: v_bfe_i32 v8, v5, 0, 16
6958 ; GCN-HSA-NEXT: v_ashr_i64 v[10:11], v[6:7], 48
6959 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8
6960 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v4
6961 ; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11]
6962 ; GCN-HSA-NEXT: v_bfe_i32 v7, v6, 0, 16
6963 ; GCN-HSA-NEXT: v_bfe_i32 v9, v16, 0, 16
6964 ; GCN-HSA-NEXT: v_bfe_i32 v4, v4, 0, 16
6965 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 31, v7
6966 ; GCN-HSA-NEXT: v_bfe_i32 v6, v17, 0, 16
6967 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 31, v9
6968 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4
6969 ; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[7:10]
6970 ; GCN-HSA-NEXT: s_waitcnt vmcnt(3)
6971 ; GCN-HSA-NEXT: v_mov_b32_e32 v11, v3
6972 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v6
6973 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v16, 16, v2
6974 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v0
6975 ; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[4:7]
6976 ; GCN-HSA-NEXT: v_ashr_i64 v[14:15], v[0:1], 48
6977 ; GCN-HSA-NEXT: v_bfe_i32 v12, v1, 0, 16
6978 ; GCN-HSA-NEXT: v_bfe_i32 v8, v0, 0, 16
6979 ; GCN-HSA-NEXT: v_bfe_i32 v4, v2, 0, 16
6980 ; GCN-HSA-NEXT: v_ashr_i64 v[2:3], v[2:3], 48
6981 ; GCN-HSA-NEXT: v_bfe_i32 v10, v17, 0, 16
6982 ; GCN-HSA-NEXT: v_bfe_i32 v6, v16, 0, 16
6983 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 31, v12
6984 ; GCN-HSA-NEXT: v_bfe_i32 v0, v11, 0, 16
6985 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8
6986 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4
6987 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v10
6988 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v6
6989 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0
6990 ; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[12:15]
6991 ; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[0:3]
6992 ; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[4:7]
6993 ; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11]
6994 ; GCN-HSA-NEXT: s_endpgm
6996 ; GCN-NOHSA-VI-LABEL: global_sextload_v16i16_to_v16i64:
6997 ; GCN-NOHSA-VI: ; %bb.0:
6998 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
6999 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
7000 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
7001 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2
7002 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3
7003 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
7004 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
7005 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
7006 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
7007 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
7008 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
7009 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
7010 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1)
7011 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v0, 0, 16
7012 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v0
7013 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v1, 0, 16
7014 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
7015 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v2, 0, 16
7016 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v2
7017 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v1, 0, 16
7018 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
7019 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v5
7020 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v17, v1, 0, 16
7021 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v4
7022 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v13, v3
7023 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
7024 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v20, 16, v6
7025 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v22, v7
7026 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
7027 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v5, 0, 16
7028 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v27, v6, 0, 16
7029 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v4, 0, 16
7030 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v1, 0, 16
7031 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v9, 0, 16
7032 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v11, 0, 16
7033 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v19, v13, 0, 16
7034 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v21, v3, 0, 16
7035 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15
7036 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v18, 31, v17
7037 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v23, v22, 0, 16
7038 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v25, v7, 0, 16
7039 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v29, v20, 0, 16
7040 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 31, v4
7041 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6
7042 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:80
7043 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 31, v8
7044 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
7045 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 31, v12
7046 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v28, 31, v27
7047 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 31, v10
7048 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2
7049 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 31, v14
7050 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v20, 31, v19
7051 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v22, 31, v21
7052 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v30, 31, v29
7053 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v24, 31, v23
7054 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v26, 31, v25
7055 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
7056 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:112
7057 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:96
7058 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:48
7059 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32
7060 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
7061 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0
7062 ; GCN-NOHSA-VI-NEXT: s_endpgm
7064 ; EG-LABEL: global_sextload_v16i16_to_v16i64:
7066 ; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[]
7067 ; EG-NEXT: TEX 1 @12
7068 ; EG-NEXT: ALU 65, @17, KC0[CB0:0-32], KC1[]
7069 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T12.X, 0
7070 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T20.X, 0
7071 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T18.X, 0
7072 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T17.X, 0
7073 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T16.X, 0
7074 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T15.X, 0
7075 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T14.X, 0
7076 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T13.X, 1
7078 ; EG-NEXT: Fetch clause starting at 12:
7079 ; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
7080 ; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
7081 ; EG-NEXT: ALU clause starting at 16:
7082 ; EG-NEXT: MOV * T11.X, KC0[2].Z,
7083 ; EG-NEXT: ALU clause starting at 17:
7084 ; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x,
7085 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7086 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
7087 ; EG-NEXT: LSHR T14.X, PV.W, literal.x,
7088 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7089 ; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
7090 ; EG-NEXT: LSHR T15.X, PV.W, literal.x,
7091 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7092 ; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44)
7093 ; EG-NEXT: LSHR T16.X, PV.W, literal.x,
7094 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7095 ; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44)
7096 ; EG-NEXT: LSHR T17.X, PV.W, literal.x,
7097 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7098 ; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43)
7099 ; EG-NEXT: LSHR T18.X, PV.W, literal.x,
7100 ; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y,
7101 ; EG-NEXT: ASHR * T19.W, T11.X, literal.z,
7102 ; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43)
7103 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
7104 ; EG-NEXT: LSHR T20.X, PV.W, literal.x,
7105 ; EG-NEXT: ASHR T19.Z, T11.X, literal.y,
7106 ; EG-NEXT: ASHR * T21.W, T11.Y, literal.z,
7107 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
7108 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
7109 ; EG-NEXT: BFE_INT T19.X, T11.X, 0.0, literal.x,
7110 ; EG-NEXT: ASHR T21.Z, T11.Y, literal.x,
7111 ; EG-NEXT: ASHR * T22.W, T11.Z, literal.y,
7112 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
7113 ; EG-NEXT: BFE_INT T21.X, T11.Y, 0.0, literal.x,
7114 ; EG-NEXT: ASHR T19.Y, PV.X, literal.y,
7115 ; EG-NEXT: ASHR T22.Z, T11.Z, literal.x,
7116 ; EG-NEXT: ASHR * T23.W, T11.W, literal.y,
7117 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
7118 ; EG-NEXT: BFE_INT T22.X, T11.Z, 0.0, literal.x,
7119 ; EG-NEXT: ASHR T21.Y, PV.X, literal.y,
7120 ; EG-NEXT: ASHR T23.Z, T11.W, literal.x,
7121 ; EG-NEXT: ASHR * T24.W, T12.X, literal.y,
7122 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
7123 ; EG-NEXT: BFE_INT T23.X, T11.W, 0.0, literal.x,
7124 ; EG-NEXT: ASHR T22.Y, PV.X, literal.y,
7125 ; EG-NEXT: ASHR T24.Z, T12.X, literal.x,
7126 ; EG-NEXT: ASHR * T11.W, T12.Y, literal.y,
7127 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
7128 ; EG-NEXT: BFE_INT T24.X, T12.X, 0.0, literal.x,
7129 ; EG-NEXT: ASHR T23.Y, PV.X, literal.y,
7130 ; EG-NEXT: ASHR T11.Z, T12.Y, literal.x,
7131 ; EG-NEXT: ASHR * T25.W, T12.Z, literal.y,
7132 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
7133 ; EG-NEXT: BFE_INT T11.X, T12.Y, 0.0, literal.x,
7134 ; EG-NEXT: ASHR T24.Y, PV.X, literal.y,
7135 ; EG-NEXT: ASHR T25.Z, T12.Z, literal.x,
7136 ; EG-NEXT: ASHR * T26.W, T12.W, literal.y,
7137 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
7138 ; EG-NEXT: BFE_INT T25.X, T12.Z, 0.0, literal.x,
7139 ; EG-NEXT: ASHR T11.Y, PV.X, literal.y,
7140 ; EG-NEXT: ASHR * T26.Z, T12.W, literal.x,
7141 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
7142 ; EG-NEXT: BFE_INT T26.X, T12.W, 0.0, literal.x,
7143 ; EG-NEXT: ASHR T25.Y, PV.X, literal.y,
7144 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
7145 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
7146 ; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00)
7147 ; EG-NEXT: LSHR T12.X, PV.W, literal.x,
7148 ; EG-NEXT: ASHR * T26.Y, PV.X, literal.y,
7149 ; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44)
7151 ; CM-LABEL: global_sextload_v16i16_to_v16i64:
7153 ; CM-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[]
7154 ; CM-NEXT: TEX 1 @12
7155 ; CM-NEXT: ALU 65, @17, KC0[CB0:0-32], KC1[]
7156 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T26.X
7157 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T25, T20.X
7158 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T24, T18.X
7159 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T23, T17.X
7160 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T11, T16.X
7161 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T22, T15.X
7162 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T21, T14.X
7163 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T19, T13.X
7165 ; CM-NEXT: Fetch clause starting at 12:
7166 ; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1
7167 ; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1
7168 ; CM-NEXT: ALU clause starting at 16:
7169 ; CM-NEXT: MOV * T11.X, KC0[2].Z,
7170 ; CM-NEXT: ALU clause starting at 17:
7171 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
7172 ; CM-NEXT: 112(1.569454e-43), 0(0.000000e+00)
7173 ; CM-NEXT: LSHR T13.X, PV.W, literal.x,
7174 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7175 ; CM-NEXT: 2(2.802597e-45), 96(1.345247e-43)
7176 ; CM-NEXT: LSHR T14.X, PV.W, literal.x,
7177 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7178 ; CM-NEXT: 2(2.802597e-45), 80(1.121039e-43)
7179 ; CM-NEXT: LSHR T15.X, PV.W, literal.x,
7180 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7181 ; CM-NEXT: 2(2.802597e-45), 64(8.968310e-44)
7182 ; CM-NEXT: LSHR T16.X, PV.W, literal.x,
7183 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7184 ; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44)
7185 ; CM-NEXT: LSHR T17.X, PV.W, literal.x,
7186 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7187 ; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44)
7188 ; CM-NEXT: LSHR T18.X, PV.W, literal.x,
7189 ; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y,
7190 ; CM-NEXT: ASHR * T19.W, T11.W, literal.z,
7191 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
7192 ; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00)
7193 ; CM-NEXT: LSHR T20.X, PV.Z, literal.x,
7194 ; CM-NEXT: ASHR T19.Z, T11.W, literal.y,
7195 ; CM-NEXT: ASHR * T21.W, T11.Z, literal.z,
7196 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
7197 ; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00)
7198 ; CM-NEXT: BFE_INT T19.X, T11.W, 0.0, literal.x,
7199 ; CM-NEXT: ASHR T21.Z, T11.Z, literal.x,
7200 ; CM-NEXT: ASHR * T22.W, T11.Y, literal.y,
7201 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
7202 ; CM-NEXT: BFE_INT T21.X, T11.Z, 0.0, literal.x,
7203 ; CM-NEXT: ASHR T19.Y, PV.X, literal.y,
7204 ; CM-NEXT: ASHR T22.Z, T11.Y, literal.x,
7205 ; CM-NEXT: ASHR * T11.W, T11.X, literal.y,
7206 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
7207 ; CM-NEXT: BFE_INT T22.X, T11.Y, 0.0, literal.x,
7208 ; CM-NEXT: ASHR T21.Y, PV.X, literal.y,
7209 ; CM-NEXT: ASHR T11.Z, T11.X, literal.x,
7210 ; CM-NEXT: ASHR * T23.W, T12.W, literal.y,
7211 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
7212 ; CM-NEXT: BFE_INT T11.X, T11.X, 0.0, literal.x,
7213 ; CM-NEXT: ASHR T22.Y, PV.X, literal.y,
7214 ; CM-NEXT: ASHR T23.Z, T12.W, literal.x,
7215 ; CM-NEXT: ASHR * T24.W, T12.Z, literal.y,
7216 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
7217 ; CM-NEXT: BFE_INT T23.X, T12.W, 0.0, literal.x,
7218 ; CM-NEXT: ASHR T11.Y, PV.X, literal.y,
7219 ; CM-NEXT: ASHR T24.Z, T12.Z, literal.x,
7220 ; CM-NEXT: ASHR * T25.W, T12.Y, literal.y,
7221 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
7222 ; CM-NEXT: BFE_INT T24.X, T12.Z, 0.0, literal.x,
7223 ; CM-NEXT: ASHR T23.Y, PV.X, literal.y,
7224 ; CM-NEXT: ASHR T25.Z, T12.Y, literal.x,
7225 ; CM-NEXT: ASHR * T12.W, T12.X, literal.y,
7226 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
7227 ; CM-NEXT: BFE_INT T25.X, T12.Y, 0.0, literal.x,
7228 ; CM-NEXT: ASHR T24.Y, PV.X, literal.y,
7229 ; CM-NEXT: ASHR * T12.Z, T12.X, literal.x,
7230 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
7231 ; CM-NEXT: BFE_INT T12.X, T12.X, 0.0, literal.x,
7232 ; CM-NEXT: ASHR * T25.Y, PV.X, literal.y,
7233 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
7234 ; CM-NEXT: LSHR T26.X, KC0[2].Y, literal.x,
7235 ; CM-NEXT: ASHR * T12.Y, PV.X, literal.y,
7236 ; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44)
7237 %load = load <16 x i16>, ptr addrspace(1) %in
7238 %ext = sext <16 x i16> %load to <16 x i64>
7239 store <16 x i64> %ext, ptr addrspace(1) %out
7243 define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
7244 ; GCN-NOHSA-SI-LABEL: global_zextload_v32i16_to_v32i64:
7245 ; GCN-NOHSA-SI: ; %bb.0:
7246 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
7247 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
7248 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, -1
7249 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s15, 0xe8f000
7250 ; GCN-NOHSA-SI-NEXT: s_add_u32 s12, s12, s9
7251 ; GCN-NOHSA-SI-NEXT: s_addc_u32 s13, s13, 0
7252 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
7253 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
7254 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
7255 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v39, 0
7256 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2
7257 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3
7258 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
7259 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6
7260 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7
7261 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[14:17], off, s[8:11], 0
7262 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[18:21], off, s[8:11], 0 offset:16
7263 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[22:25], off, s[8:11], 0 offset:32
7264 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[26:29], off, s[8:11], 0 offset:48
7265 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(3)
7266 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v32, 16, v15
7267 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v36, 16, v17
7268 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(2)
7269 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v20
7270 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v16
7271 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v14
7272 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v14
7273 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
7274 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
7275 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
7276 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill
7277 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill
7278 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v12, 0xffff, v16
7279 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, v3
7280 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v30, 0xffff, v15
7281 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v34, 0xffff, v17
7282 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v10, 16, v18
7283 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v18
7284 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v20
7285 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, v5
7286 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v18, 16, v19
7287 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v19
7288 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v44, 16, v21
7289 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v42, 0xffff, v21
7290 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v48, 16, v22
7291 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v46, 0xffff, v22
7292 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v22, 16, v24
7293 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v20, 0xffff, v24
7294 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v52, 16, v23
7295 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v50, 0xffff, v23
7296 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v56, 16, v25
7297 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v54, 0xffff, v25
7298 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v40, 16, v29
7299 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v60, 16, v26
7300 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v58, 0xffff, v26
7301 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v26, 16, v28
7302 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v24, 0xffff, v28
7303 ; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(1)
7304 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v27
7305 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v27
7306 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v38, 0xffff, v29
7307 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v41, v39
7308 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, v39
7309 ; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
7310 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v39
7311 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v55, v39
7312 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v57, v39
7313 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v51, v39
7314 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v53, v39
7315 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v43, v39
7316 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v45, v39
7317 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, v39
7318 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, v39
7319 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v35, v39
7320 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v37, v39
7321 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v31, v39
7322 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v33, v39
7323 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v25, v39
7324 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v27, v39
7325 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v59, v39
7326 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v61, v39
7327 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, v39
7328 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, v39
7329 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v47, v39
7330 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v49, v39
7331 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v39
7332 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, v39
7333 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, v39
7334 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, v39
7335 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, v39
7336 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, v39
7337 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v12, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill
7338 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
7339 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v13, off, s[12:15], 0 offset:20 ; 4-byte Folded Spill
7340 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v14, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill
7341 ; GCN-NOHSA-SI-NEXT: buffer_store_dword v15, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill
7342 ; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
7343 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v12, off, s[12:15], 0 ; 4-byte Folded Reload
7344 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v13, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
7345 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v14, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
7346 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v15, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
7347 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
7348 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, v39
7349 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, v39
7350 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4
7351 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5
7352 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[38:41], off, s[0:3], 0 offset:240
7353 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
7354 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[54:57], off, s[0:3], 0 offset:176
7355 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[50:53], off, s[0:3], 0 offset:144
7356 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[42:45], off, s[0:3], 0 offset:112
7357 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80
7358 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[34:37], off, s[0:3], 0 offset:48
7359 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[30:33], off, s[0:3], 0 offset:16
7360 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:224
7361 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[58:61], off, s[0:3], 0 offset:192
7362 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:160
7363 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[46:49], off, s[0:3], 0 offset:128
7364 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:96
7365 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:64
7366 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload
7367 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload
7368 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload
7369 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload
7370 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
7371 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
7372 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0
7373 ; GCN-NOHSA-SI-NEXT: s_endpgm
7375 ; GCN-HSA-LABEL: global_zextload_v32i16_to_v32i64:
7377 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
7378 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
7379 ; GCN-HSA-NEXT: s_add_u32 s4, s2, 16
7380 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0
7381 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
7382 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
7383 ; GCN-HSA-NEXT: flat_load_dwordx4 v[2:5], v[0:1]
7384 ; GCN-HSA-NEXT: s_add_u32 s4, s2, 32
7385 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0
7386 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
7387 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
7388 ; GCN-HSA-NEXT: flat_load_dwordx4 v[6:9], v[0:1]
7389 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
7390 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
7391 ; GCN-HSA-NEXT: s_add_u32 s2, s2, 48
7392 ; GCN-HSA-NEXT: flat_load_dwordx4 v[10:13], v[0:1]
7393 ; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0
7394 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
7395 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
7396 ; GCN-HSA-NEXT: flat_load_dwordx4 v[14:17], v[0:1]
7397 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
7398 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
7399 ; GCN-HSA-NEXT: s_add_u32 s4, s0, 16
7400 ; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0
7401 ; GCN-HSA-NEXT: s_add_u32 s6, s0, 0xf0
7402 ; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0
7403 ; GCN-HSA-NEXT: s_add_u32 s8, s0, 0xd0
7404 ; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0
7405 ; GCN-HSA-NEXT: s_add_u32 s10, s0, 0xb0
7406 ; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0
7407 ; GCN-HSA-NEXT: s_add_u32 s12, s0, 0x90
7408 ; GCN-HSA-NEXT: s_addc_u32 s13, s1, 0
7409 ; GCN-HSA-NEXT: s_add_u32 s14, s0, 0x70
7410 ; GCN-HSA-NEXT: s_addc_u32 s15, s1, 0
7411 ; GCN-HSA-NEXT: v_mov_b32_e32 v23, s15
7412 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0
7413 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s14
7414 ; GCN-HSA-NEXT: s_add_u32 s14, s0, 0x50
7415 ; GCN-HSA-NEXT: v_mov_b32_e32 v19, v1
7416 ; GCN-HSA-NEXT: v_mov_b32_e32 v21, v1
7417 ; GCN-HSA-NEXT: s_addc_u32 s15, s1, 0
7418 ; GCN-HSA-NEXT: v_mov_b32_e32 v24, v1
7419 ; GCN-HSA-NEXT: s_waitcnt vmcnt(3)
7420 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v5
7421 ; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v5
7422 ; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[18:21]
7423 ; GCN-HSA-NEXT: v_mov_b32_e32 v23, s15
7424 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s14
7425 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v3
7426 ; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v3
7427 ; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[18:21]
7428 ; GCN-HSA-NEXT: v_mov_b32_e32 v23, s11
7429 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s10
7430 ; GCN-HSA-NEXT: s_waitcnt vmcnt(4)
7431 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v9
7432 ; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v9
7433 ; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[18:21]
7434 ; GCN-HSA-NEXT: v_mov_b32_e32 v23, s13
7435 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s12
7436 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v7
7437 ; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v7
7438 ; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[18:21]
7439 ; GCN-HSA-NEXT: v_mov_b32_e32 v23, s5
7440 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s4
7441 ; GCN-HSA-NEXT: s_waitcnt vmcnt(5)
7442 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v11
7443 ; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v11
7444 ; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[18:21]
7445 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s7
7446 ; GCN-HSA-NEXT: v_mov_b32_e32 v18, v1
7447 ; GCN-HSA-NEXT: v_mov_b32_e32 v20, v1
7448 ; GCN-HSA-NEXT: v_mov_b32_e32 v21, s6
7449 ; GCN-HSA-NEXT: s_waitcnt vmcnt(5)
7450 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v17
7451 ; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v17
7452 ; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[17:20]
7453 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s9
7454 ; GCN-HSA-NEXT: s_add_u32 s4, s0, 32
7455 ; GCN-HSA-NEXT: v_mov_b32_e32 v21, s8
7456 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v15
7457 ; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v15
7458 ; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0
7459 ; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[17:20]
7460 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s5
7461 ; GCN-HSA-NEXT: v_mov_b32_e32 v21, s4
7462 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v12
7463 ; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v12
7464 ; GCN-HSA-NEXT: v_mov_b32_e32 v12, s1
7465 ; GCN-HSA-NEXT: s_add_u32 s4, s0, 0xe0
7466 ; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[17:20]
7467 ; GCN-HSA-NEXT: v_mov_b32_e32 v11, s0
7468 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v10
7469 ; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v10
7470 ; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0
7471 ; GCN-HSA-NEXT: flat_store_dwordx4 v[11:12], v[17:20]
7472 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v16
7473 ; GCN-HSA-NEXT: v_and_b32_e32 v9, 0xffff, v16
7474 ; GCN-HSA-NEXT: v_mov_b32_e32 v16, s5
7475 ; GCN-HSA-NEXT: v_mov_b32_e32 v10, v1
7476 ; GCN-HSA-NEXT: v_mov_b32_e32 v12, v1
7477 ; GCN-HSA-NEXT: v_mov_b32_e32 v15, s4
7478 ; GCN-HSA-NEXT: flat_store_dwordx4 v[15:16], v[9:12]
7479 ; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v13
7480 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v2
7481 ; GCN-HSA-NEXT: v_and_b32_e32 v9, 0xffff, v2
7482 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v13
7483 ; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3
7484 ; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1
7485 ; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2
7486 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0
7487 ; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[0:3]
7488 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
7489 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
7490 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v23, 16, v14
7491 ; GCN-HSA-NEXT: v_and_b32_e32 v21, 0xffff, v14
7492 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, v1
7493 ; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
7494 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0
7495 ; GCN-HSA-NEXT: flat_store_dwordx4 v[2:3], v[21:24]
7496 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
7497 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
7498 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v8
7499 ; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v8
7500 ; GCN-HSA-NEXT: v_mov_b32_e32 v19, v1
7501 ; GCN-HSA-NEXT: v_mov_b32_e32 v21, v1
7502 ; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
7503 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80
7504 ; GCN-HSA-NEXT: flat_store_dwordx4 v[2:3], v[18:21]
7505 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
7506 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
7507 ; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
7508 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60
7509 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
7510 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v6
7511 ; GCN-HSA-NEXT: v_and_b32_e32 v15, 0xffff, v6
7512 ; GCN-HSA-NEXT: v_mov_b32_e32 v16, v1
7513 ; GCN-HSA-NEXT: v_mov_b32_e32 v18, v1
7514 ; GCN-HSA-NEXT: s_add_u32 s0, s0, 64
7515 ; GCN-HSA-NEXT: flat_store_dwordx4 v[2:3], v[15:18]
7516 ; GCN-HSA-NEXT: v_mov_b32_e32 v6, v1
7517 ; GCN-HSA-NEXT: v_mov_b32_e32 v8, v1
7518 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
7519 ; GCN-HSA-NEXT: v_mov_b32_e32 v12, v1
7520 ; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0
7521 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
7522 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v4
7523 ; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v4
7524 ; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
7525 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
7526 ; GCN-HSA-NEXT: flat_store_dwordx4 v[2:3], v[5:8]
7527 ; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[9:12]
7528 ; GCN-HSA-NEXT: s_endpgm
7530 ; GCN-NOHSA-VI-LABEL: global_zextload_v32i16_to_v32i64:
7531 ; GCN-NOHSA-VI: ; %bb.0:
7532 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
7533 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
7534 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
7535 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2
7536 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3
7537 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
7538 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
7539 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
7540 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[7:10], off, s[8:11], 0
7541 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[13:16], off, s[8:11], 0 offset:16
7542 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
7543 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
7544 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1)
7545 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v6, 0xffff, v10
7546 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
7547 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v19, 16, v14
7548 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v17, 0xffff, v14
7549 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v23, 16, v13
7550 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v21, 0xffff, v13
7551 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v27, 16, v16
7552 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v25, 0xffff, v16
7553 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v30, 16, v15
7554 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v28, 0xffff, v15
7555 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[13:16], off, s[8:11], 0 offset:32
7556 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[31:34], off, s[8:11], 0 offset:48
7557 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v8
7558 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v8
7559 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v7
7560 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v3, 0xffff, v7
7561 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v8, 16, v10
7562 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v12, 16, v9
7563 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, 0xffff, v9
7564 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1)
7565 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v40, 16, v13
7566 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v38, 0xffff, v13
7567 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v46, 16, v15
7568 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v44, 0xffff, v15
7569 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
7570 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v32
7571 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v13, 0xffff, v32
7572 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v52, 16, v34
7573 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v50, 0xffff, v34
7574 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v34, 16, v33
7575 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v32, 0xffff, v33
7576 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v33, 0
7577 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v51, v33
7578 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v53, v33
7579 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v49, 16, v31
7580 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v47, 0xffff, v31
7581 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[50:53], off, s[0:3], 0 offset:240
7582 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v48, v33
7583 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v50, v33
7584 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[47:50], off, s[0:3], 0 offset:192
7585 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v45, v33
7586 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v47, v33
7587 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v43, 16, v16
7588 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v41, 0xffff, v16
7589 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:160
7590 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v42, v33
7591 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v44, v33
7592 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[41:44], off, s[0:3], 0 offset:176
7593 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v39, v33
7594 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v41, v33
7595 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v29, v33
7596 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v31, v33
7597 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v37, 16, v14
7598 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v35, 0xffff, v14
7599 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v14, v33
7600 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v16, v33
7601 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[38:41], off, s[0:3], 0 offset:128
7602 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v36, v33
7603 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v38, v33
7604 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:96
7605 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v26, v33
7606 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v28, v33
7607 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:208
7608 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[35:38], off, s[0:3], 0 offset:144
7609 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v22, v33
7610 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v35, v33
7611 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v24, v33
7612 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v18, v33
7613 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v20, v33
7614 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, v33
7615 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v13, v33
7616 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, v33
7617 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v9, v33
7618 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, v33
7619 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[25:28], off, s[0:3], 0 offset:112
7620 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:224
7621 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:64
7622 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:80
7623 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:32
7624 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:48
7625 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, v33
7626 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, v33
7627 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0
7628 ; GCN-NOHSA-VI-NEXT: s_nop 0
7629 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v33
7630 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
7631 ; GCN-NOHSA-VI-NEXT: s_endpgm
7633 ; EG-LABEL: global_zextload_v32i16_to_v32i64:
7635 ; EG-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[]
7636 ; EG-NEXT: TEX 2 @22
7637 ; EG-NEXT: ALU 33, @31, KC0[], KC1[]
7638 ; EG-NEXT: TEX 0 @28
7639 ; EG-NEXT: ALU 93, @65, KC0[CB0:0-32], KC1[]
7640 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T50.X, 0
7641 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T49.X, 0
7642 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T48.X, 0
7643 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T47.X, 0
7644 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T46.X, 0
7645 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T45.X, 0
7646 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T44.X, 0
7647 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T43.X, 0
7648 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T42.X, 0
7649 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T41.X, 0
7650 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T40.X, 0
7651 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T39.X, 0
7652 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T38.X, 0
7653 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T37.X, 0
7654 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T36.X, 0
7655 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T35.X, 1
7657 ; EG-NEXT: Fetch clause starting at 22:
7658 ; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 48, #1
7659 ; EG-NEXT: VTX_READ_128 T21.XYZW, T19.X, 16, #1
7660 ; EG-NEXT: VTX_READ_128 T22.XYZW, T19.X, 32, #1
7661 ; EG-NEXT: Fetch clause starting at 28:
7662 ; EG-NEXT: VTX_READ_128 T29.XYZW, T19.X, 0, #1
7663 ; EG-NEXT: ALU clause starting at 30:
7664 ; EG-NEXT: MOV * T19.X, KC0[2].Z,
7665 ; EG-NEXT: ALU clause starting at 31:
7666 ; EG-NEXT: LSHR * T23.Z, T20.Z, literal.x,
7667 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
7668 ; EG-NEXT: AND_INT T23.X, T20.Z, literal.x,
7669 ; EG-NEXT: MOV T23.Y, 0.0,
7670 ; EG-NEXT: LSHR T24.Z, T20.W, literal.y,
7671 ; EG-NEXT: AND_INT * T24.X, T20.W, literal.x,
7672 ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7673 ; EG-NEXT: MOV T24.Y, 0.0,
7674 ; EG-NEXT: LSHR * T25.Z, T20.X, literal.x,
7675 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
7676 ; EG-NEXT: AND_INT T25.X, T20.X, literal.x,
7677 ; EG-NEXT: MOV T25.Y, 0.0,
7678 ; EG-NEXT: LSHR T20.Z, T20.Y, literal.y,
7679 ; EG-NEXT: AND_INT * T20.X, T20.Y, literal.x,
7680 ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7681 ; EG-NEXT: MOV T20.Y, 0.0,
7682 ; EG-NEXT: LSHR * T26.Z, T22.Z, literal.x,
7683 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
7684 ; EG-NEXT: AND_INT T26.X, T22.Z, literal.x,
7685 ; EG-NEXT: MOV T26.Y, 0.0,
7686 ; EG-NEXT: LSHR T27.Z, T22.W, literal.y,
7687 ; EG-NEXT: AND_INT * T27.X, T22.W, literal.x,
7688 ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7689 ; EG-NEXT: MOV T27.Y, 0.0,
7690 ; EG-NEXT: LSHR * T28.Z, T22.X, literal.x,
7691 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
7692 ; EG-NEXT: AND_INT T28.X, T22.X, literal.x,
7693 ; EG-NEXT: MOV T28.Y, 0.0,
7694 ; EG-NEXT: LSHR T22.Z, T22.Y, literal.y,
7695 ; EG-NEXT: AND_INT * T22.X, T22.Y, literal.x,
7696 ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7697 ; EG-NEXT: MOV T22.Y, 0.0,
7698 ; EG-NEXT: LSHR * T19.Z, T21.Z, literal.x,
7699 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
7700 ; EG-NEXT: ALU clause starting at 65:
7701 ; EG-NEXT: AND_INT T19.X, T21.Z, literal.x,
7702 ; EG-NEXT: MOV T19.Y, 0.0,
7703 ; EG-NEXT: LSHR T30.Z, T21.W, literal.y,
7704 ; EG-NEXT: AND_INT * T30.X, T21.W, literal.x,
7705 ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7706 ; EG-NEXT: MOV T30.Y, 0.0,
7707 ; EG-NEXT: LSHR * T31.Z, T21.X, literal.x,
7708 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
7709 ; EG-NEXT: AND_INT T31.X, T21.X, literal.x,
7710 ; EG-NEXT: MOV T31.Y, 0.0,
7711 ; EG-NEXT: LSHR T21.Z, T21.Y, literal.y,
7712 ; EG-NEXT: AND_INT * T21.X, T21.Y, literal.x,
7713 ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7714 ; EG-NEXT: MOV T21.Y, 0.0,
7715 ; EG-NEXT: LSHR * T32.Z, T29.Z, literal.x,
7716 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
7717 ; EG-NEXT: AND_INT T32.X, T29.Z, literal.x,
7718 ; EG-NEXT: MOV T32.Y, 0.0,
7719 ; EG-NEXT: LSHR T33.Z, T29.W, literal.y,
7720 ; EG-NEXT: AND_INT * T33.X, T29.W, literal.x,
7721 ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7722 ; EG-NEXT: MOV T33.Y, 0.0,
7723 ; EG-NEXT: LSHR * T34.Z, T29.X, literal.x,
7724 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
7725 ; EG-NEXT: AND_INT T34.X, T29.X, literal.x,
7726 ; EG-NEXT: MOV T34.Y, 0.0,
7727 ; EG-NEXT: LSHR T29.Z, T29.Y, literal.y,
7728 ; EG-NEXT: AND_INT * T29.X, T29.Y, literal.x,
7729 ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7730 ; EG-NEXT: MOV T29.Y, 0.0,
7731 ; EG-NEXT: MOV T23.W, 0.0,
7732 ; EG-NEXT: MOV * T24.W, 0.0,
7733 ; EG-NEXT: MOV T25.W, 0.0,
7734 ; EG-NEXT: MOV * T20.W, 0.0,
7735 ; EG-NEXT: MOV T26.W, 0.0,
7736 ; EG-NEXT: MOV * T27.W, 0.0,
7737 ; EG-NEXT: MOV T28.W, 0.0,
7738 ; EG-NEXT: MOV * T22.W, 0.0,
7739 ; EG-NEXT: MOV T19.W, 0.0,
7740 ; EG-NEXT: MOV * T30.W, 0.0,
7741 ; EG-NEXT: MOV T31.W, 0.0,
7742 ; EG-NEXT: MOV * T21.W, 0.0,
7743 ; EG-NEXT: MOV T32.W, 0.0,
7744 ; EG-NEXT: MOV * T33.W, 0.0,
7745 ; EG-NEXT: MOV T34.W, 0.0,
7746 ; EG-NEXT: MOV * T29.W, 0.0,
7747 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
7748 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
7749 ; EG-NEXT: LSHR T35.X, PV.W, literal.x,
7750 ; EG-NEXT: LSHR * T36.X, KC0[2].Y, literal.x,
7751 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
7752 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
7753 ; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
7754 ; EG-NEXT: LSHR T37.X, PV.W, literal.x,
7755 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7756 ; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
7757 ; EG-NEXT: LSHR T38.X, PV.W, literal.x,
7758 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7759 ; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43)
7760 ; EG-NEXT: LSHR T39.X, PV.W, literal.x,
7761 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7762 ; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44)
7763 ; EG-NEXT: LSHR T40.X, PV.W, literal.x,
7764 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7765 ; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43)
7766 ; EG-NEXT: LSHR T41.X, PV.W, literal.x,
7767 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7768 ; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43)
7769 ; EG-NEXT: LSHR T42.X, PV.W, literal.x,
7770 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7771 ; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43)
7772 ; EG-NEXT: LSHR T43.X, PV.W, literal.x,
7773 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7774 ; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43)
7775 ; EG-NEXT: LSHR T44.X, PV.W, literal.x,
7776 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7777 ; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43)
7778 ; EG-NEXT: LSHR T45.X, PV.W, literal.x,
7779 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7780 ; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43)
7781 ; EG-NEXT: LSHR T46.X, PV.W, literal.x,
7782 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7783 ; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43)
7784 ; EG-NEXT: LSHR T47.X, PV.W, literal.x,
7785 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7786 ; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43)
7787 ; EG-NEXT: LSHR T48.X, PV.W, literal.x,
7788 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7789 ; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43)
7790 ; EG-NEXT: LSHR T49.X, PV.W, literal.x,
7791 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7792 ; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43)
7793 ; EG-NEXT: LSHR * T50.X, PV.W, literal.x,
7794 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
7796 ; CM-LABEL: global_zextload_v32i16_to_v32i64:
7798 ; CM-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[]
7799 ; CM-NEXT: TEX 2 @22
7800 ; CM-NEXT: ALU 33, @31, KC0[], KC1[]
7801 ; CM-NEXT: TEX 0 @28
7802 ; CM-NEXT: ALU 94, @65, KC0[CB0:0-32], KC1[]
7803 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T19, T50.X
7804 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T24, T49.X
7805 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T25, T48.X
7806 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T26, T47.X
7807 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T21, T46.X
7808 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T27, T45.X
7809 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T28, T44.X
7810 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T29, T43.X
7811 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T20, T42.X
7812 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T30, T41.X
7813 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T31, T40.X
7814 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T32, T39.X
7815 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T22, T38.X
7816 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T33, T37.X
7817 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T34, T36.X
7818 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T35, T23.X
7820 ; CM-NEXT: Fetch clause starting at 22:
7821 ; CM-NEXT: VTX_READ_128 T21.XYZW, T20.X, 0, #1
7822 ; CM-NEXT: VTX_READ_128 T22.XYZW, T20.X, 32, #1
7823 ; CM-NEXT: VTX_READ_128 T23.XYZW, T20.X, 16, #1
7824 ; CM-NEXT: Fetch clause starting at 28:
7825 ; CM-NEXT: VTX_READ_128 T23.XYZW, T20.X, 48, #1
7826 ; CM-NEXT: ALU clause starting at 30:
7827 ; CM-NEXT: MOV * T20.X, KC0[2].Z,
7828 ; CM-NEXT: ALU clause starting at 31:
7829 ; CM-NEXT: LSHR * T19.Z, T21.Y, literal.x,
7830 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
7831 ; CM-NEXT: AND_INT T19.X, T21.Y, literal.x,
7832 ; CM-NEXT: MOV T19.Y, 0.0,
7833 ; CM-NEXT: LSHR * T24.Z, T21.X, literal.y,
7834 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7835 ; CM-NEXT: AND_INT T24.X, T21.X, literal.x,
7836 ; CM-NEXT: MOV T24.Y, 0.0,
7837 ; CM-NEXT: LSHR * T25.Z, T21.W, literal.y,
7838 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7839 ; CM-NEXT: AND_INT T25.X, T21.W, literal.x,
7840 ; CM-NEXT: MOV T25.Y, 0.0,
7841 ; CM-NEXT: LSHR * T26.Z, T21.Z, literal.y,
7842 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7843 ; CM-NEXT: AND_INT T26.X, T21.Z, literal.x,
7844 ; CM-NEXT: MOV T26.Y, 0.0,
7845 ; CM-NEXT: LSHR * T21.Z, T23.Y, literal.y,
7846 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7847 ; CM-NEXT: AND_INT T21.X, T23.Y, literal.x,
7848 ; CM-NEXT: MOV T21.Y, 0.0,
7849 ; CM-NEXT: LSHR * T27.Z, T23.X, literal.y,
7850 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7851 ; CM-NEXT: AND_INT T27.X, T23.X, literal.x,
7852 ; CM-NEXT: MOV T27.Y, 0.0,
7853 ; CM-NEXT: LSHR * T28.Z, T23.W, literal.y,
7854 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7855 ; CM-NEXT: AND_INT T28.X, T23.W, literal.x,
7856 ; CM-NEXT: MOV T28.Y, 0.0,
7857 ; CM-NEXT: LSHR * T29.Z, T23.Z, literal.y,
7858 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7859 ; CM-NEXT: AND_INT T29.X, T23.Z, literal.x,
7860 ; CM-NEXT: MOV T29.Y, 0.0,
7861 ; CM-NEXT: LSHR * T20.Z, T22.Y, literal.y,
7862 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7863 ; CM-NEXT: ALU clause starting at 65:
7864 ; CM-NEXT: AND_INT T20.X, T22.Y, literal.x,
7865 ; CM-NEXT: MOV T20.Y, 0.0,
7866 ; CM-NEXT: LSHR * T30.Z, T22.X, literal.y,
7867 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7868 ; CM-NEXT: AND_INT T30.X, T22.X, literal.x,
7869 ; CM-NEXT: MOV T30.Y, 0.0,
7870 ; CM-NEXT: LSHR * T31.Z, T22.W, literal.y,
7871 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7872 ; CM-NEXT: AND_INT T31.X, T22.W, literal.x,
7873 ; CM-NEXT: MOV T31.Y, 0.0,
7874 ; CM-NEXT: LSHR * T32.Z, T22.Z, literal.y,
7875 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7876 ; CM-NEXT: AND_INT T32.X, T22.Z, literal.x,
7877 ; CM-NEXT: MOV T32.Y, 0.0,
7878 ; CM-NEXT: LSHR * T22.Z, T23.Y, literal.y,
7879 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7880 ; CM-NEXT: AND_INT T22.X, T23.Y, literal.x,
7881 ; CM-NEXT: MOV T22.Y, 0.0,
7882 ; CM-NEXT: LSHR * T33.Z, T23.X, literal.y,
7883 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7884 ; CM-NEXT: AND_INT T33.X, T23.X, literal.x,
7885 ; CM-NEXT: MOV T33.Y, 0.0,
7886 ; CM-NEXT: LSHR * T34.Z, T23.W, literal.y,
7887 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7888 ; CM-NEXT: AND_INT T34.X, T23.W, literal.x,
7889 ; CM-NEXT: MOV T34.Y, 0.0,
7890 ; CM-NEXT: LSHR * T35.Z, T23.Z, literal.y,
7891 ; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
7892 ; CM-NEXT: AND_INT T35.X, T23.Z, literal.x,
7893 ; CM-NEXT: MOV T35.Y, 0.0,
7894 ; CM-NEXT: MOV * T19.W, 0.0,
7895 ; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
7896 ; CM-NEXT: MOV * T24.W, 0.0,
7897 ; CM-NEXT: MOV * T25.W, 0.0,
7898 ; CM-NEXT: MOV * T26.W, 0.0,
7899 ; CM-NEXT: MOV * T21.W, 0.0,
7900 ; CM-NEXT: MOV * T27.W, 0.0,
7901 ; CM-NEXT: MOV * T28.W, 0.0,
7902 ; CM-NEXT: MOV * T29.W, 0.0,
7903 ; CM-NEXT: MOV * T20.W, 0.0,
7904 ; CM-NEXT: MOV * T30.W, 0.0,
7905 ; CM-NEXT: MOV * T31.W, 0.0,
7906 ; CM-NEXT: MOV * T32.W, 0.0,
7907 ; CM-NEXT: MOV * T22.W, 0.0,
7908 ; CM-NEXT: MOV * T33.W, 0.0,
7909 ; CM-NEXT: MOV * T34.W, 0.0,
7910 ; CM-NEXT: MOV * T35.W, 0.0,
7911 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
7912 ; CM-NEXT: 224(3.138909e-43), 0(0.000000e+00)
7913 ; CM-NEXT: LSHR T23.X, PV.W, literal.x,
7914 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7915 ; CM-NEXT: 2(2.802597e-45), 240(3.363116e-43)
7916 ; CM-NEXT: LSHR T36.X, PV.W, literal.x,
7917 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7918 ; CM-NEXT: 2(2.802597e-45), 192(2.690493e-43)
7919 ; CM-NEXT: LSHR T37.X, PV.W, literal.x,
7920 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7921 ; CM-NEXT: 2(2.802597e-45), 208(2.914701e-43)
7922 ; CM-NEXT: LSHR T38.X, PV.W, literal.x,
7923 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7924 ; CM-NEXT: 2(2.802597e-45), 160(2.242078e-43)
7925 ; CM-NEXT: LSHR T39.X, PV.W, literal.x,
7926 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7927 ; CM-NEXT: 2(2.802597e-45), 176(2.466285e-43)
7928 ; CM-NEXT: LSHR T40.X, PV.W, literal.x,
7929 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7930 ; CM-NEXT: 2(2.802597e-45), 128(1.793662e-43)
7931 ; CM-NEXT: LSHR T41.X, PV.W, literal.x,
7932 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7933 ; CM-NEXT: 2(2.802597e-45), 144(2.017870e-43)
7934 ; CM-NEXT: LSHR T42.X, PV.W, literal.x,
7935 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7936 ; CM-NEXT: 2(2.802597e-45), 96(1.345247e-43)
7937 ; CM-NEXT: LSHR T43.X, PV.W, literal.x,
7938 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7939 ; CM-NEXT: 2(2.802597e-45), 112(1.569454e-43)
7940 ; CM-NEXT: LSHR T44.X, PV.W, literal.x,
7941 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7942 ; CM-NEXT: 2(2.802597e-45), 64(8.968310e-44)
7943 ; CM-NEXT: LSHR T45.X, PV.W, literal.x,
7944 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7945 ; CM-NEXT: 2(2.802597e-45), 80(1.121039e-43)
7946 ; CM-NEXT: LSHR T46.X, PV.W, literal.x,
7947 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7948 ; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44)
7949 ; CM-NEXT: LSHR T47.X, PV.W, literal.x,
7950 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7951 ; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44)
7952 ; CM-NEXT: LSHR * T48.X, PV.W, literal.x,
7953 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
7954 ; CM-NEXT: LSHR T49.X, KC0[2].Y, literal.x,
7955 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
7956 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
7957 ; CM-NEXT: LSHR * T50.X, PV.W, literal.x,
7958 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
7959 %load = load <32 x i16>, ptr addrspace(1) %in
7960 %ext = zext <32 x i16> %load to <32 x i64>
7961 store <32 x i64> %ext, ptr addrspace(1) %out
7965 define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
7966 ; GCN-NOHSA-SI-LABEL: global_sextload_v32i16_to_v32i64:
7967 ; GCN-NOHSA-SI: ; %bb.0:
7968 ; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
7969 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
7970 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
7971 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2
7972 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3
7973 ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
7974 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6
7975 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7
7976 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0
7977 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:16
7978 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:32
7979 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:48
7980 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4
7981 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5
7982 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
7983 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, v3
7984 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v26, v7
7985 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v27, v11
7986 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, v15
7987 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v20, 16, v2
7988 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v16, 16, v4
7989 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v10
7990 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v18, 16, v8
7991 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v14
7992 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v22, 0, 16
7993 ; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[24:25], v[2:3], 48
7994 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22
7995 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:240
7996 ; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
7997 ; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[24:25], v[0:1], 48
7998 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v1, 0, 16
7999 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22
8000 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:208
8001 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v12
8002 ; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
8003 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v26, 0, 16
8004 ; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[24:25], v[6:7], 48
8005 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22
8006 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:176
8007 ; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
8008 ; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[24:25], v[4:5], 48
8009 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v5, 0, 16
8010 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22
8011 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:144
8012 ; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
8013 ; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[24:25], v[10:11], 48
8014 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v27, 0, 16
8015 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22
8016 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:112
8017 ; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
8018 ; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[24:25], v[8:9], 48
8019 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v9, 0, 16
8020 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22
8021 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:80
8022 ; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
8023 ; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[23:24], v[14:15], 48
8024 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v21, v21, 0, 16
8025 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v22, 31, v21
8026 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:48
8027 ; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
8028 ; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[23:24], v[12:13], 48
8029 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v21, v13, 0, 16
8030 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v22, 31, v21
8031 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:16
8032 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v1, v12, 0, 16
8033 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v5, v14, 0, 16
8034 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v3, v3, 0, 16
8035 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v13, v20, 0, 16
8036 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v11, v2, 0, 16
8037 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v12, 31, v11
8038 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v14, 31, v13
8039 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:224
8040 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v9, v8, 0, 16
8041 ; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
8042 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v13, v10, 0, 16
8043 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v7, v19, 0, 16
8044 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v11, v18, 0, 16
8045 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v17, 0, 16
8046 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v19, v16, 0, 16
8047 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v17, v4, 0, 16
8048 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v6
8049 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v21, v6, 0, 16
8050 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v2, 0, 16
8051 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0
8052 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v25, v0, 0, 16
8053 ; GCN-NOHSA-SI-NEXT: v_bfe_i32 v27, v2, 0, 16
8054 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v2, 31, v1
8055 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v6, 31, v5
8056 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v10, 31, v9
8057 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v14, 31, v13
8058 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v18, 31, v17
8059 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v22, 31, v21
8060 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v26, 31, v25
8061 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v4, 31, v3
8062 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v8, 31, v7
8063 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v12, 31, v11
8064 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15
8065 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v20, 31, v19
8066 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 31, v23
8067 ; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v28, 31, v27
8068 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[25:28], off, s[0:3], 0 offset:192
8069 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:160
8070 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:128
8071 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:96
8072 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:64
8073 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[5:8], off, s[0:3], 0 offset:32
8074 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[1:4], off, s[0:3], 0
8075 ; GCN-NOHSA-SI-NEXT: s_endpgm
8077 ; GCN-HSA-LABEL: global_sextload_v32i16_to_v32i64:
8079 ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
8080 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
8081 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
8082 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
8083 ; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
8084 ; GCN-HSA-NEXT: s_add_u32 s4, s2, 48
8085 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0
8086 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
8087 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5
8088 ; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
8089 ; GCN-HSA-NEXT: s_add_u32 s4, s2, 32
8090 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0
8091 ; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4
8092 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5
8093 ; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
8094 ; GCN-HSA-NEXT: s_add_u32 s2, s2, 16
8095 ; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0
8096 ; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3
8097 ; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2
8098 ; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
8099 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
8100 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
8101 ; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3
8102 ; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2
8103 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
8104 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
8105 ; GCN-HSA-NEXT: v_mov_b32_e32 v23, s3
8106 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s2
8107 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0
8108 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
8109 ; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3
8110 ; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2
8111 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xd0
8112 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
8113 ; GCN-HSA-NEXT: v_mov_b32_e32 v27, s3
8114 ; GCN-HSA-NEXT: v_mov_b32_e32 v26, s2
8115 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xb0
8116 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
8117 ; GCN-HSA-NEXT: s_waitcnt vmcnt(3)
8118 ; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[8:9], 48
8119 ; GCN-HSA-NEXT: v_bfe_i32 v16, v9, 0, 16
8120 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16
8121 ; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[16:19]
8122 ; GCN-HSA-NEXT: v_mov_b32_e32 v23, s3
8123 ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s2
8124 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90
8125 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
8126 ; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x70
8127 ; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0
8128 ; GCN-HSA-NEXT: s_add_u32 s6, s0, 0x50
8129 ; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0
8130 ; GCN-HSA-NEXT: v_mov_b32_e32 v9, v11
8131 ; GCN-HSA-NEXT: s_add_u32 s8, s0, 32
8132 ; GCN-HSA-NEXT: v_bfe_i32 v16, v9, 0, 16
8133 ; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[10:11], 48
8134 ; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0
8135 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16
8136 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v10
8137 ; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19]
8138 ; GCN-HSA-NEXT: v_mov_b32_e32 v21, s9
8139 ; GCN-HSA-NEXT: v_bfe_i32 v18, v9, 0, 16
8140 ; GCN-HSA-NEXT: v_bfe_i32 v16, v10, 0, 16
8141 ; GCN-HSA-NEXT: v_mov_b32_e32 v20, s8
8142 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16
8143 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v19, 31, v18
8144 ; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19]
8145 ; GCN-HSA-NEXT: v_mov_b32_e32 v10, s3
8146 ; GCN-HSA-NEXT: s_waitcnt vmcnt(5)
8147 ; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[0:1], 48
8148 ; GCN-HSA-NEXT: v_bfe_i32 v16, v1, 0, 16
8149 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16
8150 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, v3
8151 ; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[16:19]
8152 ; GCN-HSA-NEXT: v_mov_b32_e32 v9, s2
8153 ; GCN-HSA-NEXT: v_bfe_i32 v16, v1, 0, 16
8154 ; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[2:3], 48
8155 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16
8156 ; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[16:19]
8157 ; GCN-HSA-NEXT: s_waitcnt vmcnt(6)
8158 ; GCN-HSA-NEXT: v_mov_b32_e32 v3, v7
8159 ; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[4:5], 48
8160 ; GCN-HSA-NEXT: v_bfe_i32 v16, v5, 0, 16
8161 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16
8162 ; GCN-HSA-NEXT: flat_store_dwordx4 v[9:10], v[16:19]
8163 ; GCN-HSA-NEXT: v_mov_b32_e32 v25, s7
8164 ; GCN-HSA-NEXT: v_bfe_i32 v16, v3, 0, 16
8165 ; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[6:7], 48
8166 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16
8167 ; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[16:19]
8168 ; GCN-HSA-NEXT: v_mov_b32_e32 v24, s6
8169 ; GCN-HSA-NEXT: s_waitcnt vmcnt(7)
8170 ; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[12:13], 48
8171 ; GCN-HSA-NEXT: v_bfe_i32 v16, v13, 0, 16
8172 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16
8173 ; GCN-HSA-NEXT: v_mov_b32_e32 v3, v15
8174 ; GCN-HSA-NEXT: v_mov_b32_e32 v21, s5
8175 ; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[16:19]
8176 ; GCN-HSA-NEXT: v_mov_b32_e32 v20, s4
8177 ; GCN-HSA-NEXT: v_bfe_i32 v16, v3, 0, 16
8178 ; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[14:15], 48
8179 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v8
8180 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16
8181 ; GCN-HSA-NEXT: v_bfe_i32 v8, v8, 0, 16
8182 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v24, 16, v14
8183 ; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19]
8184 ; GCN-HSA-NEXT: v_bfe_i32 v10, v1, 0, 16
8185 ; GCN-HSA-NEXT: v_bfe_i32 v16, v14, 0, 16
8186 ; GCN-HSA-NEXT: v_mov_b32_e32 v15, s1
8187 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v26, 16, v2
8188 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v12
8189 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8
8190 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v10
8191 ; GCN-HSA-NEXT: v_mov_b32_e32 v14, s0
8192 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0
8193 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v22, 16, v0
8194 ; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11]
8195 ; GCN-HSA-NEXT: v_bfe_i32 v14, v18, 0, 16
8196 ; GCN-HSA-NEXT: v_bfe_i32 v18, v24, 0, 16
8197 ; GCN-HSA-NEXT: v_bfe_i32 v20, v0, 0, 16
8198 ; GCN-HSA-NEXT: v_bfe_i32 v24, v2, 0, 16
8199 ; GCN-HSA-NEXT: v_bfe_i32 v26, v26, 0, 16
8200 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
8201 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
8202 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v25, 31, v24
8203 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v27, 31, v26
8204 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
8205 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0
8206 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v23, 16, v6
8207 ; GCN-HSA-NEXT: v_bfe_i32 v22, v22, 0, 16
8208 ; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[24:27]
8209 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
8210 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
8211 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v21, 31, v20
8212 ; GCN-HSA-NEXT: v_bfe_i32 v9, v23, 0, 16
8213 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v23, 31, v22
8214 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
8215 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0
8216 ; GCN-HSA-NEXT: v_bfe_i32 v7, v6, 0, 16
8217 ; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[20:23]
8218 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
8219 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
8220 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v4
8221 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 31, v7
8222 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 31, v9
8223 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
8224 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80
8225 ; GCN-HSA-NEXT: v_bfe_i32 v3, v4, 0, 16
8226 ; GCN-HSA-NEXT: v_bfe_i32 v5, v5, 0, 16
8227 ; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
8228 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
8229 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
8230 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v4, 31, v3
8231 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v6, 31, v5
8232 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
8233 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60
8234 ; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
8235 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
8236 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
8237 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16
8238 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v19, 31, v18
8239 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
8240 ; GCN-HSA-NEXT: s_add_u32 s0, s0, 64
8241 ; GCN-HSA-NEXT: v_bfe_i32 v12, v12, 0, 16
8242 ; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[16:19]
8243 ; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0
8244 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
8245 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 31, v12
8246 ; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 31, v14
8247 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
8248 ; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[12:15]
8249 ; GCN-HSA-NEXT: s_endpgm
8251 ; GCN-NOHSA-VI-LABEL: global_sextload_v32i16_to_v32i64:
8252 ; GCN-NOHSA-VI: ; %bb.0:
8253 ; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
8254 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
8255 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
8256 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2
8257 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3
8258 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
8259 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
8260 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
8261 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[5:8], off, s[8:11], 0
8262 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[13:16], off, s[8:11], 0 offset:48
8263 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[9:12], off, s[8:11], 0 offset:32
8264 ; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[1:4], off, s[8:11], 0 offset:16
8265 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
8266 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
8267 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3)
8268 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v6, 0, 16
8269 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(2)
8270 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v15, 0, 16
8271 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v15
8272 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v20, v15, 0, 16
8273 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v17, 16, v6
8274 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, v16
8275 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v16, 16, v16
8276 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 31, v18
8277 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v21, 31, v20
8278 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:224
8279 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v14
8280 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v6, 0, 16
8281 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v20, v16, 0, 16
8282 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 31, v18
8283 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v21, 31, v20
8284 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:240
8285 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v6, 16, v13
8286 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v13, 0, 16
8287 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v14, 0, 16
8288 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v15, 0, 16
8289 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13
8290 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15
8291 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v20, v6, 0, 16
8292 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:208
8293 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 31, v18
8294 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(4)
8295 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v13, 16, v11
8296 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v21, 31, v20
8297 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v13, 0, 16
8298 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v11, 0, 16
8299 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:192
8300 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13
8301 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v19, v12
8302 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15
8303 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v12, 16, v12
8304 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:160
8305 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v19, 0, 16
8306 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v12, 0, 16
8307 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v18, v8
8308 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v16, 16, v8
8309 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11
8310 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13
8311 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v8, 16, v9
8312 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:176
8313 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v5, 0, 16
8314 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v8, 0, 16
8315 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v9, 0, 16
8316 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11
8317 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13
8318 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v10
8319 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:128
8320 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v9, v9, 0, 16
8321 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v7, 0, 16
8322 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v12, 16, v7
8323 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v7, v10, 0, 16
8324 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v8, 31, v7
8325 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v10, 31, v9
8326 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:144
8327 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
8328 ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(8)
8329 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v7, v3, 0, 16
8330 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
8331 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v9, v3, 0, 16
8332 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v8, 31, v7
8333 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v10, 31, v9
8334 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:96
8335 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v2
8336 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v5, 0, 16
8337 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, v4
8338 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v4, 16, v4
8339 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v1
8340 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v27, v5, 0, 16
8341 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v29, v4, 0, 16
8342 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v2, 0, 16
8343 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v17, 0, 16
8344 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v19, v18, 0, 16
8345 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v21, v16, 0, 16
8346 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v12, 0, 16
8347 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v23, v1, 0, 16
8348 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v17, v3, 0, 16
8349 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v25, v7, 0, 16
8350 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v28, 31, v27
8351 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v30, 31, v29
8352 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
8353 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2
8354 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6
8355 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 31, v8
8356 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v20, 31, v19
8357 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v22, 31, v21
8358 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11
8359 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13
8360 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15
8361 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v18, 31, v17
8362 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v24, 31, v23
8363 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v26, 31, v25
8364 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:112
8365 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:64
8366 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:80
8367 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:32
8368 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:48
8369 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0
8370 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
8371 ; GCN-NOHSA-VI-NEXT: s_endpgm
8373 ; EG-LABEL: global_sextload_v32i16_to_v32i64:
8375 ; EG-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[]
8376 ; EG-NEXT: TEX 0 @22
8377 ; EG-NEXT: ALU 56, @31, KC0[CB0:0-32], KC1[]
8378 ; EG-NEXT: TEX 2 @24
8379 ; EG-NEXT: ALU 74, @88, KC0[CB0:0-32], KC1[]
8380 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T38.X, 0
8381 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T36.X, 0
8382 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T34.X, 0
8383 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T33.X, 0
8384 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T32.X, 0
8385 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T31.X, 0
8386 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T30.X, 0
8387 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T29.X, 0
8388 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T28.X, 0
8389 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T27.X, 0
8390 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T26.X, 0
8391 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T25.X, 0
8392 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T24.X, 0
8393 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T23.X, 0
8394 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T22.X, 0
8395 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T21.X, 1
8397 ; EG-NEXT: Fetch clause starting at 22:
8398 ; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1
8399 ; EG-NEXT: Fetch clause starting at 24:
8400 ; EG-NEXT: VTX_READ_128 T38.XYZW, T19.X, 48, #1
8401 ; EG-NEXT: VTX_READ_128 T39.XYZW, T19.X, 32, #1
8402 ; EG-NEXT: VTX_READ_128 T40.XYZW, T19.X, 16, #1
8403 ; EG-NEXT: ALU clause starting at 30:
8404 ; EG-NEXT: MOV * T19.X, KC0[2].Z,
8405 ; EG-NEXT: ALU clause starting at 31:
8406 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
8407 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
8408 ; EG-NEXT: LSHR T21.X, PV.W, literal.x,
8409 ; EG-NEXT: LSHR * T22.X, KC0[2].Y, literal.x,
8410 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
8411 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
8412 ; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
8413 ; EG-NEXT: LSHR T23.X, PV.W, literal.x,
8414 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8415 ; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
8416 ; EG-NEXT: LSHR T24.X, PV.W, literal.x,
8417 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8418 ; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43)
8419 ; EG-NEXT: LSHR T25.X, PV.W, literal.x,
8420 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8421 ; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44)
8422 ; EG-NEXT: LSHR T26.X, PV.W, literal.x,
8423 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8424 ; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43)
8425 ; EG-NEXT: LSHR T27.X, PV.W, literal.x,
8426 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8427 ; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43)
8428 ; EG-NEXT: LSHR T28.X, PV.W, literal.x,
8429 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8430 ; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43)
8431 ; EG-NEXT: LSHR T29.X, PV.W, literal.x,
8432 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8433 ; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43)
8434 ; EG-NEXT: LSHR T30.X, PV.W, literal.x,
8435 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8436 ; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43)
8437 ; EG-NEXT: LSHR T31.X, PV.W, literal.x,
8438 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8439 ; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43)
8440 ; EG-NEXT: LSHR T32.X, PV.W, literal.x,
8441 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8442 ; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43)
8443 ; EG-NEXT: LSHR T33.X, PV.W, literal.x,
8444 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8445 ; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43)
8446 ; EG-NEXT: LSHR T34.X, PV.W, literal.x,
8447 ; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y,
8448 ; EG-NEXT: ASHR * T35.W, T20.Y, literal.z,
8449 ; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43)
8450 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
8451 ; EG-NEXT: LSHR T36.X, PV.W, literal.x,
8452 ; EG-NEXT: ASHR T35.Z, T20.Y, literal.y,
8453 ; EG-NEXT: ASHR * T37.W, T20.X, literal.z,
8454 ; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
8455 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
8456 ; EG-NEXT: BFE_INT T35.X, T20.Y, 0.0, literal.x,
8457 ; EG-NEXT: ASHR * T37.Z, T20.X, literal.x,
8458 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
8459 ; EG-NEXT: BFE_INT T37.X, T20.X, 0.0, literal.x,
8460 ; EG-NEXT: ASHR T35.Y, PV.X, literal.y,
8461 ; EG-NEXT: ASHR * T19.W, T20.W, literal.y,
8462 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8463 ; EG-NEXT: ALU clause starting at 88:
8464 ; EG-NEXT: ASHR T19.Z, T20.W, literal.x,
8465 ; EG-NEXT: ASHR * T41.W, T20.Z, literal.y,
8466 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8467 ; EG-NEXT: BFE_INT T19.X, T20.W, 0.0, literal.x,
8468 ; EG-NEXT: ASHR T37.Y, T37.X, literal.y,
8469 ; EG-NEXT: ASHR T41.Z, T20.Z, literal.x,
8470 ; EG-NEXT: ASHR * T20.W, T40.Y, literal.y,
8471 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8472 ; EG-NEXT: BFE_INT T41.X, T20.Z, 0.0, literal.x,
8473 ; EG-NEXT: ASHR T19.Y, PV.X, literal.y,
8474 ; EG-NEXT: ASHR T20.Z, T40.Y, literal.x,
8475 ; EG-NEXT: ASHR * T42.W, T40.X, literal.y,
8476 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8477 ; EG-NEXT: BFE_INT T20.X, T40.Y, 0.0, literal.x,
8478 ; EG-NEXT: ASHR T41.Y, PV.X, literal.y,
8479 ; EG-NEXT: ASHR T42.Z, T40.X, literal.x,
8480 ; EG-NEXT: ASHR * T43.W, T40.W, literal.y,
8481 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8482 ; EG-NEXT: BFE_INT T42.X, T40.X, 0.0, literal.x,
8483 ; EG-NEXT: ASHR T20.Y, PV.X, literal.y,
8484 ; EG-NEXT: ASHR T43.Z, T40.W, literal.x,
8485 ; EG-NEXT: ASHR * T44.W, T40.Z, literal.y,
8486 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8487 ; EG-NEXT: BFE_INT T43.X, T40.W, 0.0, literal.x,
8488 ; EG-NEXT: ASHR T42.Y, PV.X, literal.y,
8489 ; EG-NEXT: ASHR T44.Z, T40.Z, literal.x,
8490 ; EG-NEXT: ASHR * T40.W, T39.Y, literal.y,
8491 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8492 ; EG-NEXT: BFE_INT T44.X, T40.Z, 0.0, literal.x,
8493 ; EG-NEXT: ASHR T43.Y, PV.X, literal.y,
8494 ; EG-NEXT: ASHR T40.Z, T39.Y, literal.x,
8495 ; EG-NEXT: ASHR * T45.W, T39.X, literal.y,
8496 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8497 ; EG-NEXT: BFE_INT T40.X, T39.Y, 0.0, literal.x,
8498 ; EG-NEXT: ASHR T44.Y, PV.X, literal.y,
8499 ; EG-NEXT: ASHR T45.Z, T39.X, literal.x,
8500 ; EG-NEXT: ASHR * T46.W, T39.W, literal.y,
8501 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8502 ; EG-NEXT: BFE_INT T45.X, T39.X, 0.0, literal.x,
8503 ; EG-NEXT: ASHR T40.Y, PV.X, literal.y,
8504 ; EG-NEXT: ASHR T46.Z, T39.W, literal.x,
8505 ; EG-NEXT: ASHR * T47.W, T39.Z, literal.y,
8506 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8507 ; EG-NEXT: BFE_INT T46.X, T39.W, 0.0, literal.x,
8508 ; EG-NEXT: ASHR T45.Y, PV.X, literal.y,
8509 ; EG-NEXT: ASHR T47.Z, T39.Z, literal.x,
8510 ; EG-NEXT: ASHR * T39.W, T38.Y, literal.y,
8511 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8512 ; EG-NEXT: BFE_INT T47.X, T39.Z, 0.0, literal.x,
8513 ; EG-NEXT: ASHR T46.Y, PV.X, literal.y,
8514 ; EG-NEXT: ASHR T39.Z, T38.Y, literal.x,
8515 ; EG-NEXT: ASHR * T48.W, T38.X, literal.y,
8516 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8517 ; EG-NEXT: BFE_INT T39.X, T38.Y, 0.0, literal.x,
8518 ; EG-NEXT: ASHR T47.Y, PV.X, literal.y,
8519 ; EG-NEXT: ASHR T48.Z, T38.X, literal.x,
8520 ; EG-NEXT: ASHR * T49.W, T38.W, literal.y,
8521 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8522 ; EG-NEXT: BFE_INT T48.X, T38.X, 0.0, literal.x,
8523 ; EG-NEXT: ASHR T39.Y, PV.X, literal.y,
8524 ; EG-NEXT: ASHR T49.Z, T38.W, literal.x,
8525 ; EG-NEXT: ASHR * T50.W, T38.Z, literal.y,
8526 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8527 ; EG-NEXT: BFE_INT T49.X, T38.W, 0.0, literal.x,
8528 ; EG-NEXT: ASHR T48.Y, PV.X, literal.y,
8529 ; EG-NEXT: ASHR * T50.Z, T38.Z, literal.x,
8530 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8531 ; EG-NEXT: BFE_INT T50.X, T38.Z, 0.0, literal.x,
8532 ; EG-NEXT: ASHR T49.Y, PV.X, literal.y,
8533 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
8534 ; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8535 ; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00)
8536 ; EG-NEXT: LSHR T38.X, PV.W, literal.x,
8537 ; EG-NEXT: ASHR * T50.Y, PV.X, literal.y,
8538 ; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44)
8540 ; CM-LABEL: global_sextload_v32i16_to_v32i64:
8542 ; CM-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[]
8543 ; CM-NEXT: TEX 0 @22
8544 ; CM-NEXT: ALU 55, @31, KC0[CB0:0-32], KC1[]
8545 ; CM-NEXT: TEX 2 @24
8546 ; CM-NEXT: ALU 73, @87, KC0[CB0:0-32], KC1[]
8547 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T38, T50.X
8548 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T49, T36.X
8549 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T48, T34.X
8550 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T47, T33.X
8551 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T39, T32.X
8552 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T46, T31.X
8553 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T45, T30.X
8554 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T44, T29.X
8555 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T40, T28.X
8556 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T43, T27.X
8557 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T42, T26.X
8558 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T41, T25.X
8559 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T20, T24.X
8560 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T19, T23.X
8561 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T37, T22.X
8562 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T35, T21.X
8564 ; CM-NEXT: Fetch clause starting at 22:
8565 ; CM-NEXT: VTX_READ_128 T20.XYZW, T19.X, 48, #1
8566 ; CM-NEXT: Fetch clause starting at 24:
8567 ; CM-NEXT: VTX_READ_128 T38.XYZW, T19.X, 0, #1
8568 ; CM-NEXT: VTX_READ_128 T39.XYZW, T19.X, 16, #1
8569 ; CM-NEXT: VTX_READ_128 T40.XYZW, T19.X, 32, #1
8570 ; CM-NEXT: ALU clause starting at 30:
8571 ; CM-NEXT: MOV * T19.X, KC0[2].Z,
8572 ; CM-NEXT: ALU clause starting at 31:
8573 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
8574 ; CM-NEXT: 224(3.138909e-43), 0(0.000000e+00)
8575 ; CM-NEXT: LSHR T21.X, PV.W, literal.x,
8576 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8577 ; CM-NEXT: 2(2.802597e-45), 240(3.363116e-43)
8578 ; CM-NEXT: LSHR T22.X, PV.W, literal.x,
8579 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8580 ; CM-NEXT: 2(2.802597e-45), 192(2.690493e-43)
8581 ; CM-NEXT: LSHR T23.X, PV.W, literal.x,
8582 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8583 ; CM-NEXT: 2(2.802597e-45), 208(2.914701e-43)
8584 ; CM-NEXT: LSHR T24.X, PV.W, literal.x,
8585 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8586 ; CM-NEXT: 2(2.802597e-45), 160(2.242078e-43)
8587 ; CM-NEXT: LSHR T25.X, PV.W, literal.x,
8588 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8589 ; CM-NEXT: 2(2.802597e-45), 176(2.466285e-43)
8590 ; CM-NEXT: LSHR T26.X, PV.W, literal.x,
8591 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8592 ; CM-NEXT: 2(2.802597e-45), 128(1.793662e-43)
8593 ; CM-NEXT: LSHR T27.X, PV.W, literal.x,
8594 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8595 ; CM-NEXT: 2(2.802597e-45), 144(2.017870e-43)
8596 ; CM-NEXT: LSHR T28.X, PV.W, literal.x,
8597 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8598 ; CM-NEXT: 2(2.802597e-45), 96(1.345247e-43)
8599 ; CM-NEXT: LSHR T29.X, PV.W, literal.x,
8600 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8601 ; CM-NEXT: 2(2.802597e-45), 112(1.569454e-43)
8602 ; CM-NEXT: LSHR T30.X, PV.W, literal.x,
8603 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8604 ; CM-NEXT: 2(2.802597e-45), 64(8.968310e-44)
8605 ; CM-NEXT: LSHR T31.X, PV.W, literal.x,
8606 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8607 ; CM-NEXT: 2(2.802597e-45), 80(1.121039e-43)
8608 ; CM-NEXT: LSHR T32.X, PV.W, literal.x,
8609 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8610 ; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44)
8611 ; CM-NEXT: LSHR T33.X, PV.W, literal.x,
8612 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
8613 ; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44)
8614 ; CM-NEXT: LSHR T34.X, PV.W, literal.x,
8615 ; CM-NEXT: ASHR * T35.W, T20.Z, literal.y,
8616 ; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44)
8617 ; CM-NEXT: LSHR T36.X, KC0[2].Y, literal.x,
8618 ; CM-NEXT: ASHR T35.Z, T20.Z, literal.y,
8619 ; CM-NEXT: ASHR * T37.W, T20.W, literal.z,
8620 ; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
8621 ; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00)
8622 ; CM-NEXT: BFE_INT T35.X, T20.Z, 0.0, literal.x,
8623 ; CM-NEXT: ASHR * T37.Z, T20.W, literal.x,
8624 ; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
8625 ; CM-NEXT: BFE_INT T37.X, T20.W, 0.0, literal.x,
8626 ; CM-NEXT: ASHR T35.Y, PV.X, literal.y,
8627 ; CM-NEXT: ASHR * T19.W, T20.X, literal.y,
8628 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8629 ; CM-NEXT: ALU clause starting at 87:
8630 ; CM-NEXT: ASHR T19.Z, T20.X, literal.x,
8631 ; CM-NEXT: ASHR * T20.W, T20.Y, literal.y,
8632 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8633 ; CM-NEXT: BFE_INT T19.X, T20.X, 0.0, literal.x,
8634 ; CM-NEXT: ASHR T37.Y, T37.X, literal.y, BS:VEC_120/SCL_212
8635 ; CM-NEXT: ASHR T20.Z, T20.Y, literal.x,
8636 ; CM-NEXT: ASHR * T41.W, T40.Z, literal.y,
8637 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8638 ; CM-NEXT: BFE_INT T20.X, T20.Y, 0.0, literal.x,
8639 ; CM-NEXT: ASHR T19.Y, PV.X, literal.y,
8640 ; CM-NEXT: ASHR T41.Z, T40.Z, literal.x,
8641 ; CM-NEXT: ASHR * T42.W, T40.W, literal.y,
8642 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8643 ; CM-NEXT: BFE_INT T41.X, T40.Z, 0.0, literal.x,
8644 ; CM-NEXT: ASHR T20.Y, PV.X, literal.y,
8645 ; CM-NEXT: ASHR T42.Z, T40.W, literal.x,
8646 ; CM-NEXT: ASHR * T43.W, T40.X, literal.y,
8647 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8648 ; CM-NEXT: BFE_INT T42.X, T40.W, 0.0, literal.x,
8649 ; CM-NEXT: ASHR T41.Y, PV.X, literal.y,
8650 ; CM-NEXT: ASHR T43.Z, T40.X, literal.x,
8651 ; CM-NEXT: ASHR * T40.W, T40.Y, literal.y,
8652 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8653 ; CM-NEXT: BFE_INT T43.X, T40.X, 0.0, literal.x,
8654 ; CM-NEXT: ASHR T42.Y, PV.X, literal.y,
8655 ; CM-NEXT: ASHR T40.Z, T40.Y, literal.x,
8656 ; CM-NEXT: ASHR * T44.W, T39.Z, literal.y,
8657 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8658 ; CM-NEXT: BFE_INT T40.X, T40.Y, 0.0, literal.x,
8659 ; CM-NEXT: ASHR T43.Y, PV.X, literal.y,
8660 ; CM-NEXT: ASHR T44.Z, T39.Z, literal.x,
8661 ; CM-NEXT: ASHR * T45.W, T39.W, literal.y,
8662 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8663 ; CM-NEXT: BFE_INT T44.X, T39.Z, 0.0, literal.x,
8664 ; CM-NEXT: ASHR T40.Y, PV.X, literal.y,
8665 ; CM-NEXT: ASHR T45.Z, T39.W, literal.x,
8666 ; CM-NEXT: ASHR * T46.W, T39.X, literal.y,
8667 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8668 ; CM-NEXT: BFE_INT T45.X, T39.W, 0.0, literal.x,
8669 ; CM-NEXT: ASHR T44.Y, PV.X, literal.y,
8670 ; CM-NEXT: ASHR T46.Z, T39.X, literal.x,
8671 ; CM-NEXT: ASHR * T39.W, T39.Y, literal.y,
8672 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8673 ; CM-NEXT: BFE_INT T46.X, T39.X, 0.0, literal.x,
8674 ; CM-NEXT: ASHR T45.Y, PV.X, literal.y,
8675 ; CM-NEXT: ASHR T39.Z, T39.Y, literal.x,
8676 ; CM-NEXT: ASHR * T47.W, T38.Z, literal.y,
8677 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8678 ; CM-NEXT: BFE_INT T39.X, T39.Y, 0.0, literal.x,
8679 ; CM-NEXT: ASHR T46.Y, PV.X, literal.y,
8680 ; CM-NEXT: ASHR T47.Z, T38.Z, literal.x,
8681 ; CM-NEXT: ASHR * T48.W, T38.W, literal.y,
8682 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8683 ; CM-NEXT: BFE_INT T47.X, T38.Z, 0.0, literal.x,
8684 ; CM-NEXT: ASHR T39.Y, PV.X, literal.y,
8685 ; CM-NEXT: ASHR T48.Z, T38.W, literal.x,
8686 ; CM-NEXT: ASHR * T49.W, T38.X, literal.y,
8687 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8688 ; CM-NEXT: BFE_INT T48.X, T38.W, 0.0, literal.x,
8689 ; CM-NEXT: ASHR T47.Y, PV.X, literal.y,
8690 ; CM-NEXT: ASHR T49.Z, T38.X, literal.x,
8691 ; CM-NEXT: ASHR * T38.W, T38.Y, literal.y,
8692 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8693 ; CM-NEXT: BFE_INT T49.X, T38.X, 0.0, literal.x,
8694 ; CM-NEXT: ASHR T48.Y, PV.X, literal.y,
8695 ; CM-NEXT: ASHR * T38.Z, T38.Y, literal.x,
8696 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8697 ; CM-NEXT: BFE_INT T38.X, T38.Y, 0.0, literal.x,
8698 ; CM-NEXT: ASHR T49.Y, PV.X, literal.y,
8699 ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
8700 ; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44)
8701 ; CM-NEXT: LSHR T50.X, PV.W, literal.x,
8702 ; CM-NEXT: ASHR * T38.Y, PV.X, literal.y,
8703 ; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44)
8704 %load = load <32 x i16>, ptr addrspace(1) %in
8705 %ext = sext <32 x i16> %load to <32 x i64>
8706 store <32 x i64> %ext, ptr addrspace(1) %out
8710 ; define amdgpu_kernel void @global_zextload_v64i16_to_v64i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
8711 ; %load = load <64 x i16>, ptr addrspace(1) %in
8712 ; %ext = zext <64 x i16> %load to <64 x i64>
8713 ; store <64 x i64> %ext, ptr addrspace(1) %out
8717 ; define amdgpu_kernel void @global_sextload_v64i16_to_v64i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
8718 ; %load = load <64 x i16>, ptr addrspace(1) %in
8719 ; %ext = sext <64 x i16> %load to <64 x i64>
8720 ; store <64 x i64> %ext, ptr addrspace(1) %out
; Attribute group #0: functions tagged with it are declared nounwind. In the visible tail of this file it is referenced only by the commented-out v64i16 kernels.
8724 attributes #0 = { nounwind }