1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,SICIVI,FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,GFX89,FUNC %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX89,FUNC %s
4 ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; Copies a single i16 from %in to %out; both pointers are addrspace(3).
; GCN runs expect a ds_read_u16. The r600 run (EG checks) expects the load
; address to be moved from KC0[2].Z, an LDS_USHORT_READ_RET using it, the
; loaded value moved out of OQAP, the store address moved from KC0[2].Y, and
; finally an LDS_SHORT_WRITE of that value to that address.
6 ; FUNC-LABEL: {{^}}local_load_i16:
10 ; GCN: ds_read_u16 v{{[0-9]+}}
12 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
13 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
14 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
15 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
16 ; EG: LDS_SHORT_WRITE {{\*?}} [[TO]], [[DATA]]
17 define amdgpu_kernel void @local_load_i16(i16 addrspace(3)* %out, i16 addrspace(3)* %in) {
19 %ld = load i16, i16 addrspace(3)* %in
20 store i16 %ld, i16 addrspace(3)* %out
; Copies a <2 x i16> from %in to %out; both pointers are addrspace(3).
; SICIVI runs expect an `s_mov_b32 m0`. The r600 run (EG checks) expects the
; same address/OQAP move pattern as a scalar copy, but with LDS_READ_RET and
; LDS_WRITE as the memory operations.
24 ; FUNC-LABEL: {{^}}local_load_v2i16:
26 ; SICIVI: s_mov_b32 m0
30 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
31 ; EG: LDS_READ_RET {{.*}} [[FROM]]
32 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
33 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
34 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
35 define amdgpu_kernel void @local_load_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) {
37 %ld = load <2 x i16>, <2 x i16> addrspace(3)* %in
38 store <2 x i16> %ld, <2 x i16> addrspace(3)* %out
; Copies a <3 x i16> from %in to %out; both pointers are addrspace(3).
; SICIVI runs expect an `s_mov_b32 m0`. GCN runs expect the store to be split
; into a ds_write_b32 and a ds_write_b16 (in either order); the r600 run
; expects both an LDS_USHORT_READ_RET and an LDS_READ_RET (in either order).
42 ; FUNC-LABEL: {{^}}local_load_v3i16:
44 ; SICIVI: s_mov_b32 m0
47 ; GCN-DAG: ds_write_b32
48 ; GCN-DAG: ds_write_b16
50 ; EG-DAG: LDS_USHORT_READ_RET
51 ; EG-DAG: LDS_READ_RET
52 define amdgpu_kernel void @local_load_v3i16(<3 x i16> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
54 %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
55 store <3 x i16> %ld, <3 x i16> addrspace(3)* %out
; Copies a <4 x i16> from %in to %out; both pointers are addrspace(3).
; The only check visible here is `s_mov_b32 m0` on the SICIVI runs.
59 ; FUNC-LABEL: {{^}}local_load_v4i16:
61 ; SICIVI: s_mov_b32 m0
67 define amdgpu_kernel void @local_load_v4i16(<4 x i16> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) {
69 %ld = load <4 x i16>, <4 x i16> addrspace(3)* %in
70 store <4 x i16> %ld, <4 x i16> addrspace(3)* %out
; Copies an <8 x i16> from %in to %out; both pointers are addrspace(3).
; SICIVI runs expect an `s_mov_b32 m0`; all GCN runs expect one ds_read2_b64
; whose output line ends in `offset1:1`.
74 ; FUNC-LABEL: {{^}}local_load_v8i16:
76 ; SICIVI: s_mov_b32 m0
78 ; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
84 define amdgpu_kernel void @local_load_v8i16(<8 x i16> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) {
86 %ld = load <8 x i16>, <8 x i16> addrspace(3)* %in
87 store <8 x i16> %ld, <8 x i16> addrspace(3)* %out
; Copies a <16 x i16> from %in to %out; both pointers are addrspace(3).
; SICIVI runs expect an `s_mov_b32 m0`; all GCN runs expect two ds_read2_b64
; instructions, one ending in `offset1:3` and one ending in
; `offset0:1 offset1:2`, in either order.
91 ; FUNC-LABEL: {{^}}local_load_v16i16:
93 ; SICIVI: s_mov_b32 m0
95 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:3{{$}}
96 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1 offset1:2{{$}}
108 define amdgpu_kernel void @local_load_v16i16(<16 x i16> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) {
110 %ld = load <16 x i16>, <16 x i16> addrspace(3)* %in
111 store <16 x i16> %ld, <16 x i16> addrspace(3)* %out
; Loads an i16 from addrspace(3) %in, zero-extends it to i32, and stores the
; i32 to addrspace(3) %out.
; SICIVI runs expect an `s_mov_b32 m0`. The r600 run (EG checks) expects an
; LDS_USHORT_READ_RET whose OQAP result is written back directly with
; LDS_WRITE, i.e. no separate extension instruction is matched.
115 ; FUNC-LABEL: {{^}}local_zextload_i16_to_i32:
117 ; SICIVI: s_mov_b32 m0
122 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
123 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
124 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
125 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
126 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
127 define amdgpu_kernel void @local_zextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
128 %a = load i16, i16 addrspace(3)* %in
129 %ext = zext i16 %a to i32
130 store i32 %ext, i32 addrspace(3)* %out
; Loads an i16 from addrspace(3) %in, sign-extends it to i32, and stores the
; i32 to addrspace(3) %out.
; SICIVI runs expect an `s_mov_b32 m0`. The r600 run (EG checks) expects an
; LDS_USHORT_READ_RET, a BFE_INT producing the value that is stored, and an
; LDS_WRITE of that BFE_INT result.
134 ; FUNC-LABEL: {{^}}local_sextload_i16_to_i32:
138 ; SICIVI: s_mov_b32 m0
142 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
143 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
144 ; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
145 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
146 ; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
148 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
149 define amdgpu_kernel void @local_sextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
150 %a = load i16, i16 addrspace(3)* %in
151 %ext = sext i16 %a to i32
152 store i32 %ext, i32 addrspace(3)* %out
; Single-element vector variant: loads a <1 x i16> from addrspace(3) %in,
; zero-extends it to <1 x i32>, and stores the result to addrspace(3) %out.
; SICIVI runs expect an `s_mov_b32 m0`. The r600 run (EG checks) expects an
; LDS_USHORT_READ_RET whose OQAP result is stored directly with LDS_WRITE.
156 ; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i32:
158 ; SICIVI: s_mov_b32 m0
162 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
163 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
164 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
165 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
166 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
167 define amdgpu_kernel void @local_zextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
168 %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
169 %ext = zext <1 x i16> %load to <1 x i32>
170 store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
; Single-element vector variant: loads a <1 x i16> from addrspace(3) %in,
; sign-extends it to <1 x i32>, and stores the result to addrspace(3) %out.
; SICIVI runs expect an `s_mov_b32 m0`. The r600 run (EG checks) expects an
; LDS_USHORT_READ_RET, a BFE_INT producing the stored value, and an LDS_WRITE
; of that BFE_INT result.
174 ; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i32:
176 ; SICIVI: s_mov_b32 m0
180 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
181 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
182 ; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
183 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
184 ; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
186 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
187 define amdgpu_kernel void @local_sextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
188 %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
189 %ext = sext <1 x i16> %load to <1 x i32>
190 store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
; Loads a <2 x i16> from addrspace(3) %in, zero-extends each element to i32,
; and stores the <2 x i32> result to addrspace(3) %out.
; The only check visible here is `s_mov_b32 m0` on the SICIVI runs.
194 ; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i32:
197 ; SICIVI: s_mov_b32 m0
202 define amdgpu_kernel void @local_zextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
203 %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
204 %ext = zext <2 x i16> %load to <2 x i32>
205 store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
; Loads a <2 x i16> from addrspace(3) %in, sign-extends each element to i32,
; and stores the <2 x i32> result to addrspace(3) %out.
; The only check visible here is `s_mov_b32 m0` on the SICIVI runs.
209 ; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i32:
212 ; SICIVI: s_mov_b32 m0
219 define amdgpu_kernel void @local_sextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
220 %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
221 %ext = sext <2 x i16> %load to <2 x i32>
222 store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
; Loads a <3 x i16> from addrspace(3) %in, zero-extends each element to i32,
; and stores the <3 x i32> result to addrspace(3) %out.
; SICIVI runs expect an `s_mov_b32 m0`; all GCN runs expect the store to be
; split into a ds_write_b32 and a ds_write_b64, in either order.
; NOTE(review): the kernel name carries a doubled `local_` prefix, unlike the
; v1/v2 siblings; the label check and the define agree, so it is left as-is.
226 ; FUNC-LABEL: {{^}}local_local_zextload_v3i16_to_v3i32:
228 ; SICIVI: s_mov_b32 m0
231 ; GCN-DAG: ds_write_b32
232 ; GCN-DAG: ds_write_b64
235 define amdgpu_kernel void @local_local_zextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
237 %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
238 %ext = zext <3 x i16> %ld to <3 x i32>
239 store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
; Loads a <3 x i16> from addrspace(3) %in, sign-extends each element to i32,
; and stores the <3 x i32> result to addrspace(3) %out.
; SICIVI runs expect an `s_mov_b32 m0`; all GCN runs expect the store to be
; split into a ds_write_b32 and a ds_write_b64, in either order.
; NOTE(review): the kernel name carries a doubled `local_` prefix, unlike the
; v1/v2 siblings; the label check and the define agree, so it is left as-is.
243 ; FUNC-LABEL: {{^}}local_local_sextload_v3i16_to_v3i32:
245 ; SICIVI: s_mov_b32 m0
248 ; GCN-DAG: ds_write_b32
249 ; GCN-DAG: ds_write_b64
255 define amdgpu_kernel void @local_local_sextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
257 %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
258 %ext = sext <3 x i16> %ld to <3 x i32>
259 store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
; Loads a <4 x i16> from addrspace(3) %in, zero-extends each element to i32,
; and stores the <4 x i32> result to addrspace(3) %out.
; The only check visible here is `s_mov_b32 m0` on the SICIVI runs.
; NOTE(review): the kernel name carries a doubled `local_` prefix, while the
; matching sextload kernel for v4i16 does not; left unchanged because the
; label check and the define agree.
263 ; FUNC-LABEL: {{^}}local_local_zextload_v4i16_to_v4i32:
266 ; SICIVI: s_mov_b32 m0
272 define amdgpu_kernel void @local_local_zextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
273 %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
274 %ext = zext <4 x i16> %load to <4 x i32>
275 store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
; Loads a <4 x i16> from addrspace(3) %in, sign-extends each element to i32,
; and stores the <4 x i32> result to addrspace(3) %out.
; The only check visible here is `s_mov_b32 m0` on the SICIVI runs.
279 ; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i32:
282 ; SICIVI: s_mov_b32 m0
292 define amdgpu_kernel void @local_sextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
293 %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
294 %ext = sext <4 x i16> %load to <4 x i32>
295 store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
; Loads an <8 x i16> from addrspace(3) %in, zero-extends each element to i32,
; and stores the <8 x i32> result to addrspace(3) %out.
; SICIVI runs expect an `s_mov_b32 m0`; all GCN runs expect one ds_read2_b64
; whose output line ends in `offset1:1`.
299 ; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i32:
301 ; SICIVI: s_mov_b32 m0
303 ; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
309 define amdgpu_kernel void @local_zextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
310 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
311 %ext = zext <8 x i16> %load to <8 x i32>
312 store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
; Loads an <8 x i16> from addrspace(3) %in, sign-extends each element to i32,
; and stores the <8 x i32> result to addrspace(3) %out.
; SICIVI runs expect an `s_mov_b32 m0`; all GCN runs expect one ds_read2_b64
; whose output line ends in `offset1:1`.
316 ; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i32:
318 ; SICIVI: s_mov_b32 m0
320 ; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
334 define amdgpu_kernel void @local_sextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
335 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
336 %ext = sext <8 x i16> %load to <8 x i32>
337 store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
; Loads a <16 x i16> from addrspace(3) %in, zero-extends each element to i32,
; and stores the <16 x i32> result to addrspace(3) %out.
; SICIVI runs expect an `s_mov_b32 m0`; all GCN runs expect two ds_read2_b64
; instructions, ending in `offset1:1` and `offset0:2 offset1:3`, in either
; order.
341 ; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i32:
343 ; SICIVI: s_mov_b32 m0
345 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
346 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
361 define amdgpu_kernel void @local_zextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
362 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
363 %ext = zext <16 x i16> %load to <16 x i32>
364 store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
; Loads a <16 x i16> from addrspace(3) %in, sign-extends each element to i32,
; and stores the <16 x i32> result to addrspace(3) %out.
; SICIVI runs expect an `s_mov_b32 m0`; all GCN runs expect two ds_read2_b64
; instructions, ending in `offset1:1` and `offset0:2 offset1:3`, in either
; order.
368 ; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i32:
370 ; SICIVI: s_mov_b32 m0
373 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
374 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
400 define amdgpu_kernel void @local_sextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
401 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
402 %ext = sext <16 x i16> %load to <16 x i32>
403 store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
; Loads a <32 x i16> from addrspace(3) %in, zero-extends each element to i32,
; and stores the <32 x i32> result to addrspace(3) %out.
; SICIVI runs expect an `s_mov_b32 m0`; all GCN runs expect four ds_read2_b64
; instructions (offset1:1, then offset pairs 2/3, 4/5 and 6/7) in any order.
407 ; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i32:
409 ; SICIVI: s_mov_b32 m0
411 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
412 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3
413 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5
414 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7
432 define amdgpu_kernel void @local_zextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
433 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
434 %ext = zext <32 x i16> %load to <32 x i32>
435 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
; Loads a <32 x i16> from addrspace(3) %in, sign-extends each element to i32,
; and stores the <32 x i32> result to addrspace(3) %out.
; SICIVI runs expect an `s_mov_b32 m0`. All GCN runs expect four ds_read2_b64
; instructions (offset1:1 and offset pairs 2/3, 4/5, 6/7) and eight
; ds_write2_b64 instructions (offset1:1 and offset pairs 2/3 through 14/15);
; the -DAG form lets these match in any order.
439 ; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i32:
441 ; SICIVI: s_mov_b32 m0
443 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
444 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5
445 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
446 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7
447 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
448 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13
449 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11
450 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9
451 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7
452 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
453 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
454 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
472 define amdgpu_kernel void @local_sextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
473 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
474 %ext = sext <32 x i16> %load to <32 x i32>
475 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
; Loads a <64 x i16> from addrspace(3) %in, zero-extends each element to i32,
; and stores the <64 x i32> result to addrspace(3) %out.
; SICIVI runs expect an `s_mov_b32 m0`. All GCN runs expect eight
; ds_read2_b64 instructions (offset1:1 and offset pairs 2/3 through 14/15)
; and sixteen ds_write2_b64 instructions (offset1:1 and offset pairs 2/3
; through 30/31); the -DAG form lets these match in any order.
479 ; FUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i32:
481 ; SICIVI: s_mov_b32 m0
483 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:14 offset1:15
484 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
485 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3
486 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5
487 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7
488 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:8 offset1:9
489 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:12 offset1:13
490 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:10 offset1:11
491 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31
492 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29
493 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:26 offset1:27
494 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:24 offset1:25
495 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:22 offset1:23
496 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:20 offset1:21
497 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:18 offset1:19
498 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:16 offset1:17
499 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
500 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13
501 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11
502 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9
503 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7
504 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
505 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
506 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
540 define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
541 %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
542 %ext = zext <64 x i16> %load to <64 x i32>
543 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
; Loads a <64 x i16> from addrspace(3) %in, sign-extends each element to i32,
; and stores the <64 x i32> result to addrspace(3) %out.
; The only check visible here is `s_mov_b32 m0` on the SICIVI runs.
547 ; FUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i32:
549 ; SICIVI: s_mov_b32 m0
583 define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
584 %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
585 %ext = sext <64 x i16> %load to <64 x i32>
586 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
; Loads an i16 from addrspace(3) %in, zero-extends it to i64, and stores the
; i64 to addrspace(3) %out.
; SICIVI runs expect an `s_mov_b32 m0`. All GCN runs expect a ds_read_u16 into
; the low register, a v_mov_b32 of 0 into the high register, and a single
; ds_write_b64 of that low/high register pair. The r600 run (EG checks)
; expects an LDS_USHORT_READ_RET plus the OQAP and KC0[2].Y moves.
590 ; FUNC-LABEL: {{^}}local_zextload_i16_to_i64:
592 ; SICIVI: s_mov_b32 m0
594 ; GCN-DAG: ds_read_u16 v[[LO:[0-9]+]],
595 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
597 ; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]
599 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
600 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
601 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
602 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
604 define amdgpu_kernel void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
605 %a = load i16, i16 addrspace(3)* %in
606 %ext = zext i16 %a to i64
607 store i64 %ext, i64 addrspace(3)* %out
; Loads an i16 from addrspace(3) %in, sign-extends it to i64, and stores the
; i64 to addrspace(3) %out.
; SICIVI runs expect an `s_mov_b32 m0`. The low half differs by target: the
; SI run expects a ds_read_i16 directly into the low register, while the
; GFX89 runs expect a ds_read_u16 followed by a v_bfe_i32 (offset 0, width 16)
; that produces the low register. All GCN runs then expect the high register
; from v_ashrrev_i32 by 31 of the low register, and one ds_write_b64 of the
; low/high pair. The r600 run expects an LDS_USHORT_READ_RET, a BFE_INT, and
; an LDS_WRITE of the BFE_INT result.
611 ; FUNC-LABEL: {{^}}local_sextload_i16_to_i64:
613 ; SICIVI: s_mov_b32 m0
615 ; FIXME: Need to optimize this sequence to avoid an extra shift.
616 ; t25: i32,ch = load<LD2[%in(addrspace=3)], anyext from i16> t12, t10, undef:i32
617 ; t28: i64 = any_extend t25
618 ; t30: i64 = sign_extend_inreg t28, ValueType:ch:i16
619 ; SI: ds_read_i16 v[[LO:[0-9]+]],
620 ; GFX89: ds_read_u16 v[[ULO:[0-9]+]]
621 ; GFX89: v_bfe_i32 v[[LO:[0-9]+]], v[[ULO]], 0, 16
622 ; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
624 ; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]
626 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
627 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
628 ; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
629 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
630 ; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
633 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
634 define amdgpu_kernel void @local_sextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
635 %a = load i16, i16 addrspace(3)* %in
636 %ext = sext i16 %a to i64
637 store i64 %ext, i64 addrspace(3)* %out
; Single-element vector variant: loads a <1 x i16> from addrspace(3) %in,
; zero-extends it to <1 x i64>, and stores the result to addrspace(3) %out.
; SICIVI runs expect an `s_mov_b32 m0`. The r600 run (EG checks) expects an
; LDS_USHORT_READ_RET plus the OQAP and KC0[2].Y moves.
641 ; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i64:
643 ; SICIVI: s_mov_b32 m0
646 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
647 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
648 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
649 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
651 define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
652 %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
653 %ext = zext <1 x i16> %load to <1 x i64>
654 store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
; Single-element vector variant: loads a <1 x i16> from addrspace(3) %in,
; sign-extends it to <1 x i64>, and stores the result to addrspace(3) %out.
; SICIVI runs expect an `s_mov_b32 m0`. The r600 run (EG checks) expects an
; LDS_USHORT_READ_RET, a BFE_INT, and an LDS_WRITE of the BFE_INT result.
658 ; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i64:
660 ; SICIVI: s_mov_b32 m0
663 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
664 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
665 ; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
666 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
667 ; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
670 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
671 define amdgpu_kernel void @local_sextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
672 %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
673 %ext = sext <1 x i16> %load to <1 x i64>
674 store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
; Loads a <2 x i16> from addrspace(3) %in, zero-extends each element to i64,
; and stores the <2 x i64> result to addrspace(3) %out.
; The only check visible here is `s_mov_b32 m0` on the SICIVI runs.
678 ; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i64:
680 ; SICIVI: s_mov_b32 m0
684 define amdgpu_kernel void @local_zextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
685 %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
686 %ext = zext <2 x i16> %load to <2 x i64>
687 store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
; Loads a <2 x i16> from addrspace(3) %in, sign-extends each element to i64,
; and stores the <2 x i64> result to addrspace(3) %out.
; The only check visible here is `s_mov_b32 m0` on the SICIVI runs.
691 ; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i64:
693 ; SICIVI: s_mov_b32 m0
699 define amdgpu_kernel void @local_sextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
700 %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
701 %ext = sext <2 x i16> %load to <2 x i64>
702 store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
; Loads a <4 x i16> from addrspace(3) %in, zero-extends each element to i64,
; and stores the <4 x i64> result to addrspace(3) %out.
; The only check visible here is `s_mov_b32 m0` on the SICIVI runs.
706 ; FUNC-LABEL: {{^}}local_zextload_v4i16_to_v4i64:
708 ; SICIVI: s_mov_b32 m0
713 define amdgpu_kernel void @local_zextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
714 %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
715 %ext = zext <4 x i16> %load to <4 x i64>
716 store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
; Loads a <4 x i16> from addrspace(3) %in, sign-extends each element to i64,
; and stores the <4 x i64> result to addrspace(3) %out.
; The only check visible here is `s_mov_b32 m0` on the SICIVI runs.
720 ; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i64:
722 ; SICIVI: s_mov_b32 m0
731 define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
732 %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
733 %ext = sext <4 x i16> %load to <4 x i64>
734 store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
; Loads an <8 x i16> from addrspace(3) %in, zero-extends each element to i64,
; and stores the <8 x i64> result to addrspace(3) %out.
; The only check visible here is `s_mov_b32 m0` on the SICIVI runs.
738 ; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i64:
740 ; SICIVI: s_mov_b32 m0
747 define amdgpu_kernel void @local_zextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
748 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
749 %ext = zext <8 x i16> %load to <8 x i64>
750 store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
; Loads an <8 x i16> from addrspace(3) %in, sign-extends each element to i64,
; and stores the <8 x i64> result to addrspace(3) %out.
; The only check visible here is `s_mov_b32 m0` on the SICIVI runs.
754 ; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i64:
756 ; SICIVI: s_mov_b32 m0
771 define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
772 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
773 %ext = sext <8 x i16> %load to <8 x i64>
774 store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
; Loads a <16 x i16> from addrspace(3) %in, zero-extends each element to i64,
; and stores the <16 x i64> result to addrspace(3) %out.
; The only check visible here is `s_mov_b32 m0` on the SICIVI runs.
778 ; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i64:
780 ; SICIVI: s_mov_b32 m0
791 define amdgpu_kernel void @local_zextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
792 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
793 %ext = zext <16 x i16> %load to <16 x i64>
794 store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
; Loads a <16 x i16> from addrspace(3) %in, sign-extends each element to i64,
; and stores the <16 x i64> result to addrspace(3) %out.
; The only check visible here is `s_mov_b32 m0` on the SICIVI runs.
798 ; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i64:
800 ; SICIVI: s_mov_b32 m0
827 define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
828 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
829 %ext = sext <16 x i16> %load to <16 x i64>
830 store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
; Loads a <32 x i16> from addrspace(3) %in, zero-extends each element to i64,
; and stores the <32 x i64> result to addrspace(3) %out.
; The only check visible here is `s_mov_b32 m0` on the SICIVI runs.
834 ; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i64:
836 ; SICIVI: s_mov_b32 m0
855 define amdgpu_kernel void @local_zextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
856 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
857 %ext = zext <32 x i16> %load to <32 x i64>
858 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
; Loads a <32 x i16> from addrspace(3) %in, sign-extends each element to i64,
; and stores the <32 x i64> result to addrspace(3) %out.
; The only check visible here is `s_mov_b32 m0` on the SICIVI runs.
862 ; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i64:
864 ; SICIVI: s_mov_b32 m0
915 define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
916 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
917 %ext = sext <32 x i16> %load to <32 x i64>
918 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
922 ; ; XFUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i64:
923 ; define amdgpu_kernel void @local_zextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
924 ; %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
925 ; %ext = zext <64 x i16> %load to <64 x i64>
926 ; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
930 ; ; XFUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i64:
931 ; define amdgpu_kernel void @local_sextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
932 ; %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
933 ; %ext = sext <64 x i16> %load to <64 x i64>
934 ; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
; Attribute group #0, attached to every kernel in this file whose define line
; ends in `#0`; it contains only nounwind.
938 attributes #0 = { nounwind }