; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefixes=CHECK,CIVI %s
; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI %s
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA %s
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA,GFX9 %s
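
; Prefix coverage: CIVI matches the mesa3d bonaire/tonga runs, HSA matches the
; amdhsa fiji/gfx900 runs, and GFX9 matches only the gfx900 run (used below
; for the immediate-offset folding checks).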

; Disable optimizations in case there are optimizations added that
; specialize away generic pointer accesses.

; These test cases might become useless when there are optimizations to
; remove generic pointers.
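
; Address spaces used below: addrspace(0) is the flat/generic address space
; on AMDGPU, addrspace(1) is global memory, and addrspace(5) is private
; (scratch) memory.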

; CHECK-LABEL: {{^}}store_flat_i32:
; CHECK-DAG: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]],
; CHECK-DAG: s_load_dword s[[SDATA:[0-9]+]],
; CHECK: s_waitcnt lgkmcnt(0)
; CHECK-DAG: v_mov_b32_e32 v[[DATA:[0-9]+]], s[[SDATA]]
; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
; CHECK: flat_store_dword v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}, v[[DATA]]
define amdgpu_kernel void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
  %fptr = addrspacecast i32 addrspace(1)* %gptr to i32*
  store volatile i32 %x, i32* %fptr, align 4
  ret void
}

; CHECK-LABEL: {{^}}store_flat_i64:
; CHECK: flat_store_dwordx2
define amdgpu_kernel void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
  %fptr = addrspacecast i64 addrspace(1)* %gptr to i64*
  store volatile i64 %x, i64* %fptr, align 8
  ret void
}

; CHECK-LABEL: {{^}}store_flat_v4i32:
; CHECK: flat_store_dwordx4
define amdgpu_kernel void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
  %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32>*
  store volatile <4 x i32> %x, <4 x i32>* %fptr, align 16
  ret void
}

; CHECK-LABEL: {{^}}store_flat_trunc_i16:
; CHECK: flat_store_short
define amdgpu_kernel void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16*
  %y = trunc i32 %x to i16
  store volatile i16 %y, i16* %fptr, align 2
  ret void
}

; CHECK-LABEL: {{^}}store_flat_trunc_i8:
; CHECK: flat_store_byte
define amdgpu_kernel void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8*
  %y = trunc i32 %x to i8
  store volatile i8 %y, i8* %fptr, align 2
  ret void
}

; CHECK-LABEL: load_flat_i32:
; CHECK: flat_load_dword
define amdgpu_kernel void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i32 addrspace(1)* %gptr to i32*
  %fload = load volatile i32, i32* %fptr, align 4
  store i32 %fload, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: load_flat_i64:
; CHECK: flat_load_dwordx2
define amdgpu_kernel void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i64 addrspace(1)* %gptr to i64*
  %fload = load volatile i64, i64* %fptr, align 8
  store i64 %fload, i64 addrspace(1)* %out, align 8
  ret void
}

; CHECK-LABEL: load_flat_v4i32:
; CHECK: flat_load_dwordx4
define amdgpu_kernel void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32>*
  %fload = load volatile <4 x i32>, <4 x i32>* %fptr, align 32
  store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; CHECK-LABEL: sextload_flat_i8:
; CHECK: flat_load_sbyte
define amdgpu_kernel void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8*
  %fload = load volatile i8, i8* %fptr, align 4
  %ext = sext i8 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: zextload_flat_i8:
; CHECK: flat_load_ubyte
define amdgpu_kernel void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8*
  %fload = load volatile i8, i8* %fptr, align 4
  %ext = zext i8 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: sextload_flat_i16:
; CHECK: flat_load_sshort
define amdgpu_kernel void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16*
  %fload = load volatile i16, i16* %fptr, align 4
  %ext = sext i16 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: zextload_flat_i16:
; CHECK: flat_load_ushort
define amdgpu_kernel void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16*
  %fload = load volatile i16, i16* %fptr, align 4
  %ext = zext i16 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}
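
; The scratch tests below allocate in the private address space (5) and cast
; the pointer to flat. With align 1, the i32 access cannot be done as a
; single dword operation, so it is split into four byte accesses.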

; CHECK-LABEL: flat_scratch_unaligned_load:
; CHECK: flat_load_ubyte
; CHECK: flat_load_ubyte
; CHECK: flat_load_ubyte
; CHECK: flat_load_ubyte
define amdgpu_kernel void @flat_scratch_unaligned_load() {
  %scratch = alloca i32, addrspace(5)
  %fptr = addrspacecast i32 addrspace(5)* %scratch to i32*
  %ld = load volatile i32, i32* %fptr, align 1
  ret void
}

; CHECK-LABEL: flat_scratch_unaligned_store:
; CHECK: flat_store_byte
; CHECK: flat_store_byte
; CHECK: flat_store_byte
; CHECK: flat_store_byte
define amdgpu_kernel void @flat_scratch_unaligned_store() {
  %scratch = alloca i32, addrspace(5)
  %fptr = addrspacecast i32 addrspace(5)* %scratch to i32*
  store volatile i32 0, i32* %fptr, align 1
  ret void
}

; CHECK-LABEL: flat_scratch_multidword_load:
; HSA: flat_load_dword
; HSA: flat_load_dword
; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
define amdgpu_kernel void @flat_scratch_multidword_load() {
  %scratch = alloca <2 x i32>, addrspace(5)
  %fptr = addrspacecast <2 x i32> addrspace(5)* %scratch to <2 x i32>*
  %ld = load volatile <2 x i32>, <2 x i32>* %fptr
  ret void
}

; CHECK-LABEL: flat_scratch_multidword_store:
; HSA: flat_store_dword
; HSA: flat_store_dword
; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
define amdgpu_kernel void @flat_scratch_multidword_store() {
  %scratch = alloca <2 x i32>, addrspace(5)
  %fptr = addrspacecast <2 x i32> addrspace(5)* %scratch to <2 x i32>*
  store volatile <2 x i32> zeroinitializer, <2 x i32>* %fptr
  ret void
}
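
; The offset tests below rely on GFX9 flat instructions having an immediate
; offset field, which CI/VI flat instructions lack: 4095 is the largest
; offset that folds into the instruction, while 4096 and negative offsets
; must be added to the address with separate instructions on all targets.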

; CHECK-LABEL: {{^}}store_flat_i8_max_offset:
; CIVI: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
; GFX9: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:4095{{$}}
define amdgpu_kernel void @store_flat_i8_max_offset(i8* %fptr, i8 %x) #0 {
  %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 4095
  store volatile i8 %x, i8* %fptr.offset
  ret void
}

; CHECK-LABEL: {{^}}store_flat_i8_max_offset_p1:
; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @store_flat_i8_max_offset_p1(i8* %fptr, i8 %x) #0 {
  %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 4096
  store volatile i8 %x, i8* %fptr.offset
  ret void
}

; CHECK-LABEL: {{^}}store_flat_i8_neg_offset:
; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @store_flat_i8_neg_offset(i8* %fptr, i8 %x) #0 {
  %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 -2
  store volatile i8 %x, i8* %fptr.offset
  ret void
}

; CHECK-LABEL: {{^}}load_flat_i8_max_offset:
; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
define amdgpu_kernel void @load_flat_i8_max_offset(i8* %fptr) #0 {
  %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 4095
  %val = load volatile i8, i8* %fptr.offset
  ret void
}

; CHECK-LABEL: {{^}}load_flat_i8_max_offset_p1:
; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @load_flat_i8_max_offset_p1(i8* %fptr) #0 {
  %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 4096
  %val = load volatile i8, i8* %fptr.offset
  ret void
}

; CHECK-LABEL: {{^}}load_flat_i8_neg_offset:
; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @load_flat_i8_neg_offset(i8* %fptr) #0 {
  %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 -2
  %val = load volatile i8, i8* %fptr.offset
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }