; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,HAWAII %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,FIJI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Store of an i56 value through an addrspace(3) pointer with 8-byte alignment
; (callable function, not a kernel).
; The expected code splits the value into three DS writes: 32 bits at offset
; 0, 16 bits at offset 4 and 8 bits at offset 6. The hawaii/fiji sequence
; shifts the top byte down with v_lshrrev_b32 first; the gfx900 sequence
; writes it straight from the high half with ds_write_b8_d16_hi.
define void @local_store_i56(i56 addrspace(3)* %ptr, i56 %arg) #0 {
; CIVI-LABEL: local_store_i56:
; CIVI:       ; %bb.0:
; CIVI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIVI-NEXT:    s_mov_b32 m0, -1
; CIVI-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; CIVI-NEXT:    ds_write_b16 v0, v2 offset:4
; CIVI-NEXT:    ds_write_b32 v0, v1
; CIVI-NEXT:    ds_write_b8 v0, v3 offset:6
; CIVI-NEXT:    s_waitcnt lgkmcnt(0)
; CIVI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: local_store_i56:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_write_b8_d16_hi v0, v2 offset:6
; GFX9-NEXT:    ds_write_b16 v0, v2 offset:4
; GFX9-NEXT:    ds_write_b32 v0, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  store i56 %arg, i56 addrspace(3)* %ptr, align 8
  ret void
}

; Kernel that stores an i55 argument through an addrspace(3) pointer with
; 8-byte alignment.
; The expected code writes 32 bits at offset 0, 16 bits at offset 4, and one
; byte at offset 6. That last byte is loaded separately from offset 14 off the
; s[4:5] base and reduced to 7 bits before the write (v_and_b32 with 0x7f on
; hawaii/fiji, v_bfe_u32 ..., 16, 7 on gfx900). Each sequence is checked
; through to the closing s_endpgm.
define amdgpu_kernel void @local_store_i55(i55 addrspace(3)* %ptr, i55 %arg) #0 {
; HAWAII-LABEL: local_store_i55:
; HAWAII:       ; %bb.0:
; HAWAII-NEXT:    s_add_u32 s0, s4, 14
; HAWAII-NEXT:    s_addc_u32 s1, s5, 0
; HAWAII-NEXT:    v_mov_b32_e32 v0, s0
; HAWAII-NEXT:    v_mov_b32_e32 v1, s1
; HAWAII-NEXT:    flat_load_ubyte v0, v[0:1]
; HAWAII-NEXT:    s_load_dword s0, s[4:5], 0x0
; HAWAII-NEXT:    s_load_dword s1, s[4:5], 0x2
; HAWAII-NEXT:    s_load_dword s2, s[4:5], 0x3
; HAWAII-NEXT:    s_mov_b32 m0, -1
; HAWAII-NEXT:    s_waitcnt lgkmcnt(0)
; HAWAII-NEXT:    v_mov_b32_e32 v1, s0
; HAWAII-NEXT:    v_mov_b32_e32 v3, s1
; HAWAII-NEXT:    v_mov_b32_e32 v2, s2
; HAWAII-NEXT:    ds_write_b16 v1, v2 offset:4
; HAWAII-NEXT:    s_waitcnt vmcnt(0)
; HAWAII-NEXT:    v_and_b32_e32 v0, 0x7f, v0
; HAWAII-NEXT:    ds_write_b8 v1, v0 offset:6
; HAWAII-NEXT:    ds_write_b32 v1, v3
; HAWAII-NEXT:    s_endpgm
;
; FIJI-LABEL: local_store_i55:
; FIJI:       ; %bb.0:
; FIJI-NEXT:    s_add_u32 s0, s4, 14
; FIJI-NEXT:    s_addc_u32 s1, s5, 0
; FIJI-NEXT:    v_mov_b32_e32 v0, s0
; FIJI-NEXT:    v_mov_b32_e32 v1, s1
; FIJI-NEXT:    flat_load_ubyte v0, v[0:1]
; FIJI-NEXT:    s_load_dword s0, s[4:5], 0x0
; FIJI-NEXT:    s_load_dword s1, s[4:5], 0x8
; FIJI-NEXT:    s_load_dword s2, s[4:5], 0xc
; FIJI-NEXT:    s_mov_b32 m0, -1
; FIJI-NEXT:    s_waitcnt lgkmcnt(0)
; FIJI-NEXT:    v_mov_b32_e32 v1, s0
; FIJI-NEXT:    v_mov_b32_e32 v3, s1
; FIJI-NEXT:    v_mov_b32_e32 v2, s2
; FIJI-NEXT:    ds_write_b16 v1, v2 offset:4
; FIJI-NEXT:    s_waitcnt vmcnt(0)
; FIJI-NEXT:    v_and_b32_e32 v0, 0x7f, v0
; FIJI-NEXT:    ds_write_b8 v1, v0 offset:6
; FIJI-NEXT:    ds_write_b32 v1, v3
; FIJI-NEXT:    s_endpgm
;
; GFX9-LABEL: local_store_i55:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    v_mov_b32_e32 v1, s5
; GFX9-NEXT:    global_load_ubyte_d16_hi v0, v[0:1], off offset:14
; GFX9-NEXT:    s_load_dword s0, s[4:5], 0x0
; GFX9-NEXT:    s_load_dword s1, s[4:5], 0x8
; GFX9-NEXT:    s_load_dword s2, s[4:5], 0xc
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, s0
; GFX9-NEXT:    v_mov_b32_e32 v3, s1
; GFX9-NEXT:    v_mov_b32_e32 v2, s2
; GFX9-NEXT:    ds_write_b16 v1, v2 offset:4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_bfe_u32 v0, v0, 16, 7
; GFX9-NEXT:    ds_write_b8 v1, v0 offset:6
; GFX9-NEXT:    ds_write_b32 v1, v3
; GFX9-NEXT:    s_endpgm
  store i55 %arg, i55 addrspace(3)* %ptr, align 8
  ret void
}

; Kernel that stores an i48 argument through an addrspace(3) pointer with
; 8-byte alignment.
; The expected code uses two DS writes on every target: 32 bits at offset 0
; and 16 bits at offset 4. The targets differ only in the s_load_dword offset
; operands and in the m0 initialisation, which appears in the hawaii/fiji
; sequences but not the gfx900 one.
define amdgpu_kernel void @local_store_i48(i48 addrspace(3)* %ptr, i48 %arg) #0 {
; HAWAII-LABEL: local_store_i48:
; HAWAII:       ; %bb.0:
; HAWAII-NEXT:    s_load_dword s0, s[4:5], 0x0
; HAWAII-NEXT:    s_load_dword s1, s[4:5], 0x2
; HAWAII-NEXT:    s_load_dword s2, s[4:5], 0x3
; HAWAII-NEXT:    s_mov_b32 m0, -1
; HAWAII-NEXT:    s_waitcnt lgkmcnt(0)
; HAWAII-NEXT:    v_mov_b32_e32 v0, s0
; HAWAII-NEXT:    v_mov_b32_e32 v1, s1
; HAWAII-NEXT:    v_mov_b32_e32 v2, s2
; HAWAII-NEXT:    ds_write_b16 v0, v2 offset:4
; HAWAII-NEXT:    ds_write_b32 v0, v1
; HAWAII-NEXT:    s_endpgm
;
; FIJI-LABEL: local_store_i48:
; FIJI:       ; %bb.0:
; FIJI-NEXT:    s_load_dword s0, s[4:5], 0x0
; FIJI-NEXT:    s_load_dword s1, s[4:5], 0x8
; FIJI-NEXT:    s_load_dword s2, s[4:5], 0xc
; FIJI-NEXT:    s_mov_b32 m0, -1
; FIJI-NEXT:    s_waitcnt lgkmcnt(0)
; FIJI-NEXT:    v_mov_b32_e32 v0, s0
; FIJI-NEXT:    v_mov_b32_e32 v1, s1
; FIJI-NEXT:    v_mov_b32_e32 v2, s2
; FIJI-NEXT:    ds_write_b16 v0, v2 offset:4
; FIJI-NEXT:    ds_write_b32 v0, v1
; FIJI-NEXT:    s_endpgm
;
; GFX9-LABEL: local_store_i48:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s0, s[4:5], 0x0
; GFX9-NEXT:    s_load_dword s1, s[4:5], 0x8
; GFX9-NEXT:    s_load_dword s2, s[4:5], 0xc
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_mov_b32_e32 v2, s2
; GFX9-NEXT:    ds_write_b16 v0, v2 offset:4
; GFX9-NEXT:    ds_write_b32 v0, v1
; GFX9-NEXT:    s_endpgm
  store i48 %arg, i48 addrspace(3)* %ptr, align 8
  ret void
}

; Kernel that stores an i65 argument through an addrspace(3) pointer with
; 8-byte alignment.
; The expected code writes the low 64 bits with a single ds_write_b64 and the
; remaining bit as one byte at offset 8, after masking the loaded dword with
; s_and_b32 ..., 1.
define amdgpu_kernel void @local_store_i65(i65 addrspace(3)* %ptr, i65 %arg) #0 {
; HAWAII-LABEL: local_store_i65:
; HAWAII:       ; %bb.0:
; HAWAII-NEXT:    s_load_dword s2, s[4:5], 0x0
; HAWAII-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
; HAWAII-NEXT:    s_load_dword s3, s[4:5], 0x4
; HAWAII-NEXT:    s_mov_b32 m0, -1
; HAWAII-NEXT:    s_waitcnt lgkmcnt(0)
; HAWAII-NEXT:    v_mov_b32_e32 v2, s2
; HAWAII-NEXT:    v_mov_b32_e32 v0, s0
; HAWAII-NEXT:    v_mov_b32_e32 v1, s1
; HAWAII-NEXT:    s_and_b32 s0, s3, 1
; HAWAII-NEXT:    v_mov_b32_e32 v3, s0
; HAWAII-NEXT:    ds_write_b8 v2, v3 offset:8
; HAWAII-NEXT:    ds_write_b64 v2, v[0:1]
; HAWAII-NEXT:    s_endpgm
;
; FIJI-LABEL: local_store_i65:
; FIJI:       ; %bb.0:
; FIJI-NEXT:    s_load_dword s2, s[4:5], 0x0
; FIJI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; FIJI-NEXT:    s_load_dword s3, s[4:5], 0x10
; FIJI-NEXT:    s_mov_b32 m0, -1
; FIJI-NEXT:    s_waitcnt lgkmcnt(0)
; FIJI-NEXT:    v_mov_b32_e32 v2, s2
; FIJI-NEXT:    v_mov_b32_e32 v0, s0
; FIJI-NEXT:    v_mov_b32_e32 v1, s1
; FIJI-NEXT:    s_and_b32 s0, s3, 1
; FIJI-NEXT:    v_mov_b32_e32 v3, s0
; FIJI-NEXT:    ds_write_b8 v2, v3 offset:8
; FIJI-NEXT:    ds_write_b64 v2, v[0:1]
; FIJI-NEXT:    s_endpgm
;
; GFX9-LABEL: local_store_i65:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX9-NEXT:    s_load_dword s3, s[4:5], 0x10
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, s2
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    s_and_b32 s0, s3, 1
; GFX9-NEXT:    v_mov_b32_e32 v3, s0
; GFX9-NEXT:    ds_write_b8 v2, v3 offset:8
; GFX9-NEXT:    ds_write_b64 v2, v[0:1]
; GFX9-NEXT:    s_endpgm
  store i65 %arg, i65 addrspace(3)* %ptr, align 8
  ret void
}

; Store of an i13 value through an addrspace(3) pointer with 8-byte alignment
; (callable function, not a kernel).
; The expected code masks the value to 13 bits (0x1fff) and emits a single
; 16-bit DS write. The hawaii/fiji sequence additionally initialises m0.
define void @local_store_i13(i13 addrspace(3)* %ptr, i13 %arg) #0 {
; CIVI-LABEL: local_store_i13:
; CIVI:       ; %bb.0:
; CIVI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIVI-NEXT:    v_and_b32_e32 v1, 0x1fff, v1
; CIVI-NEXT:    s_mov_b32 m0, -1
; CIVI-NEXT:    ds_write_b16 v0, v1
; CIVI-NEXT:    s_waitcnt lgkmcnt(0)
; CIVI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: local_store_i13:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_and_b32_e32 v1, 0x1fff, v1
; GFX9-NEXT:    ds_write_b16 v0, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  store i13 %arg, i13 addrspace(3)* %ptr, align 8
  ret void
}

; Store of an i17 value through an addrspace(3) pointer with 8-byte alignment
; (callable function, not a kernel).
; The expected code writes the low 16 bits with ds_write_b16 and bit 16 as a
; separate byte at offset 2. The hawaii/fiji sequence extracts that bit with
; v_bfe_u32 ..., 16, 1; the gfx900 sequence masks the register to 17 bits
; (0x1ffff) and writes the byte with ds_write_b8_d16_hi.
define void @local_store_i17(i17 addrspace(3)* %ptr, i17 %arg) #0 {
; CIVI-LABEL: local_store_i17:
; CIVI:       ; %bb.0:
; CIVI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIVI-NEXT:    v_bfe_u32 v2, v1, 16, 1
; CIVI-NEXT:    s_mov_b32 m0, -1
; CIVI-NEXT:    ds_write_b16 v0, v1
; CIVI-NEXT:    ds_write_b8 v0, v2 offset:2
; CIVI-NEXT:    s_waitcnt lgkmcnt(0)
; CIVI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: local_store_i17:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_write_b16 v0, v1
; GFX9-NEXT:    v_and_b32_e32 v1, 0x1ffff, v1
; GFX9-NEXT:    ds_write_b8_d16_hi v0, v1 offset:2
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  store i17 %arg, i17 addrspace(3)* %ptr, align 8
  ret void
}

; Attribute group referenced as #0 by every function in this file.
attributes #0 = { nounwind }