1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 # RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
3 # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
4 # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
5 # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
6 # RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
# Instruction-select test: seq_cst atomic G_LOAD of an s32 value (align 4) through
# a p3 pointer in addrspace 3.
# Expected output per check prefix:
#   GFX6/GFX7: $m0 is set to -1 with S_MOV_B32, then DS_READ_B32 (implicit $m0).
#   GFX9:      DS_READ_B32_gfx9 with no $m0 write.
# In every case the loaded value is copied to $vgpr0.
10 name: load_atomic_local_s32_seq_cst
13 tracksRegLiveness: true
19 ; GFX6-LABEL: name: load_atomic_local_s32_seq_cst
20 ; GFX6: liveins: $vgpr0
22 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
23 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
24 ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3)
25 ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
26 ; GFX7-LABEL: name: load_atomic_local_s32_seq_cst
27 ; GFX7: liveins: $vgpr0
29 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
30 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
31 ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3)
32 ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
33 ; GFX9-LABEL: name: load_atomic_local_s32_seq_cst
34 ; GFX9: liveins: $vgpr0
36 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
37 ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 3)
38 ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
39 %0:vgpr(p3) = COPY $vgpr0
40 %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst (s32), align 4, addrspace 3)
# Instruction-select test: seq_cst atomic G_LOAD of a <2 x s16> value (align 4)
# through a p3 pointer in addrspace 3.
# Expected output: on all check prefixes the generic G_LOAD is still present
# (the vector-typed atomic load is left unselected); GFX6/GFX7 additionally
# expect the $m0 = S_MOV_B32 -1 write before it.
# NOTE(review): presumably this is a tolerated selection failure enabled by
# -global-isel-abort=0 on the RUN lines - confirm.
47 name: load_atomic_local_v2s16_seq_cst
50 tracksRegLiveness: true
56 ; GFX6-LABEL: name: load_atomic_local_v2s16_seq_cst
57 ; GFX6: liveins: $vgpr0
59 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
60 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
61 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s16>), addrspace 3)
62 ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
63 ; GFX7-LABEL: name: load_atomic_local_v2s16_seq_cst
64 ; GFX7: liveins: $vgpr0
66 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
67 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
68 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s16>), addrspace 3)
69 ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
70 ; GFX9-LABEL: name: load_atomic_local_v2s16_seq_cst
71 ; GFX9: liveins: $vgpr0
73 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
74 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s16>), addrspace 3)
75 ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
76 %0:vgpr(p3) = COPY $vgpr0
77 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s16>), align 4, addrspace 3)
# Instruction-select test: seq_cst atomic G_LOAD of a p3 pointer value (align 4)
# through a p3 pointer in addrspace 3.
# Expected output: on all check prefixes the generic G_LOAD is still present
# (the pointer-typed atomic load is left unselected); GFX6/GFX7 additionally
# expect the $m0 = S_MOV_B32 -1 write before it.
# NOTE(review): presumably this is a tolerated selection failure enabled by
# -global-isel-abort=0 on the RUN lines - confirm.
84 name: load_atomic_local_p3_seq_cst
87 tracksRegLiveness: true
93 ; GFX6-LABEL: name: load_atomic_local_p3_seq_cst
94 ; GFX6: liveins: $vgpr0
96 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
97 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
98 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst (p3), addrspace 3)
99 ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](p3)
100 ; GFX7-LABEL: name: load_atomic_local_p3_seq_cst
101 ; GFX7: liveins: $vgpr0
103 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
104 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
105 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst (p3), addrspace 3)
106 ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](p3)
107 ; GFX9-LABEL: name: load_atomic_local_p3_seq_cst
108 ; GFX9: liveins: $vgpr0
110 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
111 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst (p3), addrspace 3)
112 ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3)
113 %0:vgpr(p3) = COPY $vgpr0
114 %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst (p3), align 4, addrspace 3)
# Instruction-select test: seq_cst atomic G_LOAD of an s64 value (align 8) through
# a p3 pointer in addrspace 3.
# Expected output per check prefix:
#   GFX6/GFX7: $m0 is set to -1 with S_MOV_B32, then DS_READ_B64 (implicit $m0)
#              producing a vreg_64.
#   GFX9:      DS_READ_B64_gfx9 with no $m0 write.
# In every case the 64-bit result is copied to $vgpr0_vgpr1.
121 name: load_atomic_local_s64_seq_cst
123 regBankSelected: true
124 tracksRegLiveness: true
130 ; GFX6-LABEL: name: load_atomic_local_s64_seq_cst
131 ; GFX6: liveins: $vgpr0
133 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
134 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
135 ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s64), addrspace 3)
136 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
137 ; GFX7-LABEL: name: load_atomic_local_s64_seq_cst
138 ; GFX7: liveins: $vgpr0
140 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
141 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
142 ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s64), addrspace 3)
143 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
144 ; GFX9-LABEL: name: load_atomic_local_s64_seq_cst
145 ; GFX9: liveins: $vgpr0
147 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
148 ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 3)
149 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
150 %0:vgpr(p3) = COPY $vgpr0
151 %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst (s64), align 8, addrspace 3)
152 $vgpr0_vgpr1 = COPY %1
# Instruction-select test: seq_cst atomic G_LOAD of a <2 x s32> value (align 8)
# through a p3 pointer in addrspace 3; the result is copied to $vgpr0_vgpr1.
# Expected output: on all check prefixes the generic G_LOAD is still present
# (the vector-typed atomic load is left unselected, with a vreg_64 result class);
# GFX6/GFX7 additionally expect the $m0 = S_MOV_B32 -1 write before it.
# NOTE(review): presumably this is a tolerated selection failure enabled by
# -global-isel-abort=0 on the RUN lines - confirm.
158 name: load_atomic_local_v2s32_seq_cst
160 regBankSelected: true
161 tracksRegLiveness: true
167 ; GFX6-LABEL: name: load_atomic_local_v2s32_seq_cst
168 ; GFX6: liveins: $vgpr0
170 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
171 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
172 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3)
173 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
174 ; GFX7-LABEL: name: load_atomic_local_v2s32_seq_cst
175 ; GFX7: liveins: $vgpr0
177 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
178 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
179 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3)
180 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
181 ; GFX9-LABEL: name: load_atomic_local_v2s32_seq_cst
182 ; GFX9: liveins: $vgpr0
184 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
185 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3)
186 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
187 %0:vgpr(p3) = COPY $vgpr0
188 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 3)
189 $vgpr0_vgpr1 = COPY %1
# Instruction-select test: seq_cst atomic G_LOAD of a <4 x s16> value (align 8)
# through a p3 pointer in addrspace 3; the result is copied to $vgpr0_vgpr1.
# Expected output: on all check prefixes the generic G_LOAD is still present
# (the vector-typed atomic load is left unselected, with a vreg_64 result class);
# GFX6/GFX7 additionally expect the $m0 = S_MOV_B32 -1 write before it.
# NOTE(review): presumably this is a tolerated selection failure enabled by
# -global-isel-abort=0 on the RUN lines - confirm.
195 name: load_atomic_local_v4s16_seq_cst
197 regBankSelected: true
198 tracksRegLiveness: true
204 ; GFX6-LABEL: name: load_atomic_local_v4s16_seq_cst
205 ; GFX6: liveins: $vgpr0
207 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
208 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
209 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<4 x s16>), addrspace 3)
210 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
211 ; GFX7-LABEL: name: load_atomic_local_v4s16_seq_cst
212 ; GFX7: liveins: $vgpr0
214 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
215 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
216 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<4 x s16>), addrspace 3)
217 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
218 ; GFX9-LABEL: name: load_atomic_local_v4s16_seq_cst
219 ; GFX9: liveins: $vgpr0
221 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
222 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<4 x s16>), addrspace 3)
223 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
224 %0:vgpr(p3) = COPY $vgpr0
225 %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst (<4 x s16>), align 8, addrspace 3)
226 $vgpr0_vgpr1 = COPY %1
# Instruction-select test: seq_cst atomic G_LOAD of a p1 pointer value (align 8)
# through a p3 pointer in addrspace 3; the result is copied to $vgpr0_vgpr1.
# Expected output: on all check prefixes the generic G_LOAD is still present
# (the pointer-typed atomic load is left unselected, with a vreg_64 result class);
# GFX6/GFX7 additionally expect the $m0 = S_MOV_B32 -1 write before it.
# NOTE(review): presumably this is a tolerated selection failure enabled by
# -global-isel-abort=0 on the RUN lines - confirm.
232 name: load_atomic_local_p1_seq_cst
234 regBankSelected: true
235 tracksRegLiveness: true
241 ; GFX6-LABEL: name: load_atomic_local_p1_seq_cst
242 ; GFX6: liveins: $vgpr0
244 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
245 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
246 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst (p1), addrspace 3)
247 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
248 ; GFX7-LABEL: name: load_atomic_local_p1_seq_cst
249 ; GFX7: liveins: $vgpr0
251 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
252 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
253 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst (p1), addrspace 3)
254 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
255 ; GFX9-LABEL: name: load_atomic_local_p1_seq_cst
256 ; GFX9: liveins: $vgpr0
258 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
259 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst (p1), addrspace 3)
260 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
261 %0:vgpr(p3) = COPY $vgpr0
262 %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst (p1), align 8, addrspace 3)
263 $vgpr0_vgpr1 = COPY %1
# Instruction-select test: seq_cst atomic G_LOAD of a p0 pointer value (align 8)
# through a p3 pointer in addrspace 3; the result is copied to $vgpr0_vgpr1.
# Expected output: on all check prefixes the generic G_LOAD is still present
# (the pointer-typed atomic load is left unselected, with a vreg_64 result class);
# GFX6/GFX7 additionally expect the $m0 = S_MOV_B32 -1 write before it.
# NOTE(review): presumably this is a tolerated selection failure enabled by
# -global-isel-abort=0 on the RUN lines - confirm.
269 name: load_atomic_local_p0_seq_cst
271 regBankSelected: true
272 tracksRegLiveness: true
278 ; GFX6-LABEL: name: load_atomic_local_p0_seq_cst
279 ; GFX6: liveins: $vgpr0
281 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
282 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
283 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst (p0), addrspace 3)
284 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
285 ; GFX7-LABEL: name: load_atomic_local_p0_seq_cst
286 ; GFX7: liveins: $vgpr0
288 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
289 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
290 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst (p0), addrspace 3)
291 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
292 ; GFX9-LABEL: name: load_atomic_local_p0_seq_cst
293 ; GFX9: liveins: $vgpr0
295 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
296 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst (p0), addrspace 3)
297 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
298 %0:vgpr(p3) = COPY $vgpr0
299 %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst (p0), align 8, addrspace 3)
300 $vgpr0_vgpr1 = COPY %1
# Instruction-select test: seq_cst atomic s32 G_LOAD (align 4, addrspace 3) whose
# address is a G_PTR_ADD of the incoming p3 pointer and the constant 65535.
# Expected output per check prefix:
#   GFX6: the constant is materialized with V_MOV_B32_e32 and added to the
#         pointer with V_ADD_CO_U32_e64; DS_READ_B32 then uses that sum with a
#         0 immediate after the address operand.
#   GFX7: no add; DS_READ_B32 takes the original pointer with 65535 as the
#         immediate after the address operand (presumably the offset field).
#   GFX9: same as GFX7 but using DS_READ_B32_gfx9 and no $m0 write.
# GFX6/GFX7 set $m0 to -1 before the DS read.
306 name: load_atomic_local_s32_seq_cst_gep_65535
308 regBankSelected: true
309 tracksRegLiveness: true
315 ; GFX6-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535
316 ; GFX6: liveins: $vgpr0
318 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
319 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec
320 ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
321 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
322 ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3)
323 ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
324 ; GFX7-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535
325 ; GFX7: liveins: $vgpr0
327 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
328 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
329 ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3)
330 ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
331 ; GFX9-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535
332 ; GFX9: liveins: $vgpr0
334 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
335 ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load seq_cst (s32), addrspace 3)
336 ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
337 %0:vgpr(p3) = COPY $vgpr0
338 %1:vgpr(s32) = G_CONSTANT i32 65535
339 %2:vgpr(p3) = G_PTR_ADD %0, %1
340 %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 3)