1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 # RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
3 # RUN: llc -mtriple=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
4 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
5 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
6 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
7 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
# Selection of a 32-bit seq_cst atomic G_LOAD through a p3 (addrspace 3)
# pointer that arrives in $vgpr0.
# Expected output: the GFX6 and GFX7 prefixes write -1 to $m0 and select
# DS_READ_B32 (which carries an implicit $m0 use); the GFX9 prefix selects
# DS_READ_B32_gfx9 with no $m0 write. Both immediate operands are 0.
11 name: load_atomic_local_s32_seq_cst
14 tracksRegLiveness: true
20 ; GFX6-LABEL: name: load_atomic_local_s32_seq_cst
21 ; GFX6: liveins: $vgpr0
23 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
24 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
25 ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3)
26 ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
27 ; GFX7-LABEL: name: load_atomic_local_s32_seq_cst
28 ; GFX7: liveins: $vgpr0
30 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
31 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
32 ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3)
33 ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
34 ; GFX9-LABEL: name: load_atomic_local_s32_seq_cst
35 ; GFX9: liveins: $vgpr0
37 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
38 ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 3)
39 ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
40 %0:vgpr(p3) = COPY $vgpr0
41 %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst (s32), align 4, addrspace 3)
# A <2 x s16> seq_cst atomic G_LOAD from addrspace 3.
# The expected output for every prefix still contains the generic G_LOAD (only
# its result gains the vgpr_32 register class), i.e. the load is not turned
# into a DS_READ instruction. The GFX6 and GFX7 prefixes still expect the
# $m0 = S_MOV_B32 -1 write ahead of it.
# NOTE(review): presumably the pass is allowed to leave this unselected because
# the run lines pass -global-isel-abort=0 -- confirm.
48 name: load_atomic_local_v2s16_seq_cst
51 tracksRegLiveness: true
57 ; GFX6-LABEL: name: load_atomic_local_v2s16_seq_cst
58 ; GFX6: liveins: $vgpr0
60 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
61 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
62 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s16>), addrspace 3)
63 ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
64 ; GFX7-LABEL: name: load_atomic_local_v2s16_seq_cst
65 ; GFX7: liveins: $vgpr0
67 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
68 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
69 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s16>), addrspace 3)
70 ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
71 ; GFX9-LABEL: name: load_atomic_local_v2s16_seq_cst
72 ; GFX9: liveins: $vgpr0
74 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
75 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s16>), addrspace 3)
76 ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
77 %0:vgpr(p3) = COPY $vgpr0
78 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s16>), align 4, addrspace 3)
# A seq_cst atomic G_LOAD from addrspace 3 whose loaded value is itself a p3
# pointer.
# The expected output for every prefix keeps the generic G_LOAD (result
# constrained to vgpr_32), so no DS_READ instruction is selected here. The
# GFX6 and GFX7 prefixes still expect the $m0 = S_MOV_B32 -1 write.
# NOTE(review): presumably tolerated because the run lines pass
# -global-isel-abort=0 -- confirm.
85 name: load_atomic_local_p3_seq_cst
88 tracksRegLiveness: true
94 ; GFX6-LABEL: name: load_atomic_local_p3_seq_cst
95 ; GFX6: liveins: $vgpr0
97 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
98 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
99 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst (p3), addrspace 3)
100 ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](p3)
101 ; GFX7-LABEL: name: load_atomic_local_p3_seq_cst
102 ; GFX7: liveins: $vgpr0
104 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
105 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
106 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst (p3), addrspace 3)
107 ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](p3)
108 ; GFX9-LABEL: name: load_atomic_local_p3_seq_cst
109 ; GFX9: liveins: $vgpr0
111 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
112 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst (p3), addrspace 3)
113 ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3)
114 %0:vgpr(p3) = COPY $vgpr0
115 %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst (p3), align 4, addrspace 3)
# Selection of a 64-bit seq_cst atomic G_LOAD (align 8) through a p3
# (addrspace 3) pointer; the result is returned in $vgpr0_vgpr1.
# Expected output: the GFX6 and GFX7 prefixes write -1 to $m0 and select
# DS_READ_B64 into a vreg_64; the GFX9 prefix selects DS_READ_B64_gfx9 with no
# $m0 write. Both immediate operands are 0.
122 name: load_atomic_local_s64_seq_cst
124 regBankSelected: true
125 tracksRegLiveness: true
131 ; GFX6-LABEL: name: load_atomic_local_s64_seq_cst
132 ; GFX6: liveins: $vgpr0
134 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
135 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
136 ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s64), addrspace 3)
137 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
138 ; GFX7-LABEL: name: load_atomic_local_s64_seq_cst
139 ; GFX7: liveins: $vgpr0
141 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
142 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
143 ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s64), addrspace 3)
144 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
145 ; GFX9-LABEL: name: load_atomic_local_s64_seq_cst
146 ; GFX9: liveins: $vgpr0
148 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
149 ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 3)
150 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
151 %0:vgpr(p3) = COPY $vgpr0
152 %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst (s64), align 8, addrspace 3)
153 $vgpr0_vgpr1 = COPY %1
# A <2 x s32> seq_cst atomic G_LOAD (align 8) from addrspace 3.
# The expected output for every prefix keeps the generic G_LOAD (result
# constrained to vreg_64), so no DS_READ instruction is selected. The GFX6 and
# GFX7 prefixes still expect the $m0 = S_MOV_B32 -1 write.
# NOTE(review): presumably tolerated because the run lines pass
# -global-isel-abort=0 -- confirm.
159 name: load_atomic_local_v2s32_seq_cst
161 regBankSelected: true
162 tracksRegLiveness: true
168 ; GFX6-LABEL: name: load_atomic_local_v2s32_seq_cst
169 ; GFX6: liveins: $vgpr0
171 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
172 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
173 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3)
174 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
175 ; GFX7-LABEL: name: load_atomic_local_v2s32_seq_cst
176 ; GFX7: liveins: $vgpr0
178 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
179 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
180 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3)
181 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
182 ; GFX9-LABEL: name: load_atomic_local_v2s32_seq_cst
183 ; GFX9: liveins: $vgpr0
185 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
186 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3)
187 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
188 %0:vgpr(p3) = COPY $vgpr0
189 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 3)
190 $vgpr0_vgpr1 = COPY %1
# A <4 x s16> seq_cst atomic G_LOAD (align 8) from addrspace 3.
# The expected output for every prefix keeps the generic G_LOAD (result
# constrained to vreg_64), so no DS_READ instruction is selected. The GFX6 and
# GFX7 prefixes still expect the $m0 = S_MOV_B32 -1 write.
# NOTE(review): presumably tolerated because the run lines pass
# -global-isel-abort=0 -- confirm.
196 name: load_atomic_local_v4s16_seq_cst
198 regBankSelected: true
199 tracksRegLiveness: true
205 ; GFX6-LABEL: name: load_atomic_local_v4s16_seq_cst
206 ; GFX6: liveins: $vgpr0
208 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
209 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
210 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<4 x s16>), addrspace 3)
211 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
212 ; GFX7-LABEL: name: load_atomic_local_v4s16_seq_cst
213 ; GFX7: liveins: $vgpr0
215 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
216 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
217 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<4 x s16>), addrspace 3)
218 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
219 ; GFX9-LABEL: name: load_atomic_local_v4s16_seq_cst
220 ; GFX9: liveins: $vgpr0
222 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
223 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<4 x s16>), addrspace 3)
224 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
225 %0:vgpr(p3) = COPY $vgpr0
226 %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst (<4 x s16>), align 8, addrspace 3)
227 $vgpr0_vgpr1 = COPY %1
# A seq_cst atomic G_LOAD (align 8) from addrspace 3 whose loaded value is a
# p1 pointer, returned in $vgpr0_vgpr1.
# The expected output for every prefix keeps the generic G_LOAD (result
# constrained to vreg_64), so no DS_READ instruction is selected. The GFX6 and
# GFX7 prefixes still expect the $m0 = S_MOV_B32 -1 write.
# NOTE(review): presumably tolerated because the run lines pass
# -global-isel-abort=0 -- confirm.
233 name: load_atomic_local_p1_seq_cst
235 regBankSelected: true
236 tracksRegLiveness: true
242 ; GFX6-LABEL: name: load_atomic_local_p1_seq_cst
243 ; GFX6: liveins: $vgpr0
245 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
246 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
247 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst (p1), addrspace 3)
248 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
249 ; GFX7-LABEL: name: load_atomic_local_p1_seq_cst
250 ; GFX7: liveins: $vgpr0
252 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
253 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
254 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst (p1), addrspace 3)
255 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
256 ; GFX9-LABEL: name: load_atomic_local_p1_seq_cst
257 ; GFX9: liveins: $vgpr0
259 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
260 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst (p1), addrspace 3)
261 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
262 %0:vgpr(p3) = COPY $vgpr0
263 %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst (p1), align 8, addrspace 3)
264 $vgpr0_vgpr1 = COPY %1
# A seq_cst atomic G_LOAD (align 8) from addrspace 3 whose loaded value is a
# p0 pointer, returned in $vgpr0_vgpr1.
# The expected output for every prefix keeps the generic G_LOAD (result
# constrained to vreg_64), so no DS_READ instruction is selected. The GFX6 and
# GFX7 prefixes still expect the $m0 = S_MOV_B32 -1 write.
# NOTE(review): presumably tolerated because the run lines pass
# -global-isel-abort=0 -- confirm.
270 name: load_atomic_local_p0_seq_cst
272 regBankSelected: true
273 tracksRegLiveness: true
279 ; GFX6-LABEL: name: load_atomic_local_p0_seq_cst
280 ; GFX6: liveins: $vgpr0
282 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
283 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
284 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst (p0), addrspace 3)
285 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
286 ; GFX7-LABEL: name: load_atomic_local_p0_seq_cst
287 ; GFX7: liveins: $vgpr0
289 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
290 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
291 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst (p0), addrspace 3)
292 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
293 ; GFX9-LABEL: name: load_atomic_local_p0_seq_cst
294 ; GFX9: liveins: $vgpr0
296 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
297 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst (p0), addrspace 3)
298 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
299 %0:vgpr(p3) = COPY $vgpr0
300 %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst (p0), align 8, addrspace 3)
301 $vgpr0_vgpr1 = COPY %1
# A 32-bit seq_cst atomic G_LOAD from addrspace 3 whose address is the incoming
# p3 pointer plus the constant 65535 (G_CONSTANT + G_PTR_ADD).
# Expected output differs in how the constant is handled:
#   - GFX6 prefix: the add is kept as V_MOV_B32_e32 65535 + V_ADD_CO_U32_e64
#     and DS_READ_B32 uses the sum with a 0 immediate.
#   - GFX7 prefix: DS_READ_B32 uses the original pointer with 65535 as its
#     first immediate operand; no separate add is expected.
#   - GFX9 prefix: same folding as GFX7, using DS_READ_B32_gfx9 and no $m0
#     write.
307 name: load_atomic_local_s32_seq_cst_gep_65535
309 regBankSelected: true
310 tracksRegLiveness: true
316 ; GFX6-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535
317 ; GFX6: liveins: $vgpr0
319 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
320 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec
321 ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
322 ; GFX6-NEXT: $m0 = S_MOV_B32 -1
323 ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3)
324 ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
325 ; GFX7-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535
326 ; GFX7: liveins: $vgpr0
328 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
329 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
330 ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3)
331 ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
332 ; GFX9-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535
333 ; GFX9: liveins: $vgpr0
335 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
336 ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load seq_cst (s32), addrspace 3)
337 ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
338 %0:vgpr(p3) = COPY $vgpr0
339 %1:vgpr(s32) = G_CONSTANT i32 65535
340 %2:vgpr(p3) = G_PTR_ADD %0, %1
341 %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 3)