1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 # RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
3 # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
4 # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
# Selects a 4-byte seq_cst G_LOAD of an s32 from a p3 (addrspace 3) pointer
# held in $vgpr0.
# Expected per the checks below:
#   GFX6/GFX7: "$m0 = S_MOV_B32 -1" followed by DS_READ_B32 (offset 0) that
#              implicitly reads $m0.
#   GFX9:      DS_READ_B32_gfx9 (offset 0) with no $m0 write.
# The seq_cst ordering is kept on the memory operand of the selected load.
8 name: load_atomic_local_s32_seq_cst
11 tracksRegLiveness: true
17 ; GFX6-LABEL: name: load_atomic_local_s32_seq_cst
18 ; GFX6: liveins: $vgpr0
19 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
20 ; GFX6: $m0 = S_MOV_B32 -1
21 ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3)
22 ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]]
23 ; GFX7-LABEL: name: load_atomic_local_s32_seq_cst
24 ; GFX7: liveins: $vgpr0
25 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
26 ; GFX7: $m0 = S_MOV_B32 -1
27 ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3)
28 ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]]
29 ; GFX9-LABEL: name: load_atomic_local_s32_seq_cst
30 ; GFX9: liveins: $vgpr0
31 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
32 ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst 4, addrspace 3)
33 ; GFX9: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
34 %0:vgpr(p3) = COPY $vgpr0
35 %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 3)
# 4-byte seq_cst G_LOAD of a <2 x s16> from a p3 (addrspace 3) pointer.
# The checks below record that the load is NOT selected on any of the three
# targets: the G_LOAD and its generic types remain, and only the result is
# given the vgpr_32 register class. On GFX6/GFX7 the "$m0 = S_MOV_B32 -1"
# write is still present ahead of the unselected load.
# NOTE(review): presumably this failure is tolerated because the RUN lines
# pass -global-isel-abort=0; confirm whether this is intended negative
# coverage or a known selection gap.
42 name: load_atomic_local_v2s16_seq_cst
45 tracksRegLiveness: true
51 ; GFX6-LABEL: name: load_atomic_local_v2s16_seq_cst
52 ; GFX6: liveins: $vgpr0
53 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
54 ; GFX6: $m0 = S_MOV_B32 -1
55 ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3)
56 ; GFX6: $vgpr0 = COPY [[LOAD]](<2 x s16>)
57 ; GFX7-LABEL: name: load_atomic_local_v2s16_seq_cst
58 ; GFX7: liveins: $vgpr0
59 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
60 ; GFX7: $m0 = S_MOV_B32 -1
61 ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3)
62 ; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>)
63 ; GFX9-LABEL: name: load_atomic_local_v2s16_seq_cst
64 ; GFX9: liveins: $vgpr0
65 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
66 ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3)
67 ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>)
68 %0:vgpr(p3) = COPY $vgpr0
69 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 3)
# 4-byte seq_cst G_LOAD of a p3 pointer value from a p3 (addrspace 3) pointer.
# The checks below record that the load is NOT selected on any of the three
# targets: the G_LOAD remains with its p3 result type and only receives the
# vgpr_32 register class. On GFX6/GFX7 the "$m0 = S_MOV_B32 -1" write is
# still present ahead of the unselected load.
# NOTE(review): presumably tolerated via -global-isel-abort=0 on the RUN
# lines; confirm whether this is intended coverage or a known gap.
76 name: load_atomic_local_p3_seq_cst
79 tracksRegLiveness: true
85 ; GFX6-LABEL: name: load_atomic_local_p3_seq_cst
86 ; GFX6: liveins: $vgpr0
87 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
88 ; GFX6: $m0 = S_MOV_B32 -1
89 ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3)
90 ; GFX6: $vgpr0 = COPY [[LOAD]](p3)
91 ; GFX7-LABEL: name: load_atomic_local_p3_seq_cst
92 ; GFX7: liveins: $vgpr0
93 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
94 ; GFX7: $m0 = S_MOV_B32 -1
95 ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3)
96 ; GFX7: $vgpr0 = COPY [[LOAD]](p3)
97 ; GFX9-LABEL: name: load_atomic_local_p3_seq_cst
98 ; GFX9: liveins: $vgpr0
99 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
100 ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3)
101 ; GFX9: $vgpr0 = COPY [[LOAD]](p3)
102 %0:vgpr(p3) = COPY $vgpr0
103 %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 3)
# Selects an 8-byte seq_cst G_LOAD of an s64 from a p3 (addrspace 3) pointer
# held in $vgpr0; the result is copied out to $vgpr0_vgpr1.
# Expected per the checks below:
#   GFX6/GFX7: "$m0 = S_MOV_B32 -1" followed by DS_READ_B64 (offset 0) into a
#              vreg_64, implicitly reading $m0.
#   GFX9:      DS_READ_B64_gfx9 (offset 0) with no $m0 write.
110 name: load_atomic_local_s64_seq_cst
112 regBankSelected: true
113 tracksRegLiveness: true
119 ; GFX6-LABEL: name: load_atomic_local_s64_seq_cst
120 ; GFX6: liveins: $vgpr0
121 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
122 ; GFX6: $m0 = S_MOV_B32 -1
123 ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 8, addrspace 3)
124 ; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
125 ; GFX7-LABEL: name: load_atomic_local_s64_seq_cst
126 ; GFX7: liveins: $vgpr0
127 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
128 ; GFX7: $m0 = S_MOV_B32 -1
129 ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 8, addrspace 3)
130 ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
131 ; GFX9-LABEL: name: load_atomic_local_s64_seq_cst
132 ; GFX9: liveins: $vgpr0
133 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
134 ; GFX9: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst 8, addrspace 3)
135 ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
136 %0:vgpr(p3) = COPY $vgpr0
137 %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3)
138 $vgpr0_vgpr1 = COPY %1
# 8-byte seq_cst G_LOAD of a <2 x s32> from a p3 (addrspace 3) pointer.
# The checks below record that the load is NOT selected on any of the three
# targets: the G_LOAD remains with its <2 x s32> result type and only
# receives the vreg_64 register class. On GFX6/GFX7 the
# "$m0 = S_MOV_B32 -1" write is still present ahead of the unselected load.
# NOTE(review): presumably tolerated via -global-isel-abort=0 on the RUN
# lines; confirm whether this is intended coverage or a known gap.
144 name: load_atomic_local_v2s32_seq_cst
146 regBankSelected: true
147 tracksRegLiveness: true
153 ; GFX6-LABEL: name: load_atomic_local_v2s32_seq_cst
154 ; GFX6: liveins: $vgpr0
155 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
156 ; GFX6: $m0 = S_MOV_B32 -1
157 ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
158 ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
159 ; GFX7-LABEL: name: load_atomic_local_v2s32_seq_cst
160 ; GFX7: liveins: $vgpr0
161 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
162 ; GFX7: $m0 = S_MOV_B32 -1
163 ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
164 ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
165 ; GFX9-LABEL: name: load_atomic_local_v2s32_seq_cst
166 ; GFX9: liveins: $vgpr0
167 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
168 ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
169 ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
170 %0:vgpr(p3) = COPY $vgpr0
171 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3)
172 $vgpr0_vgpr1 = COPY %1
# 8-byte seq_cst G_LOAD of a <4 x s16> from a p3 (addrspace 3) pointer.
# The checks below record that the load is NOT selected on any of the three
# targets: the G_LOAD remains with its <4 x s16> result type and only
# receives the vreg_64 register class. On GFX6/GFX7 the
# "$m0 = S_MOV_B32 -1" write is still present ahead of the unselected load.
# NOTE(review): presumably tolerated via -global-isel-abort=0 on the RUN
# lines; confirm whether this is intended coverage or a known gap.
178 name: load_atomic_local_v4s16_seq_cst
180 regBankSelected: true
181 tracksRegLiveness: true
187 ; GFX6-LABEL: name: load_atomic_local_v4s16_seq_cst
188 ; GFX6: liveins: $vgpr0
189 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
190 ; GFX6: $m0 = S_MOV_B32 -1
191 ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
192 ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
193 ; GFX7-LABEL: name: load_atomic_local_v4s16_seq_cst
194 ; GFX7: liveins: $vgpr0
195 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
196 ; GFX7: $m0 = S_MOV_B32 -1
197 ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
198 ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
199 ; GFX9-LABEL: name: load_atomic_local_v4s16_seq_cst
200 ; GFX9: liveins: $vgpr0
201 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
202 ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
203 ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
204 %0:vgpr(p3) = COPY $vgpr0
205 %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3)
206 $vgpr0_vgpr1 = COPY %1
# 8-byte seq_cst G_LOAD of a p1 pointer value from a p3 (addrspace 3) pointer.
# The checks below record that the load is NOT selected on any of the three
# targets: the G_LOAD remains with its p1 result type and only receives the
# vreg_64 register class. On GFX6/GFX7 the "$m0 = S_MOV_B32 -1" write is
# still present ahead of the unselected load.
# NOTE(review): presumably tolerated via -global-isel-abort=0 on the RUN
# lines; confirm whether this is intended coverage or a known gap.
212 name: load_atomic_local_p1_seq_cst
214 regBankSelected: true
215 tracksRegLiveness: true
221 ; GFX6-LABEL: name: load_atomic_local_p1_seq_cst
222 ; GFX6: liveins: $vgpr0
223 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
224 ; GFX6: $m0 = S_MOV_B32 -1
225 ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
226 ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
227 ; GFX7-LABEL: name: load_atomic_local_p1_seq_cst
228 ; GFX7: liveins: $vgpr0
229 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
230 ; GFX7: $m0 = S_MOV_B32 -1
231 ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
232 ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
233 ; GFX9-LABEL: name: load_atomic_local_p1_seq_cst
234 ; GFX9: liveins: $vgpr0
235 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
236 ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
237 ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
238 %0:vgpr(p3) = COPY $vgpr0
239 %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3)
240 $vgpr0_vgpr1 = COPY %1
# 8-byte seq_cst G_LOAD of a p0 pointer value from a p3 (addrspace 3) pointer.
# The checks below record that the load is NOT selected on any of the three
# targets: the G_LOAD remains with its p0 result type and only receives the
# vreg_64 register class. On GFX6/GFX7 the "$m0 = S_MOV_B32 -1" write is
# still present ahead of the unselected load.
# NOTE(review): presumably tolerated via -global-isel-abort=0 on the RUN
# lines; confirm whether this is intended coverage or a known gap.
246 name: load_atomic_local_p0_seq_cst
248 regBankSelected: true
249 tracksRegLiveness: true
255 ; GFX6-LABEL: name: load_atomic_local_p0_seq_cst
256 ; GFX6: liveins: $vgpr0
257 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
258 ; GFX6: $m0 = S_MOV_B32 -1
259 ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
260 ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
261 ; GFX7-LABEL: name: load_atomic_local_p0_seq_cst
262 ; GFX7: liveins: $vgpr0
263 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
264 ; GFX7: $m0 = S_MOV_B32 -1
265 ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
266 ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
267 ; GFX9-LABEL: name: load_atomic_local_p0_seq_cst
268 ; GFX9: liveins: $vgpr0
269 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
270 ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3)
271 ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
272 %0:vgpr(p3) = COPY $vgpr0
273 %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3)
274 $vgpr0_vgpr1 = COPY %1
# Selects a 4-byte seq_cst G_LOAD of an s32 whose address is a p3 base in
# $vgpr0 plus a constant 65535 (G_CONSTANT feeding G_GEP).
# Expected per the checks below:
#   GFX6:      the constant is NOT folded; it is materialized with
#              V_MOV_B32_e32 65535, added with V_ADD_I32_e64, and the
#              DS_READ_B32 uses offset 0 on the computed address.
#   GFX7:      the constant is folded into DS_READ_B32 as offset 65535.
#   GFX9:      the constant is folded into DS_READ_B32_gfx9 as offset 65535,
#              with no $m0 write.
# NOTE(review): the reason GFX6 does not fold the offset is not visible in
# this file; presumably a target restriction on DS offsets - confirm.
280 name: load_atomic_local_s32_seq_cst_gep_65535
282 regBankSelected: true
283 tracksRegLiveness: true
289 ; GFX6-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535
290 ; GFX6: liveins: $vgpr0
291 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
292 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec
293 ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
294 ; GFX6: $m0 = S_MOV_B32 -1
295 ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 %2, 0, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3)
296 ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]]
297 ; GFX7-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535
298 ; GFX7: liveins: $vgpr0
299 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
300 ; GFX7: $m0 = S_MOV_B32 -1
301 ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3)
302 ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]]
303 ; GFX9-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535
304 ; GFX9: liveins: $vgpr0
305 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
306 ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load seq_cst 4, addrspace 3)
307 ; GFX9: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
308 %0:vgpr(p3) = COPY $vgpr0
309 %1:vgpr(s32) = G_CONSTANT i32 65535
310 %2:vgpr(p3) = G_GEP %0, %1
311 %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst 4, align 4, addrspace 3)