1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -passes=atomic-expand < %s | FileCheck -check-prefix=IR %s
3 ; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti < %s | FileCheck -check-prefix=GCN %s
; seq_cst atomic i32 load through a private (addrspace(5)) pointer.
; The opt checks expect atomic-expand to leave a plain, non-atomic i32 load;
; the llc checks expect a single buffer_load_dword.
5 define i32 @load_atomic_private_seq_cst_i32(ptr addrspace(5) %ptr) {
6 ; IR-LABEL: define i32 @load_atomic_private_seq_cst_i32(
7 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
8 ; IR-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
9 ; IR-NEXT: ret i32 [[LOAD]]
11 ; GCN-LABEL: load_atomic_private_seq_cst_i32:
13 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14 ; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
15 ; GCN-NEXT: s_waitcnt vmcnt(0)
16 ; GCN-NEXT: s_setpc_b64 s[30:31]
17 %load = load atomic i32, ptr addrspace(5) %ptr seq_cst, align 4
; seq_cst atomic i64 load through a private (addrspace(5)) pointer.
; The opt checks expect a plain i64 load with align 8; the llc checks expect
; it to be split into two dword loads, at the pointer and at pointer + 4.
21 define i64 @load_atomic_private_seq_cst_i64(ptr addrspace(5) %ptr) {
22 ; IR-LABEL: define i64 @load_atomic_private_seq_cst_i64(
23 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
24 ; IR-NEXT: [[LOAD:%.*]] = load i64, ptr addrspace(5) [[PTR]], align 8
25 ; IR-NEXT: ret i64 [[LOAD]]
27 ; GCN-LABEL: load_atomic_private_seq_cst_i64:
29 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30 ; GCN-NEXT: v_add_i32_e32 v1, vcc, 4, v0
31 ; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
32 ; GCN-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen
33 ; GCN-NEXT: s_waitcnt vmcnt(0)
34 ; GCN-NEXT: s_setpc_b64 s[30:31]
35 %load = load atomic i64, ptr addrspace(5) %ptr seq_cst, align 8
; seq_cst atomic i32 store through a private (addrspace(5)) pointer.
; The opt checks expect a plain, non-atomic i32 store; the llc checks expect
; a single buffer_store_dword.
39 define void @atomic_store_seq_cst_i32(ptr addrspace(5) %ptr, i32 %val) {
40 ; IR-LABEL: define void @atomic_store_seq_cst_i32(
41 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
42 ; IR-NEXT: store i32 [[VAL]], ptr addrspace(5) [[PTR]], align 4
45 ; GCN-LABEL: atomic_store_seq_cst_i32:
47 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48 ; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
49 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
50 ; GCN-NEXT: s_setpc_b64 s[30:31]
51 store atomic i32 %val, ptr addrspace(5) %ptr seq_cst, align 4
; seq_cst atomic i64 store through a private (addrspace(5)) pointer.
; The opt checks expect a plain i64 store with align 8; the llc checks expect
; two dword stores, one at the pointer and one at pointer + 4.
55 define void @atomic_store_seq_cst_i64(ptr addrspace(5) %ptr, i64 %val) {
56 ; IR-LABEL: define void @atomic_store_seq_cst_i64(
57 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]], i64 [[VAL:%.*]]) #[[ATTR0]] {
58 ; IR-NEXT: store i64 [[VAL]], ptr addrspace(5) [[PTR]], align 8
61 ; GCN-LABEL: atomic_store_seq_cst_i64:
63 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64 ; GCN-NEXT: v_add_i32_e32 v3, vcc, 4, v0
65 ; GCN-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
66 ; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
67 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
68 ; GCN-NEXT: s_setpc_b64 s[30:31]
69 store atomic i64 %val, ptr addrspace(5) %ptr seq_cst, align 8
; Same as the seq_cst i32 private load test, but with an explicit
; syncscope("agent"). The expected opt and llc output is the same plain load
; as for the variant without a syncscope.
73 define i32 @load_atomic_private_seq_cst_syncscope_i32(ptr addrspace(5) %ptr) {
74 ; IR-LABEL: define i32 @load_atomic_private_seq_cst_syncscope_i32(
75 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
76 ; IR-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
77 ; IR-NEXT: ret i32 [[LOAD]]
79 ; GCN-LABEL: load_atomic_private_seq_cst_syncscope_i32:
81 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82 ; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
83 ; GCN-NEXT: s_waitcnt vmcnt(0)
84 ; GCN-NEXT: s_setpc_b64 s[30:31]
85 %load = load atomic i32, ptr addrspace(5) %ptr syncscope("agent") seq_cst, align 4
; Same as the seq_cst i32 private store test, but with an explicit
; syncscope("agent"). The expected opt and llc output is the same plain store
; as for the variant without a syncscope.
89 define void @atomic_store_seq_cst_syncscope_i32(ptr addrspace(5) %ptr, i32 %val) {
90 ; IR-LABEL: define void @atomic_store_seq_cst_syncscope_i32(
91 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
92 ; IR-NEXT: store i32 [[VAL]], ptr addrspace(5) [[PTR]], align 4
95 ; GCN-LABEL: atomic_store_seq_cst_syncscope_i32:
97 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98 ; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
99 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
100 ; GCN-NEXT: s_setpc_b64 s[30:31]
101 store atomic i32 %val, ptr addrspace(5) %ptr syncscope("agent") seq_cst, align 4
; i32 cmpxchg (expected 0, new 1, acq_rel/monotonic) on a private pointer.
; The opt checks expect a non-atomic sequence: load, icmp eq against 0, select
; of the value to write back, store, then the { i32, i1 } result rebuilt with
; insertvalue. The success bit is stored through a poison global pointer,
; presumably only so that it has a use, and the loaded value is returned.
105 define i32 @cmpxchg_private_i32(ptr addrspace(5) %ptr) {
106 ; IR-LABEL: define i32 @cmpxchg_private_i32(
107 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
108 ; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
109 ; IR-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
110 ; IR-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 1, i32 [[TMP1]]
111 ; IR-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[PTR]], align 4
112 ; IR-NEXT: [[TMP4:%.*]] = insertvalue { i32, i1 } poison, i32 [[TMP1]], 0
113 ; IR-NEXT: [[TMP5:%.*]] = insertvalue { i32, i1 } [[TMP4]], i1 [[TMP2]], 1
114 ; IR-NEXT: [[RESULT_0:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
115 ; IR-NEXT: [[RESULT_1:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
116 ; IR-NEXT: store i1 [[RESULT_1]], ptr addrspace(1) poison, align 1
117 ; IR-NEXT: ret i32 [[RESULT_0]]
119 ; GCN-LABEL: cmpxchg_private_i32:
121 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122 ; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
123 ; GCN-NEXT: s_mov_b32 s7, 0xf000
124 ; GCN-NEXT: s_mov_b32 s6, -1
125 ; GCN-NEXT: s_waitcnt vmcnt(0)
126 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
127 ; GCN-NEXT: v_cndmask_b32_e64 v2, v1, 1, vcc
128 ; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
129 ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
130 ; GCN-NEXT: buffer_store_byte v0, off, s[4:7], 0
131 ; GCN-NEXT: s_waitcnt expcnt(0)
132 ; GCN-NEXT: v_mov_b32_e32 v0, v1
133 ; GCN-NEXT: s_waitcnt vmcnt(0)
134 ; GCN-NEXT: s_setpc_b64 s[30:31]
135 %result = cmpxchg ptr addrspace(5) %ptr, i32 0, i32 1 acq_rel monotonic
136 %result.0 = extractvalue { i32, i1 } %result, 0
137 %result.1 = extractvalue { i32, i1 } %result, 1
138 store i1 %result.1, ptr addrspace(1) poison
; i64 variant of the private cmpxchg test (expected 0, new 1,
; acq_rel/monotonic). The opt checks expect the same load / icmp eq / select /
; store / insertvalue expansion on i64. The llc checks expect the 64-bit
; access to be done as two dword loads and two dword stores, with the high
; dword written back as 0 when the compare succeeds.
142 define i64 @cmpxchg_private_i64(ptr addrspace(5) %ptr) {
143 ; IR-LABEL: define i64 @cmpxchg_private_i64(
144 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
145 ; IR-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(5) [[PTR]], align 8
146 ; IR-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
147 ; IR-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 1, i64 [[TMP1]]
148 ; IR-NEXT: store i64 [[TMP3]], ptr addrspace(5) [[PTR]], align 8
149 ; IR-NEXT: [[TMP4:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP1]], 0
150 ; IR-NEXT: [[TMP5:%.*]] = insertvalue { i64, i1 } [[TMP4]], i1 [[TMP2]], 1
151 ; IR-NEXT: [[RESULT_0:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
152 ; IR-NEXT: [[RESULT_1:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
153 ; IR-NEXT: store i1 [[RESULT_1]], ptr addrspace(1) poison, align 1
154 ; IR-NEXT: ret i64 [[RESULT_0]]
156 ; GCN-LABEL: cmpxchg_private_i64:
158 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
159 ; GCN-NEXT: v_mov_b32_e32 v2, v0
160 ; GCN-NEXT: v_add_i32_e32 v3, vcc, 4, v2
161 ; GCN-NEXT: buffer_load_dword v1, v3, s[0:3], 0 offen
162 ; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
163 ; GCN-NEXT: s_mov_b32 s7, 0xf000
164 ; GCN-NEXT: s_mov_b32 s6, -1
165 ; GCN-NEXT: s_waitcnt vmcnt(0)
166 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
167 ; GCN-NEXT: v_cndmask_b32_e64 v4, v1, 0, vcc
168 ; GCN-NEXT: buffer_store_dword v4, v3, s[0:3], 0 offen
169 ; GCN-NEXT: v_cndmask_b32_e64 v3, v0, 1, vcc
170 ; GCN-NEXT: s_waitcnt expcnt(0)
171 ; GCN-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
172 ; GCN-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen
173 ; GCN-NEXT: buffer_store_byte v4, off, s[4:7], 0
174 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
175 ; GCN-NEXT: s_setpc_b64 s[30:31]
176 %result = cmpxchg ptr addrspace(5) %ptr, i64 0, i64 1 acq_rel monotonic
177 %result.0 = extractvalue { i64, i1 } %result, 0
178 %result.1 = extractvalue { i64, i1 } %result, 1
179 store i1 %result.1, ptr addrspace(1) poison
; seq_cst atomicrmw xchg of constant 4 on a private i32.
; The opt checks expect a plain load of the old value followed by a plain
; store of 4, with the old value returned.
184 define i32 @atomicrmw_xchg_private_i32(ptr addrspace(5) %ptr) {
185 ; IR-LABEL: define i32 @atomicrmw_xchg_private_i32(
186 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
187 ; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
188 ; IR-NEXT: store i32 4, ptr addrspace(5) [[PTR]], align 4
189 ; IR-NEXT: ret i32 [[TMP1]]
191 ; GCN-LABEL: atomicrmw_xchg_private_i32:
193 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194 ; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
195 ; GCN-NEXT: v_mov_b32_e32 v2, 4
196 ; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
197 ; GCN-NEXT: s_waitcnt vmcnt(1)
198 ; GCN-NEXT: v_mov_b32_e32 v0, v1
199 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
200 ; GCN-NEXT: s_setpc_b64 s[30:31]
201 %result = atomicrmw xchg ptr addrspace(5) %ptr, i32 4 seq_cst
; seq_cst atomicrmw add of constant 4 on a private i32.
; The opt checks expect load, add 4, store, and the old value returned.
205 define i32 @atomicrmw_add_private_i32(ptr addrspace(5) %ptr) {
206 ; IR-LABEL: define i32 @atomicrmw_add_private_i32(
207 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
208 ; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
209 ; IR-NEXT: [[NEW:%.*]] = add i32 [[TMP1]], 4
210 ; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
211 ; IR-NEXT: ret i32 [[TMP1]]
213 ; GCN-LABEL: atomicrmw_add_private_i32:
215 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
216 ; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
217 ; GCN-NEXT: s_waitcnt vmcnt(0)
218 ; GCN-NEXT: v_add_i32_e32 v2, vcc, 4, v1
219 ; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
220 ; GCN-NEXT: v_mov_b32_e32 v0, v1
221 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
222 ; GCN-NEXT: s_setpc_b64 s[30:31]
223 %result = atomicrmw add ptr addrspace(5) %ptr, i32 4 seq_cst
; seq_cst atomicrmw sub of constant 4 on a private i32.
; The opt checks expect load, sub 4, store, and the old value returned; the
; llc checks expect the subtraction to be emitted as an add of -4.
227 define i32 @atomicrmw_sub_private_i32(ptr addrspace(5) %ptr) {
228 ; IR-LABEL: define i32 @atomicrmw_sub_private_i32(
229 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
230 ; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
231 ; IR-NEXT: [[NEW:%.*]] = sub i32 [[TMP1]], 4
232 ; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
233 ; IR-NEXT: ret i32 [[TMP1]]
235 ; GCN-LABEL: atomicrmw_sub_private_i32:
237 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238 ; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
239 ; GCN-NEXT: s_waitcnt vmcnt(0)
240 ; GCN-NEXT: v_add_i32_e32 v2, vcc, -4, v1
241 ; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
242 ; GCN-NEXT: v_mov_b32_e32 v0, v1
243 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
244 ; GCN-NEXT: s_setpc_b64 s[30:31]
245 %result = atomicrmw sub ptr addrspace(5) %ptr, i32 4 seq_cst
; seq_cst atomicrmw and with constant 4 on a private i32.
; The opt checks expect load, and 4, store, and the old value returned.
249 define i32 @atomicrmw_and_private_i32(ptr addrspace(5) %ptr) {
250 ; IR-LABEL: define i32 @atomicrmw_and_private_i32(
251 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
252 ; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
253 ; IR-NEXT: [[NEW:%.*]] = and i32 [[TMP1]], 4
254 ; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
255 ; IR-NEXT: ret i32 [[TMP1]]
257 ; GCN-LABEL: atomicrmw_and_private_i32:
259 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
260 ; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
261 ; GCN-NEXT: s_waitcnt vmcnt(0)
262 ; GCN-NEXT: v_and_b32_e32 v2, 4, v1
263 ; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
264 ; GCN-NEXT: v_mov_b32_e32 v0, v1
265 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
266 ; GCN-NEXT: s_setpc_b64 s[30:31]
267 %result = atomicrmw and ptr addrspace(5) %ptr, i32 4 seq_cst
; seq_cst atomicrmw nand with constant 4 on a private i32.
; The opt checks expect load, and 4, xor -1, store, and the old value
; returned. The llc checks expect the equivalent form (not x) or -5, i.e. the
; complement pushed through the and, since -5 is the bitwise complement of 4.
271 define i32 @atomicrmw_nand_private_i32(ptr addrspace(5) %ptr) {
272 ; IR-LABEL: define i32 @atomicrmw_nand_private_i32(
273 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
274 ; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
275 ; IR-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 4
276 ; IR-NEXT: [[NEW:%.*]] = xor i32 [[TMP2]], -1
277 ; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
278 ; IR-NEXT: ret i32 [[TMP1]]
280 ; GCN-LABEL: atomicrmw_nand_private_i32:
282 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
283 ; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
284 ; GCN-NEXT: s_waitcnt vmcnt(0)
285 ; GCN-NEXT: v_not_b32_e32 v2, v1
286 ; GCN-NEXT: v_or_b32_e32 v2, -5, v2
287 ; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
288 ; GCN-NEXT: v_mov_b32_e32 v0, v1
289 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
290 ; GCN-NEXT: s_setpc_b64 s[30:31]
291 %result = atomicrmw nand ptr addrspace(5) %ptr, i32 4 seq_cst
; seq_cst atomicrmw or with constant 4 on a private i32.
; The opt checks expect load, or 4, store, and the old value returned.
295 define i32 @atomicrmw_or_private_i32(ptr addrspace(5) %ptr) {
296 ; IR-LABEL: define i32 @atomicrmw_or_private_i32(
297 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
298 ; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
299 ; IR-NEXT: [[NEW:%.*]] = or i32 [[TMP1]], 4
300 ; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
301 ; IR-NEXT: ret i32 [[TMP1]]
303 ; GCN-LABEL: atomicrmw_or_private_i32:
305 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
306 ; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
307 ; GCN-NEXT: s_waitcnt vmcnt(0)
308 ; GCN-NEXT: v_or_b32_e32 v2, 4, v1
309 ; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
310 ; GCN-NEXT: v_mov_b32_e32 v0, v1
311 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
312 ; GCN-NEXT: s_setpc_b64 s[30:31]
313 %result = atomicrmw or ptr addrspace(5) %ptr, i32 4 seq_cst
; seq_cst atomicrmw xor with constant 4 on a private i32.
; The opt checks expect load, xor 4, store, and the old value returned.
317 define i32 @atomicrmw_xor_private_i32(ptr addrspace(5) %ptr) {
318 ; IR-LABEL: define i32 @atomicrmw_xor_private_i32(
319 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
320 ; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
321 ; IR-NEXT: [[NEW:%.*]] = xor i32 [[TMP1]], 4
322 ; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
323 ; IR-NEXT: ret i32 [[TMP1]]
325 ; GCN-LABEL: atomicrmw_xor_private_i32:
327 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
328 ; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
329 ; GCN-NEXT: s_waitcnt vmcnt(0)
330 ; GCN-NEXT: v_xor_b32_e32 v2, 4, v1
331 ; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
332 ; GCN-NEXT: v_mov_b32_e32 v0, v1
333 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
334 ; GCN-NEXT: s_setpc_b64 s[30:31]
335 %result = atomicrmw xor ptr addrspace(5) %ptr, i32 4 seq_cst
; seq_cst atomicrmw max (signed) with constant 4 on a private i32.
; The opt checks expect load, icmp sgt + select, store, and the old value
; returned; the llc checks expect the compare/select to become v_max_i32.
339 define i32 @atomicrmw_max_private_i32(ptr addrspace(5) %ptr) {
340 ; IR-LABEL: define i32 @atomicrmw_max_private_i32(
341 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
342 ; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
343 ; IR-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], 4
344 ; IR-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
345 ; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
346 ; IR-NEXT: ret i32 [[TMP1]]
348 ; GCN-LABEL: atomicrmw_max_private_i32:
350 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351 ; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
352 ; GCN-NEXT: s_waitcnt vmcnt(0)
353 ; GCN-NEXT: v_max_i32_e32 v2, 4, v1
354 ; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
355 ; GCN-NEXT: v_mov_b32_e32 v0, v1
356 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
357 ; GCN-NEXT: s_setpc_b64 s[30:31]
358 %result = atomicrmw max ptr addrspace(5) %ptr, i32 4 seq_cst
; seq_cst atomicrmw min (signed) with constant 4 on a private i32.
; The opt checks expect load, icmp sle + select, store, and the old value
; returned; the llc checks expect the compare/select to become v_min_i32.
362 define i32 @atomicrmw_min_private_i32(ptr addrspace(5) %ptr) {
363 ; IR-LABEL: define i32 @atomicrmw_min_private_i32(
364 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
365 ; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
366 ; IR-NEXT: [[TMP2:%.*]] = icmp sle i32 [[TMP1]], 4
367 ; IR-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
368 ; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
369 ; IR-NEXT: ret i32 [[TMP1]]
371 ; GCN-LABEL: atomicrmw_min_private_i32:
373 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
374 ; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
375 ; GCN-NEXT: s_waitcnt vmcnt(0)
376 ; GCN-NEXT: v_min_i32_e32 v2, 4, v1
377 ; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
378 ; GCN-NEXT: v_mov_b32_e32 v0, v1
379 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
380 ; GCN-NEXT: s_setpc_b64 s[30:31]
381 %result = atomicrmw min ptr addrspace(5) %ptr, i32 4 seq_cst
; seq_cst atomicrmw umax (unsigned) with constant 4 on a private i32.
; The opt checks expect load, icmp ugt + select, store, and the old value
; returned; the llc checks expect the compare/select to become v_max_u32.
385 define i32 @atomicrmw_umax_private_i32(ptr addrspace(5) %ptr) {
386 ; IR-LABEL: define i32 @atomicrmw_umax_private_i32(
387 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
388 ; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
389 ; IR-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], 4
390 ; IR-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
391 ; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
392 ; IR-NEXT: ret i32 [[TMP1]]
394 ; GCN-LABEL: atomicrmw_umax_private_i32:
396 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
397 ; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
398 ; GCN-NEXT: s_waitcnt vmcnt(0)
399 ; GCN-NEXT: v_max_u32_e32 v2, 4, v1
400 ; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
401 ; GCN-NEXT: v_mov_b32_e32 v0, v1
402 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
403 ; GCN-NEXT: s_setpc_b64 s[30:31]
404 %result = atomicrmw umax ptr addrspace(5) %ptr, i32 4 seq_cst
; seq_cst atomicrmw umin (unsigned) with constant 4 on a private i32.
; The opt checks expect load, icmp ule + select, store, and the old value
; returned; the llc checks expect the compare/select to become v_min_u32.
408 define i32 @atomicrmw_umin_private_i32(ptr addrspace(5) %ptr) {
409 ; IR-LABEL: define i32 @atomicrmw_umin_private_i32(
410 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
411 ; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
412 ; IR-NEXT: [[TMP2:%.*]] = icmp ule i32 [[TMP1]], 4
413 ; IR-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
414 ; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
415 ; IR-NEXT: ret i32 [[TMP1]]
417 ; GCN-LABEL: atomicrmw_umin_private_i32:
419 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
420 ; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
421 ; GCN-NEXT: s_waitcnt vmcnt(0)
422 ; GCN-NEXT: v_min_u32_e32 v2, 4, v1
423 ; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
424 ; GCN-NEXT: v_mov_b32_e32 v0, v1
425 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
426 ; GCN-NEXT: s_setpc_b64 s[30:31]
427 %result = atomicrmw umin ptr addrspace(5) %ptr, i32 4 seq_cst
; seq_cst atomicrmw fadd of constant 2.0 on a private float.
; The opt checks expect load, fadd 2.0, store, and the old value returned.
431 define float @atomicrmw_fadd_private_f32(ptr addrspace(5) %ptr) {
432 ; IR-LABEL: define float @atomicrmw_fadd_private_f32(
433 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
434 ; IR-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[PTR]], align 4
435 ; IR-NEXT: [[NEW:%.*]] = fadd float [[TMP1]], 2.000000e+00
436 ; IR-NEXT: store float [[NEW]], ptr addrspace(5) [[PTR]], align 4
437 ; IR-NEXT: ret float [[TMP1]]
439 ; GCN-LABEL: atomicrmw_fadd_private_f32:
441 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
442 ; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
443 ; GCN-NEXT: s_waitcnt vmcnt(0)
444 ; GCN-NEXT: v_add_f32_e32 v2, 2.0, v1
445 ; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
446 ; GCN-NEXT: v_mov_b32_e32 v0, v1
447 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
448 ; GCN-NEXT: s_setpc_b64 s[30:31]
449 %result = atomicrmw fadd ptr addrspace(5) %ptr, float 2.0 seq_cst
; seq_cst atomicrmw fadd of constant 2.0 on a private bfloat.
; The opt checks expect a 2-byte-aligned bfloat load, fadd, and store, with
; the old value returned. The llc checks expect a 16-bit load, a left shift by
; 16 to place the bits in the high half of a dword, an f32 add, a right shift
; by 16, and a 16-bit store.
453 define bfloat @atomicrmw_fadd_private_bf16(ptr addrspace(5) %ptr) {
454 ; IR-LABEL: define bfloat @atomicrmw_fadd_private_bf16(
455 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
456 ; IR-NEXT: [[TMP1:%.*]] = load bfloat, ptr addrspace(5) [[PTR]], align 2
457 ; IR-NEXT: [[NEW:%.*]] = fadd bfloat [[TMP1]], 0xR4000
458 ; IR-NEXT: store bfloat [[NEW]], ptr addrspace(5) [[PTR]], align 2
459 ; IR-NEXT: ret bfloat [[TMP1]]
461 ; GCN-LABEL: atomicrmw_fadd_private_bf16:
463 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
464 ; GCN-NEXT: buffer_load_ushort v1, v0, s[0:3], 0 offen
465 ; GCN-NEXT: s_waitcnt vmcnt(0)
466 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1
467 ; GCN-NEXT: v_add_f32_e32 v2, 2.0, v1
468 ; GCN-NEXT: v_lshrrev_b32_e32 v2, 16, v2
469 ; GCN-NEXT: buffer_store_short v2, v0, s[0:3], 0 offen
470 ; GCN-NEXT: v_mov_b32_e32 v0, v1
471 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
472 ; GCN-NEXT: s_setpc_b64 s[30:31]
473 %result = atomicrmw fadd ptr addrspace(5) %ptr, bfloat 2.0 seq_cst
; seq_cst atomicrmw fsub of a non-constant float operand on a private float.
; The opt checks expect load, fsub by the %val argument, store, and the old
; value returned.
; NOTE(review): the function name ends in _i32 although the operation type is
; float; the sibling fadd test uses an _f32 suffix. Renaming would require
; regenerating the label checks.
477 define float @atomicrmw_fsub_private_i32(ptr addrspace(5) %ptr, float %val) {
478 ; IR-LABEL: define float @atomicrmw_fsub_private_i32(
479 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]], float [[VAL:%.*]]) #[[ATTR0]] {
480 ; IR-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[PTR]], align 4
481 ; IR-NEXT: [[NEW:%.*]] = fsub float [[TMP1]], [[VAL]]
482 ; IR-NEXT: store float [[NEW]], ptr addrspace(5) [[PTR]], align 4
483 ; IR-NEXT: ret float [[TMP1]]
485 ; GCN-LABEL: atomicrmw_fsub_private_i32:
487 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
488 ; GCN-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
489 ; GCN-NEXT: s_waitcnt vmcnt(0)
490 ; GCN-NEXT: v_sub_f32_e32 v1, v2, v1
491 ; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
492 ; GCN-NEXT: v_mov_b32_e32 v0, v2
493 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
494 ; GCN-NEXT: s_setpc_b64 s[30:31]
495 %result = atomicrmw fsub ptr addrspace(5) %ptr, float %val seq_cst
; Kernel that initializes a private [2 x i32] alloca to {0, 1}, performs an
; acq_rel atomicrmw add of 7 on the element selected by %in, and stores the
; old value to %out.
; The opt checks expect the atomicrmw to become load / add 7 / store on the
; alloca. The llc checks expect no memory access to the alloca at all: the
; stored result is computed as (%in == 1) ? 1 : 0, which is the initial value
; of the selected element.
; NOTE(review): the name suggests this guards alloca promotion in the
; presence of a private atomicrmw; confirm against the original commit.
499 define amdgpu_kernel void @alloca_promote_atomicrmw_private_lds_promote(ptr addrspace(1) %out, i32 %in) nounwind {
500 ; IR-LABEL: define amdgpu_kernel void @alloca_promote_atomicrmw_private_lds_promote(
501 ; IR-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[IN:%.*]]) #[[ATTR1:[0-9]+]] {
503 ; IR-NEXT: [[TMP:%.*]] = alloca [2 x i32], align 4, addrspace(5)
504 ; IR-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i32], ptr addrspace(5) [[TMP]], i32 0, i32 1
505 ; IR-NEXT: store i32 0, ptr addrspace(5) [[TMP]], align 4
506 ; IR-NEXT: store i32 1, ptr addrspace(5) [[GEP2]], align 4
507 ; IR-NEXT: [[GEP3:%.*]] = getelementptr inbounds [2 x i32], ptr addrspace(5) [[TMP]], i32 0, i32 [[IN]]
508 ; IR-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[GEP3]], align 4
509 ; IR-NEXT: [[NEW:%.*]] = add i32 [[TMP0]], 7
510 ; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[GEP3]], align 4
511 ; IR-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[OUT]], align 4
514 ; GCN-LABEL: alloca_promote_atomicrmw_private_lds_promote:
515 ; GCN: ; %bb.0: ; %entry
516 ; GCN-NEXT: s_load_dword s6, s[4:5], 0xb
517 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
518 ; GCN-NEXT: s_mov_b32 s3, 0xf000
519 ; GCN-NEXT: s_mov_b32 s2, -1
520 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
521 ; GCN-NEXT: s_cmp_eq_u32 s6, 1
522 ; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
523 ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
524 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
527 %tmp = alloca [2 x i32], addrspace(5)
528 %gep2 = getelementptr inbounds [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 1
529 store i32 0, ptr addrspace(5) %tmp
530 store i32 1, ptr addrspace(5) %gep2
531 %gep3 = getelementptr inbounds [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 %in
532 %rmw = atomicrmw add ptr addrspace(5) %gep3, i32 7 acq_rel
533 store i32 %rmw, ptr addrspace(1) %out
; Kernel that initializes a private [2 x i32] alloca to {0, 1}, performs an
; acq_rel/monotonic cmpxchg (expected 0, new 1) on the element selected by
; %in, and stores the loaded (old) value to %out.
; The opt checks expect the cmpxchg to become load / icmp eq / select / store
; plus insertvalue/extractvalue of the result struct. The llc checks expect no
; memory access to the alloca: the stored result is (%in == 1) ? 1 : 0, which
; is the initial value of the selected element.
537 define amdgpu_kernel void @alloca_promote_cmpxchg_private(ptr addrspace(1) %out, i32 %in) nounwind {
538 ; IR-LABEL: define amdgpu_kernel void @alloca_promote_cmpxchg_private(
539 ; IR-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[IN:%.*]]) #[[ATTR1]] {
541 ; IR-NEXT: [[TMP:%.*]] = alloca [2 x i32], align 4, addrspace(5)
542 ; IR-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i32], ptr addrspace(5) [[TMP]], i32 0, i32 1
543 ; IR-NEXT: store i32 0, ptr addrspace(5) [[TMP]], align 4
544 ; IR-NEXT: store i32 1, ptr addrspace(5) [[GEP2]], align 4
545 ; IR-NEXT: [[GEP3:%.*]] = getelementptr inbounds [2 x i32], ptr addrspace(5) [[TMP]], i32 0, i32 [[IN]]
546 ; IR-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[GEP3]], align 4
547 ; IR-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
548 ; IR-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 1, i32 [[TMP0]]
549 ; IR-NEXT: store i32 [[TMP2]], ptr addrspace(5) [[GEP3]], align 4
550 ; IR-NEXT: [[TMP3:%.*]] = insertvalue { i32, i1 } poison, i32 [[TMP0]], 0
551 ; IR-NEXT: [[TMP4:%.*]] = insertvalue { i32, i1 } [[TMP3]], i1 [[TMP1]], 1
552 ; IR-NEXT: [[VAL:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
553 ; IR-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4
556 ; GCN-LABEL: alloca_promote_cmpxchg_private:
557 ; GCN: ; %bb.0: ; %entry
558 ; GCN-NEXT: s_load_dword s6, s[4:5], 0xb
559 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
560 ; GCN-NEXT: s_mov_b32 s3, 0xf000
561 ; GCN-NEXT: s_mov_b32 s2, -1
562 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
563 ; GCN-NEXT: s_cmp_eq_u32 s6, 1
564 ; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
565 ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
566 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
569 %tmp = alloca [2 x i32], addrspace(5)
570 %gep2 = getelementptr inbounds [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 1
571 store i32 0, ptr addrspace(5) %tmp
572 store i32 1, ptr addrspace(5) %gep2
573 %gep3 = getelementptr inbounds [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 %in
574 %xchg = cmpxchg ptr addrspace(5) %gep3, i32 0, i32 1 acq_rel monotonic
575 %val = extractvalue { i32, i1 } %xchg, 0
576 store i32 %val, ptr addrspace(1) %out
; seq_cst atomicrmw uinc_wrap with limit 4 on a private i32.
; The opt checks expect load, add 1, icmp uge against 4, and a select that
; writes 0 when the old value is >= 4 and old + 1 otherwise; the old value is
; returned.
580 define i32 @atomicrmw_inc_private_i32(ptr addrspace(5) %ptr) {
581 ; IR-LABEL: define i32 @atomicrmw_inc_private_i32(
582 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
583 ; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
584 ; IR-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1
585 ; IR-NEXT: [[TMP3:%.*]] = icmp uge i32 [[TMP1]], 4
586 ; IR-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
587 ; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
588 ; IR-NEXT: ret i32 [[TMP1]]
590 ; GCN-LABEL: atomicrmw_inc_private_i32:
592 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
593 ; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
594 ; GCN-NEXT: s_waitcnt vmcnt(0)
595 ; GCN-NEXT: v_add_i32_e32 v2, vcc, 1, v1
596 ; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 4, v1
597 ; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
598 ; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
599 ; GCN-NEXT: v_mov_b32_e32 v0, v1
600 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
601 ; GCN-NEXT: s_setpc_b64 s[30:31]
602 %result = atomicrmw uinc_wrap ptr addrspace(5) %ptr, i32 4 seq_cst
; seq_cst atomicrmw udec_wrap with limit 4 on a private i32.
; The opt checks expect load, sub 1, and a select that writes 4 when the old
; value is 0 or is unsigned-greater than 4, and old - 1 otherwise; the old
; value is returned.
606 define i32 @atomicrmw_dec_private_i32(ptr addrspace(5) %ptr) {
607 ; IR-LABEL: define i32 @atomicrmw_dec_private_i32(
608 ; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
609 ; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
610 ; IR-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 1
611 ; IR-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
612 ; IR-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP1]], 4
613 ; IR-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
614 ; IR-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i32 4, i32 [[TMP2]]
615 ; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
616 ; IR-NEXT: ret i32 [[TMP1]]
618 ; GCN-LABEL: atomicrmw_dec_private_i32:
620 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
621 ; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
622 ; GCN-NEXT: s_waitcnt vmcnt(0)
623 ; GCN-NEXT: v_add_i32_e32 v2, vcc, -1, v1
624 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
625 ; GCN-NEXT: v_cmp_lt_u32_e64 s[4:5], 4, v1
626 ; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
627 ; GCN-NEXT: v_cndmask_b32_e64 v2, v2, 4, s[4:5]
628 ; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
629 ; GCN-NEXT: v_mov_b32_e32 v0, v1
630 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
631 ; GCN-NEXT: s_setpc_b64 s[30:31]
632 %result = atomicrmw udec_wrap ptr addrspace(5) %ptr, i32 4 seq_cst