1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX803 %s
3 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s
4 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s
5 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s
6 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX940 %s
7 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s
8 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s
9 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s
11 ;---------------------------------------------------------------------
13 ;---------------------------------------------------------------------
15 ; xchg is supported over PCIe, so no expansion is necessary
16 define i32 @test_atomicrmw_xchg_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
17 ; COMMON-LABEL: define i32 @test_atomicrmw_xchg_i32_global_system(
18 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
19 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
20 ; COMMON-NEXT: ret i32 [[RES]]
22 %res = atomicrmw xchg ptr addrspace(1) %ptr, i32 %value seq_cst
26 ; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored.
27 define i32 @test_atomicrmw_xchg_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
28 ; COMMON-LABEL: define i32 @test_atomicrmw_xchg_i32_global_system__amdgpu_no_fine_grained_memory(
29 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
30 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]]
31 ; COMMON-NEXT: ret i32 [[RES]]
33 %res = atomicrmw xchg ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
37 ; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored.
38 define i32 @test_atomicrmw_xchg_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
39 ; COMMON-LABEL: define i32 @test_atomicrmw_xchg_i32_global_system__amdgpu_no_remote_memory(
40 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
41 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
42 ; COMMON-NEXT: ret i32 [[RES]]
44 %res = atomicrmw xchg ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
48 ; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored.
49 define i32 @test_atomicrmw_xchg_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
50 ; COMMON-LABEL: define i32 @test_atomicrmw_xchg_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
51 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
52 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
53 ; COMMON-NEXT: ret i32 [[RES]]
55 %res = atomicrmw xchg ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
59 ;---------------------------------------------------------------------
61 ;---------------------------------------------------------------------
63 ; add is supported over PCIe, so no expansion is necessary
64 define i32 @test_atomicrmw_add_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
65 ; COMMON-LABEL: define i32 @test_atomicrmw_add_i32_global_system(
66 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
67 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
68 ; COMMON-NEXT: ret i32 [[RES]]
70 %res = atomicrmw add ptr addrspace(1) %ptr, i32 %value seq_cst
74 ; add is supported over PCIe, so no expansion is necessary. Metadata should be ignored.
75 define i32 @test_atomicrmw_add_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
76 ; COMMON-LABEL: define i32 @test_atomicrmw_add_i32_global_system__amdgpu_no_fine_grained_memory(
77 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
78 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
79 ; COMMON-NEXT: ret i32 [[RES]]
81 %res = atomicrmw add ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
85 ; add is supported over PCIe, so no expansion is necessary. Metadata should be ignored.
86 define i32 @test_atomicrmw_add_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
87 ; COMMON-LABEL: define i32 @test_atomicrmw_add_i32_global_system__amdgpu_no_remote_memory(
88 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
89 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
90 ; COMMON-NEXT: ret i32 [[RES]]
92 %res = atomicrmw add ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
96 ; add is supported over PCIe, so no expansion is necessary. Metadata should be ignored.
97 define i32 @test_atomicrmw_add_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
98 ; COMMON-LABEL: define i32 @test_atomicrmw_add_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
99 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
100 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
101 ; COMMON-NEXT: ret i32 [[RES]]
103 %res = atomicrmw add ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
107 ;---------------------------------------------------------------------
109 ;---------------------------------------------------------------------
111 ; expansion is necessary, sub is not supported over PCIe
112 define i32 @test_atomicrmw_sub_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
113 ; COMMON-LABEL: define i32 @test_atomicrmw_sub_i32_global_system(
114 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
115 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
116 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
118 %res = atomicrmw sub ptr addrspace(1) %ptr, i32 %value seq_cst
122 define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
123 ; COMMON-LABEL: define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory(
124 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
125 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
126 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
128 %res = atomicrmw sub ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
132 define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
133 ; COMMON-LABEL: define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_remote_memory(
134 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
135 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
136 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
138 %res = atomicrmw sub ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
142 define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
143 ; COMMON-LABEL: define i32 @test_atomicrmw_sub_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
144 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
145 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
146 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
148 %res = atomicrmw sub ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
152 ;---------------------------------------------------------------------
154 ;---------------------------------------------------------------------
156 ; expansion is necessary, operation not supported over PCIe
157 define i32 @test_atomicrmw_and_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
158 ; COMMON-LABEL: define i32 @test_atomicrmw_and_i32_global_system(
159 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
160 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
161 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
163 %res = atomicrmw and ptr addrspace(1) %ptr, i32 %value seq_cst
167 define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
168 ; COMMON-LABEL: define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_fine_grained_memory(
169 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
170 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
171 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
173 %res = atomicrmw and ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
177 define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
178 ; COMMON-LABEL: define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_remote_memory(
179 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
180 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
181 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
183 %res = atomicrmw and ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
187 define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
188 ; COMMON-LABEL: define i32 @test_atomicrmw_and_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
189 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
190 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw and ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
191 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
193 %res = atomicrmw and ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
197 ;---------------------------------------------------------------------
199 ;---------------------------------------------------------------------
201 ; expansion is necessary, operation not supported
202 define i32 @test_atomicrmw_nand_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
203 ; COMMON-LABEL: define i32 @test_atomicrmw_nand_i32_global_system(
204 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
205 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
206 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
207 ; COMMON: atomicrmw.start:
208 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
209 ; COMMON-NEXT: [[TMP2:%.*]] = and i32 [[LOADED]], [[VALUE]]
210 ; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[TMP2]], -1
211 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
212 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
213 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
214 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
215 ; COMMON: atomicrmw.end:
216 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
218 %res = atomicrmw nand ptr addrspace(1) %ptr, i32 %value seq_cst
222 define i32 @test_atomicrmw_nand_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
223 ; COMMON-LABEL: define i32 @test_atomicrmw_nand_i32_global_system__amdgpu_no_fine_grained_memory(
224 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
225 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
226 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
227 ; COMMON: atomicrmw.start:
228 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
229 ; COMMON-NEXT: [[TMP2:%.*]] = and i32 [[LOADED]], [[VALUE]]
230 ; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[TMP2]], -1
231 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
232 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
233 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
234 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
235 ; COMMON: atomicrmw.end:
236 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
238 %res = atomicrmw nand ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
242 define i32 @test_atomicrmw_nand_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
243 ; COMMON-LABEL: define i32 @test_atomicrmw_nand_i32_global_system__amdgpu_no_remote_memory(
244 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
245 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
246 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
247 ; COMMON: atomicrmw.start:
248 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
249 ; COMMON-NEXT: [[TMP2:%.*]] = and i32 [[LOADED]], [[VALUE]]
250 ; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[TMP2]], -1
251 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
252 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
253 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
254 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
255 ; COMMON: atomicrmw.end:
256 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
258 %res = atomicrmw nand ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
262 define i32 @test_atomicrmw_nand_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
263 ; COMMON-LABEL: define i32 @test_atomicrmw_nand_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
264 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
265 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
266 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
267 ; COMMON: atomicrmw.start:
268 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
269 ; COMMON-NEXT: [[TMP2:%.*]] = and i32 [[LOADED]], [[VALUE]]
270 ; COMMON-NEXT: [[NEW:%.*]] = xor i32 [[TMP2]], -1
271 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
272 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
273 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
274 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
275 ; COMMON: atomicrmw.end:
276 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
278 %res = atomicrmw nand ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
282 ;---------------------------------------------------------------------
284 ;---------------------------------------------------------------------
286 ; expansion is necessary, operation not supported over PCIe
287 define i32 @test_atomicrmw_or_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
288 ; COMMON-LABEL: define i32 @test_atomicrmw_or_i32_global_system(
289 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
290 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
291 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
293 %res = atomicrmw or ptr addrspace(1) %ptr, i32 %value seq_cst
297 define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
298 ; COMMON-LABEL: define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory(
299 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
300 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
301 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
303 %res = atomicrmw or ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
307 define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
308 ; COMMON-LABEL: define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_remote_memory(
309 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
310 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
311 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
313 %res = atomicrmw or ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
317 define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
318 ; COMMON-LABEL: define i32 @test_atomicrmw_or_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
319 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
320 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
321 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
323 %res = atomicrmw or ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
327 ;---------------------------------------------------------------------
329 ;---------------------------------------------------------------------
331 ; expansion is necessary, operation not supported over PCIe
332 define i32 @test_atomicrmw_xor_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
333 ; COMMON-LABEL: define i32 @test_atomicrmw_xor_i32_global_system(
334 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
335 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
336 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
338 %res = atomicrmw xor ptr addrspace(1) %ptr, i32 %value seq_cst
342 define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
343 ; COMMON-LABEL: define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_fine_grained_memory(
344 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
345 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
346 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
348 %res = atomicrmw xor ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
352 define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
353 ; COMMON-LABEL: define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_remote_memory(
354 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
355 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
356 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
358 %res = atomicrmw xor ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
362 define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
363 ; COMMON-LABEL: define i32 @test_atomicrmw_xor_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
364 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
365 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
366 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
368 %res = atomicrmw xor ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
372 ;---------------------------------------------------------------------
374 ;---------------------------------------------------------------------
376 ; expansion is necessary, operation not supported over PCIe
377 define i32 @test_atomicrmw_max_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
378 ; COMMON-LABEL: define i32 @test_atomicrmw_max_i32_global_system(
379 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
380 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
381 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
382 ; COMMON: atomicrmw.start:
383 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
384 ; COMMON-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[LOADED]], [[VALUE]]
385 ; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
386 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
387 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
388 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
389 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
390 ; COMMON: atomicrmw.end:
391 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
393 %res = atomicrmw max ptr addrspace(1) %ptr, i32 %value seq_cst
397 define i32 @test_atomicrmw_max_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
398 ; COMMON-LABEL: define i32 @test_atomicrmw_max_i32_global_system__amdgpu_no_fine_grained_memory(
399 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
400 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
401 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
402 ; COMMON: atomicrmw.start:
403 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
404 ; COMMON-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[LOADED]], [[VALUE]]
405 ; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
406 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
407 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
408 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
409 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
410 ; COMMON: atomicrmw.end:
411 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
413 %res = atomicrmw max ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
417 define i32 @test_atomicrmw_max_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
418 ; COMMON-LABEL: define i32 @test_atomicrmw_max_i32_global_system__amdgpu_no_remote_memory(
419 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
420 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
421 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
422 ; COMMON: atomicrmw.start:
423 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
424 ; COMMON-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[LOADED]], [[VALUE]]
425 ; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
426 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
427 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
428 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
429 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
430 ; COMMON: atomicrmw.end:
431 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
433 %res = atomicrmw max ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
437 define i32 @test_atomicrmw_max_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
438 ; COMMON-LABEL: define i32 @test_atomicrmw_max_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
439 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
440 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
441 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
442 ; COMMON: atomicrmw.start:
443 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
444 ; COMMON-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[LOADED]], [[VALUE]]
445 ; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
446 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
447 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
448 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
449 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
450 ; COMMON: atomicrmw.end:
451 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
453 %res = atomicrmw max ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
457 ;---------------------------------------------------------------------
459 ;---------------------------------------------------------------------
461 ; expansion is necessary, operation not supported over PCIe
462 define i32 @test_atomicrmw_min_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
463 ; COMMON-LABEL: define i32 @test_atomicrmw_min_i32_global_system(
464 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
465 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
466 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
467 ; COMMON: atomicrmw.start:
468 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
469 ; COMMON-NEXT: [[TMP2:%.*]] = icmp sle i32 [[LOADED]], [[VALUE]]
470 ; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
471 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
472 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
473 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
474 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
475 ; COMMON: atomicrmw.end:
476 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
478 %res = atomicrmw min ptr addrspace(1) %ptr, i32 %value seq_cst
482 define i32 @test_atomicrmw_min_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
483 ; COMMON-LABEL: define i32 @test_atomicrmw_min_i32_global_system__amdgpu_no_fine_grained_memory(
484 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
485 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
486 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
487 ; COMMON: atomicrmw.start:
488 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
489 ; COMMON-NEXT: [[TMP2:%.*]] = icmp sle i32 [[LOADED]], [[VALUE]]
490 ; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
491 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
492 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
493 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
494 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
495 ; COMMON: atomicrmw.end:
496 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
498 %res = atomicrmw min ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
502 define i32 @test_atomicrmw_min_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
503 ; COMMON-LABEL: define i32 @test_atomicrmw_min_i32_global_system__amdgpu_no_remote_memory(
504 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
505 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
506 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
507 ; COMMON: atomicrmw.start:
508 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
509 ; COMMON-NEXT: [[TMP2:%.*]] = icmp sle i32 [[LOADED]], [[VALUE]]
510 ; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
511 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
512 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
513 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
514 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
515 ; COMMON: atomicrmw.end:
516 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
518 %res = atomicrmw min ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
522 define i32 @test_atomicrmw_min_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
523 ; COMMON-LABEL: define i32 @test_atomicrmw_min_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
524 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
525 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
526 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
527 ; COMMON: atomicrmw.start:
528 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
529 ; COMMON-NEXT: [[TMP2:%.*]] = icmp sle i32 [[LOADED]], [[VALUE]]
530 ; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
531 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
532 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
533 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
534 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
535 ; COMMON: atomicrmw.end:
536 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
538 %res = atomicrmw min ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
542 ;---------------------------------------------------------------------
544 ;---------------------------------------------------------------------
546 ; expansion is necessary, operation not supported over PCIe
547 define i32 @test_atomicrmw_umax_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
548 ; COMMON-LABEL: define i32 @test_atomicrmw_umax_i32_global_system(
549 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
550 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
551 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
552 ; COMMON: atomicrmw.start:
553 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
554 ; COMMON-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
555 ; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
556 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
557 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
558 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
559 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
560 ; COMMON: atomicrmw.end:
561 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
563 %res = atomicrmw umax ptr addrspace(1) %ptr, i32 %value seq_cst
567 define i32 @test_atomicrmw_umax_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
568 ; COMMON-LABEL: define i32 @test_atomicrmw_umax_i32_global_system__amdgpu_no_fine_grained_memory(
569 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
570 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
571 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
572 ; COMMON: atomicrmw.start:
573 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
574 ; COMMON-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
575 ; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
576 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
577 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
578 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
579 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
580 ; COMMON: atomicrmw.end:
581 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
583 %res = atomicrmw umax ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
587 define i32 @test_atomicrmw_umax_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
588 ; COMMON-LABEL: define i32 @test_atomicrmw_umax_i32_global_system__amdgpu_no_remote_memory(
589 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
590 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
591 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
592 ; COMMON: atomicrmw.start:
593 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
594 ; COMMON-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
595 ; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
596 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
597 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
598 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
599 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
600 ; COMMON: atomicrmw.end:
601 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
603 %res = atomicrmw umax ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
607 define i32 @test_atomicrmw_umax_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
608 ; COMMON-LABEL: define i32 @test_atomicrmw_umax_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
609 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
610 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
611 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
612 ; COMMON: atomicrmw.start:
613 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
614 ; COMMON-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[LOADED]], [[VALUE]]
615 ; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
616 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
617 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
618 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
619 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
620 ; COMMON: atomicrmw.end:
621 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
623 %res = atomicrmw umax ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
627 ;---------------------------------------------------------------------
629 ;---------------------------------------------------------------------
631 ; expansion is necessary, operation not supported over PCIe
632 define i32 @test_atomicrmw_umin_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
633 ; COMMON-LABEL: define i32 @test_atomicrmw_umin_i32_global_system(
634 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
635 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
636 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
637 ; COMMON: atomicrmw.start:
638 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
639 ; COMMON-NEXT: [[TMP2:%.*]] = icmp ule i32 [[LOADED]], [[VALUE]]
640 ; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
641 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
642 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
643 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
644 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
645 ; COMMON: atomicrmw.end:
646 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
648 %res = atomicrmw umin ptr addrspace(1) %ptr, i32 %value seq_cst
652 define i32 @test_atomicrmw_umin_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
653 ; COMMON-LABEL: define i32 @test_atomicrmw_umin_i32_global_system__amdgpu_no_fine_grained_memory(
654 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
655 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
656 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
657 ; COMMON: atomicrmw.start:
658 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
659 ; COMMON-NEXT: [[TMP2:%.*]] = icmp ule i32 [[LOADED]], [[VALUE]]
660 ; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
661 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
662 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
663 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
664 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
665 ; COMMON: atomicrmw.end:
666 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
668 %res = atomicrmw umin ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
672 define i32 @test_atomicrmw_umin_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
673 ; COMMON-LABEL: define i32 @test_atomicrmw_umin_i32_global_system__amdgpu_no_remote_memory(
674 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
675 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
676 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
677 ; COMMON: atomicrmw.start:
678 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
679 ; COMMON-NEXT: [[TMP2:%.*]] = icmp ule i32 [[LOADED]], [[VALUE]]
680 ; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
681 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
682 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
683 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
684 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
685 ; COMMON: atomicrmw.end:
686 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
688 %res = atomicrmw umin ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
692 define i32 @test_atomicrmw_umin_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
693 ; COMMON-LABEL: define i32 @test_atomicrmw_umin_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
694 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
695 ; COMMON-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
696 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
697 ; COMMON: atomicrmw.start:
698 ; COMMON-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
699 ; COMMON-NEXT: [[TMP2:%.*]] = icmp ule i32 [[LOADED]], [[VALUE]]
700 ; COMMON-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[LOADED]], i32 [[VALUE]]
701 ; COMMON-NEXT: [[TMP3:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
702 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
703 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
704 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
705 ; COMMON: atomicrmw.end:
706 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
708 %res = atomicrmw umin ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
712 ;---------------------------------------------------------------------
713 ; atomicrmw uinc_wrap
714 ;---------------------------------------------------------------------
716 ; expansion is necessary, operation not supported over PCIe
717 define i32 @test_atomicrmw_uinc_wrap_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
718 ; COMMON-LABEL: define i32 @test_atomicrmw_uinc_wrap_i32_global_system(
719 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
720 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
721 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
723 %res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 %value seq_cst
727 define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
728 ; COMMON-LABEL: define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_memory(
729 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
730 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
731 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
733 %res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
737 define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
738 ; COMMON-LABEL: define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_remote_memory(
739 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
740 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
741 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
743 %res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
747 define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
748 ; COMMON-LABEL: define i32 @test_atomicrmw_uinc_wrap_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
749 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
750 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw uinc_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
751 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
753 %res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
757 ;---------------------------------------------------------------------
758 ; atomicrmw udec_wrap
759 ;---------------------------------------------------------------------
761 ; expansion is necessary, operation not supported over PCIe
762 define i32 @test_atomicrmw_udec_wrap_i32_global_system(ptr addrspace(1) %ptr, i32 %value) {
763 ; COMMON-LABEL: define i32 @test_atomicrmw_udec_wrap_i32_global_system(
764 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
765 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4
766 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
768 %res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 %value seq_cst
772 define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %value) {
773 ; COMMON-LABEL: define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_memory(
774 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
775 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
776 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
778 %res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0
782 define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
783 ; COMMON-LABEL: define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_remote_memory(
784 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
785 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
786 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
788 %res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.remote.memory !0
792 define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i32 %value) {
793 ; COMMON-LABEL: define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
794 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
795 ; COMMON-NEXT: [[NEWLOADED:%.*]] = atomicrmw udec_wrap ptr addrspace(1) [[PTR]], i32 [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
796 ; COMMON-NEXT: ret i32 [[NEWLOADED]]
798 %res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
804 ; GFX803: [[META0]] = !{}
806 ; GFX906: [[META0]] = !{}
808 ; GFX908: [[META0]] = !{}
810 ; GFX90A: [[META0]] = !{}
812 ; GFX940: [[META0]] = !{}
814 ; GFX10: [[META0]] = !{}
816 ; GFX11: [[META0]] = !{}
818 ; GFX12: [[META0]] = !{}
820 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: