1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefix=GFX908 %s
3 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefix=GFX90A %s
4 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefix=GFX940 %s
5 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefix=GFX1100 %s
; Flat-pointer f32 atomicrmw fadd, seq_cst with no explicit syncscope, tagged
; with !amdgpu.no.fine.grained.memory and !amdgpu.ignore.denormal.mode.
; Per the assertions below, gfx908 falls back to a cmpxchg loop, gfx90a splits
; the operation into shared / private / global address-space paths selected by
; llvm.amdgcn.is.shared and llvm.amdgcn.is.private, and gfx940 / gfx1100 keep
; the flat atomicrmw unchanged.
7 define float @syncscope_system(ptr %addr, float %val) {
8 ; GFX908-LABEL: @syncscope_system(
9 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr [[ADDR:%.*]], align 4
10 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
11 ; GFX908: atomicrmw.start:
12 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
13 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]]
14 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
15 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
16 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]]
17 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
18 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
19 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
20 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
21 ; GFX908: atomicrmw.end:
22 ; GFX908-NEXT: ret float [[TMP5]]
24 ; GFX90A-LABEL: @syncscope_system(
25 ; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]])
26 ; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
27 ; GFX90A: atomicrmw.shared:
28 ; GFX90A-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(3)
29 ; GFX90A-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VAL:%.*]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.ignore.denormal.mode [[META0]]
30 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI:%.*]]
31 ; GFX90A: atomicrmw.check.private:
32 ; GFX90A-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[ADDR]])
33 ; GFX90A-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]]
34 ; GFX90A: atomicrmw.private:
35 ; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
36 ; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4
37 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
38 ; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4
39 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
40 ; GFX90A: atomicrmw.global:
41 ; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
42 ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
43 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
44 ; GFX90A: atomicrmw.phi:
45 ; GFX90A-NEXT: [[RES:%.*]] = phi float [ [[TMP2]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP5]], [[ATOMICRMW_GLOBAL]] ]
46 ; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]]
47 ; GFX90A: atomicrmw.end:
48 ; GFX90A-NEXT: ret float [[RES]]
50 ; GFX940-LABEL: @syncscope_system(
51 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.ignore.denormal.mode [[META0]]
52 ; GFX940-NEXT: ret float [[RES]]
54 ; GFX1100-LABEL: @syncscope_system(
55 ; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.ignore.denormal.mode [[META0]]
56 ; GFX1100-NEXT: ret float [[RES]]
73 %res = atomicrmw fadd ptr %addr, float %val seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
; Flat-pointer f32 atomicrmw fadd at syncscope("workgroup"), seq_cst, whose
; result is returned, tagged with !amdgpu.no.fine.grained.memory and
; !amdgpu.ignore.denormal.mode.
; Per the assertions below, gfx908 falls back to a cmpxchg loop, gfx90a splits
; the operation into shared / private / global address-space paths, and
; gfx940 / gfx1100 keep the flat atomicrmw unchanged.
77 define float @syncscope_workgroup_rtn(ptr %addr, float %val) {
78 ; GFX908-LABEL: @syncscope_workgroup_rtn(
79 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr [[ADDR:%.*]], align 4
80 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
81 ; GFX908: atomicrmw.start:
82 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
83 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]]
84 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
85 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
86 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("workgroup") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
87 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
88 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
89 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
90 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
91 ; GFX908: atomicrmw.end:
92 ; GFX908-NEXT: ret float [[TMP5]]
94 ; GFX90A-LABEL: @syncscope_workgroup_rtn(
95 ; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]])
96 ; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
97 ; GFX90A: atomicrmw.shared:
98 ; GFX90A-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(3)
99 ; GFX90A-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
100 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI:%.*]]
101 ; GFX90A: atomicrmw.check.private:
102 ; GFX90A-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[ADDR]])
103 ; GFX90A-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]]
104 ; GFX90A: atomicrmw.private:
105 ; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
106 ; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4
107 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
108 ; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4
109 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
110 ; GFX90A: atomicrmw.global:
111 ; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
112 ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
113 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
114 ; GFX90A: atomicrmw.phi:
115 ; GFX90A-NEXT: [[RES:%.*]] = phi float [ [[TMP2]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP5]], [[ATOMICRMW_GLOBAL]] ]
116 ; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]]
117 ; GFX90A: atomicrmw.end:
118 ; GFX90A-NEXT: ret float [[RES]]
120 ; GFX940-LABEL: @syncscope_workgroup_rtn(
121 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
122 ; GFX940-NEXT: ret float [[RES]]
124 ; GFX1100-LABEL: @syncscope_workgroup_rtn(
125 ; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
126 ; GFX1100-NEXT: ret float [[RES]]
143 %res = atomicrmw fadd ptr %addr, float %val syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
; Flat-pointer f32 atomicrmw fadd at syncscope("workgroup"), seq_cst, in a
; void function, so the atomic result is not returned. Tagged with
; !amdgpu.no.fine.grained.memory and !amdgpu.ignore.denormal.mode.
; Per the assertions below, both gfx908 and gfx90a split the operation into
; shared / private / global address-space paths (with no result phi), and
; gfx940 / gfx1100 keep the flat atomicrmw unchanged.
147 define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
148 ; GFX908-LABEL: @syncscope_workgroup_nortn(
149 ; GFX908-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]])
150 ; GFX908-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
151 ; GFX908: atomicrmw.shared:
152 ; GFX908-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(3)
153 ; GFX908-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
154 ; GFX908-NEXT: br label [[ATOMICRMW_PHI:%.*]]
155 ; GFX908: atomicrmw.check.private:
156 ; GFX908-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[ADDR]])
157 ; GFX908-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]]
158 ; GFX908: atomicrmw.private:
159 ; GFX908-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
160 ; GFX908-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4
161 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
162 ; GFX908-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4
163 ; GFX908-NEXT: br label [[ATOMICRMW_PHI]]
164 ; GFX908: atomicrmw.global:
165 ; GFX908-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
166 ; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
167 ; GFX908-NEXT: br label [[ATOMICRMW_PHI]]
168 ; GFX908: atomicrmw.phi:
169 ; GFX908-NEXT: br label [[ATOMICRMW_END:%.*]]
170 ; GFX908: atomicrmw.end:
171 ; GFX908-NEXT: ret void
173 ; GFX90A-LABEL: @syncscope_workgroup_nortn(
174 ; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]])
175 ; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
176 ; GFX90A: atomicrmw.shared:
177 ; GFX90A-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(3)
178 ; GFX90A-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
179 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI:%.*]]
180 ; GFX90A: atomicrmw.check.private:
181 ; GFX90A-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[ADDR]])
182 ; GFX90A-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]]
183 ; GFX90A: atomicrmw.private:
184 ; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
185 ; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4
186 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
187 ; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4
188 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
189 ; GFX90A: atomicrmw.global:
190 ; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
191 ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
192 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
193 ; GFX90A: atomicrmw.phi:
194 ; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]]
195 ; GFX90A: atomicrmw.end:
196 ; GFX90A-NEXT: ret void
198 ; GFX940-LABEL: @syncscope_workgroup_nortn(
199 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
200 ; GFX940-NEXT: ret void
202 ; GFX1100-LABEL: @syncscope_workgroup_nortn(
203 ; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
204 ; GFX1100-NEXT: ret void
221 %res = atomicrmw fadd ptr %addr, float %val syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
; Flat-pointer f32 atomicrmw fadd at syncscope("workgroup"), seq_cst, with no
; !amdgpu.* metadata attached to the instruction.
; Per the assertions below, gfx908, gfx90a and gfx1100 all fall back to a
; cmpxchg loop, while gfx940 keeps the flat atomicrmw unchanged.
225 define float @no_unsafe(ptr %addr, float %val) {
226 ; GFX908-LABEL: @no_unsafe(
227 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr [[ADDR:%.*]], align 4
228 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
229 ; GFX908: atomicrmw.start:
230 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
231 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]]
232 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
233 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
234 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("workgroup") seq_cst seq_cst, align 4
235 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
236 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
237 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
238 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
239 ; GFX908: atomicrmw.end:
240 ; GFX908-NEXT: ret float [[TMP5]]
242 ; GFX90A-LABEL: @no_unsafe(
243 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr [[ADDR:%.*]], align 4
244 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
245 ; GFX90A: atomicrmw.start:
246 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
247 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]]
248 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
249 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
250 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("workgroup") seq_cst seq_cst, align 4
251 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
252 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
253 ; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
254 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
255 ; GFX90A: atomicrmw.end:
256 ; GFX90A-NEXT: ret float [[TMP5]]
258 ; GFX940-LABEL: @no_unsafe(
259 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4
260 ; GFX940-NEXT: ret float [[RES]]
262 ; GFX1100-LABEL: @no_unsafe(
263 ; GFX1100-NEXT: [[TMP1:%.*]] = load float, ptr [[ADDR:%.*]], align 4
264 ; GFX1100-NEXT: br label [[ATOMICRMW_START:%.*]]
265 ; GFX1100: atomicrmw.start:
266 ; GFX1100-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
267 ; GFX1100-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]]
268 ; GFX1100-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
269 ; GFX1100-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
270 ; GFX1100-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("workgroup") seq_cst seq_cst, align 4
271 ; GFX1100-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
272 ; GFX1100-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
273 ; GFX1100-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
274 ; GFX1100-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
275 ; GFX1100: atomicrmw.end:
276 ; GFX1100-NEXT: ret float [[TMP5]]
293 %res = atomicrmw fadd ptr %addr, float %val syncscope("workgroup") seq_cst
; Flat-pointer f32 atomicrmw fadd, seq_cst with no explicit syncscope and an
; explicit "align 32", tagged with !amdgpu.no.fine.grained.memory and
; !amdgpu.ignore.denormal.mode.
; Per the assertions below, the 32-byte alignment is carried onto every
; generated memory operation. On gfx908 that is the load and cmpxchg of the
; cmpxchg loop, on gfx90a the per-address-space atomicrmw / load / store, and
; on gfx940 / gfx1100 the flat atomicrmw that is kept unchanged.
297 define float @flat_atomicrmw_fadd_f32__align32(ptr %addr, float %val) {
298 ; GFX908-LABEL: @flat_atomicrmw_fadd_f32__align32(
299 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr [[ADDR:%.*]], align 32
300 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
301 ; GFX908: atomicrmw.start:
302 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
303 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]]
304 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
305 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
306 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 32, !amdgpu.no.fine.grained.memory [[META0]]
307 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
308 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
309 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
310 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
311 ; GFX908: atomicrmw.end:
312 ; GFX908-NEXT: ret float [[TMP5]]
314 ; GFX90A-LABEL: @flat_atomicrmw_fadd_f32__align32(
315 ; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]])
316 ; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
317 ; GFX90A: atomicrmw.shared:
318 ; GFX90A-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(3)
319 ; GFX90A-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VAL:%.*]] seq_cst, align 32, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
320 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI:%.*]]
321 ; GFX90A: atomicrmw.check.private:
322 ; GFX90A-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[ADDR]])
323 ; GFX90A-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]]
324 ; GFX90A: atomicrmw.private:
325 ; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
326 ; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 32
327 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
328 ; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 32
329 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
330 ; GFX90A: atomicrmw.global:
331 ; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
332 ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] seq_cst, align 32, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
333 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
334 ; GFX90A: atomicrmw.phi:
335 ; GFX90A-NEXT: [[RES:%.*]] = phi float [ [[TMP2]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP5]], [[ATOMICRMW_GLOBAL]] ]
336 ; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]]
337 ; GFX90A: atomicrmw.end:
338 ; GFX90A-NEXT: ret float [[RES]]
340 ; GFX940-LABEL: @flat_atomicrmw_fadd_f32__align32(
341 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] seq_cst, align 32, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
342 ; GFX940-NEXT: ret float [[RES]]
344 ; GFX1100-LABEL: @flat_atomicrmw_fadd_f32__align32(
345 ; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] seq_cst, align 32, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
346 ; GFX1100-NEXT: ret float [[RES]]
348 %res = atomicrmw fadd ptr %addr, float %val seq_cst, align 32, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0