1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX803 %s
3 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s
4 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s
5 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s
6 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX940 %s
7 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s
8 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s
9 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s
11 ;---------------------------------------------------------------------
13 ;---------------------------------------------------------------------
15 ; xchg is supported over PCIe, so no expansion is necessary
; Input: a float `atomicrmw xchg` on an addrspace(1) pointer with
; syncscope("agent") and seq_cst ordering, no extra metadata.
; The checks below use the COMMON prefix, which every RUN line enables, so all
; listed -mcpu values are expected to leave the instruction in place; the only
; visible difference in the output is the explicitly printed `align 4`.
16 define float @test_atomicrmw_xchg_f32_global_agent(ptr addrspace(1) %ptr, float %value) {
17 ; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_agent(
18 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
19 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4
20 ; COMMON-NEXT: ret float [[RES]]
22 %res = atomicrmw xchg ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst
26 ; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored.
; Same xchg as the plain agent-scope case, but the instruction carries
; !amdgpu.no.fine.grained.memory (node !0, defined outside this excerpt).
; The COMMON checks expect the atomicrmw to stay unexpanded for every RUN line
; and the metadata attachment to still be present on it in the output.
27 define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) {
28 ; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_fine_grained_memory(
29 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
30 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]]
31 ; COMMON-NEXT: ret float [[RES]]
33 %res = atomicrmw xchg ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
37 ; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored.
; Same xchg as the plain agent-scope case, but the instruction carries
; !amdgpu.no.remote.memory (node !0, defined outside this excerpt).
; The COMMON checks expect the atomicrmw to stay unexpanded for every RUN line
; and the metadata attachment to still be present on it in the output.
38 define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
39 ; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_remote_memory(
40 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
41 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
42 ; COMMON-NEXT: ret float [[RES]]
44 %res = atomicrmw xchg ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
48 ; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored.
; Same xchg as the plain agent-scope case, with both
; !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory attached
; (both reference node !0, defined outside this excerpt).
; The COMMON checks expect the atomicrmw to stay unexpanded for every RUN line
; with both attachments still present, in that order, in the output.
49 define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
50 ; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
51 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
52 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
53 ; COMMON-NEXT: ret float [[RES]]
55 %res = atomicrmw xchg ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
59 ;---------------------------------------------------------------------
61 ;---------------------------------------------------------------------
; Baseline fadd case: a float `atomicrmw fadd` on an addrspace(1) pointer with
; syncscope("agent") and seq_cst ordering, no extra metadata.
; Expected output, grouped by the check prefixes used below
;   - GFX803, GFX906, GFX908, GFX90A, GFX10, GFX11 expect an initial float load
;     followed by an `atomicrmw.start` loop that does the fadd on the loaded
;     value and retries an i32 cmpxchg (same scope and ordering) until it
;     succeeds.
;   - GFX940 and GFX12 expect the atomicrmw fadd to be left in place.
63 define float @test_atomicrmw_fadd_f32_global_agent(ptr addrspace(1) %ptr, float %value) {
64 ; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent(
65 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
66 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
67 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
68 ; GFX803: atomicrmw.start:
69 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
70 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
71 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
72 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
73 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
74 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
75 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
76 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
77 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
78 ; GFX803: atomicrmw.end:
79 ; GFX803-NEXT: ret float [[TMP5]]
81 ; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent(
82 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
83 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
84 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
85 ; GFX906: atomicrmw.start:
86 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
87 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
88 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
89 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
90 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
91 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
92 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
93 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
94 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
95 ; GFX906: atomicrmw.end:
96 ; GFX906-NEXT: ret float [[TMP5]]
98 ; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent(
99 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
100 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
101 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
102 ; GFX908: atomicrmw.start:
103 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
104 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
105 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
106 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
107 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
108 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
109 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
110 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
111 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
112 ; GFX908: atomicrmw.end:
113 ; GFX908-NEXT: ret float [[TMP5]]
115 ; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent(
116 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
117 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
118 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
119 ; GFX90A: atomicrmw.start:
120 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
121 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
122 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
123 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
124 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
125 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
126 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
127 ; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
128 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
129 ; GFX90A: atomicrmw.end:
130 ; GFX90A-NEXT: ret float [[TMP5]]
132 ; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent(
133 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
134 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4
135 ; GFX940-NEXT: ret float [[RES]]
137 ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent(
138 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
139 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
140 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
141 ; GFX10: atomicrmw.start:
142 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
143 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
144 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
145 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
146 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
147 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
148 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
149 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
150 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
151 ; GFX10: atomicrmw.end:
152 ; GFX10-NEXT: ret float [[TMP5]]
154 ; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent(
155 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
156 ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
157 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
158 ; GFX11: atomicrmw.start:
159 ; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
160 ; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
161 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
162 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
163 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
164 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
165 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
166 ; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
167 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
168 ; GFX11: atomicrmw.end:
169 ; GFX11-NEXT: ret float [[TMP5]]
171 ; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent(
172 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
173 ; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4
174 ; GFX12-NEXT: ret float [[TMP5]]
176 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst
; fadd case with !amdgpu.no.fine.grained.memory attached to the atomicrmw
; (node !0, defined outside this excerpt).
; Expected output, grouped by the check prefixes used below
;   - GFX803, GFX906, GFX908, GFX90A, GFX10 expect the load + cmpxchg retry
;     loop, with the metadata attachment carried over onto the cmpxchg.
;   - GFX940, GFX11 and GFX12 expect the atomicrmw fadd to be left in place
;     with its metadata attachment still present.
180 define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) {
181 ; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory(
182 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
183 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
184 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
185 ; GFX803: atomicrmw.start:
186 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
187 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
188 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
189 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
190 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
191 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
192 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
193 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
194 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
195 ; GFX803: atomicrmw.end:
196 ; GFX803-NEXT: ret float [[TMP5]]
198 ; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory(
199 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
200 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
201 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
202 ; GFX906: atomicrmw.start:
203 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
204 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
205 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
206 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
207 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
208 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
209 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
210 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
211 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
212 ; GFX906: atomicrmw.end:
213 ; GFX906-NEXT: ret float [[TMP5]]
215 ; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory(
216 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
217 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
218 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
219 ; GFX908: atomicrmw.start:
220 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
221 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
222 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
223 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
224 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
225 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
226 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
227 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
228 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
229 ; GFX908: atomicrmw.end:
230 ; GFX908-NEXT: ret float [[TMP5]]
232 ; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory(
233 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
234 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
235 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
236 ; GFX90A: atomicrmw.start:
237 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
238 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
239 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
240 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
241 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
242 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
243 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
244 ; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
245 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
246 ; GFX90A: atomicrmw.end:
247 ; GFX90A-NEXT: ret float [[TMP5]]
249 ; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory(
250 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
251 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
252 ; GFX940-NEXT: ret float [[RES]]
254 ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory(
255 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
256 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
257 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
258 ; GFX10: atomicrmw.start:
259 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
260 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
261 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
262 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
263 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
264 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
265 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
266 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
267 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
268 ; GFX10: atomicrmw.end:
269 ; GFX10-NEXT: ret float [[TMP5]]
271 ; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory(
272 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
273 ; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
274 ; GFX11-NEXT: ret float [[TMP5]]
276 ; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory(
277 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
278 ; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
279 ; GFX12-NEXT: ret float [[TMP5]]
281 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
; fadd case with !amdgpu.no.remote.memory attached to the atomicrmw
; (node !0, defined outside this excerpt).
; Expected output, grouped by the check prefixes used below
;   - GFX803, GFX906, GFX908, GFX90A, GFX10, GFX11 expect the load + cmpxchg
;     retry loop, with the metadata attachment carried over onto the cmpxchg.
;   - GFX940 and GFX12 expect the atomicrmw fadd to be left in place with its
;     metadata attachment still present.
285 define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
286 ; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory(
287 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
288 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
289 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
290 ; GFX803: atomicrmw.start:
291 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
292 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
293 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
294 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
295 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
296 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
297 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
298 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
299 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
300 ; GFX803: atomicrmw.end:
301 ; GFX803-NEXT: ret float [[TMP5]]
303 ; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory(
304 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
305 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
306 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
307 ; GFX906: atomicrmw.start:
308 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
309 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
310 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
311 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
312 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
313 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
314 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
315 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
316 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
317 ; GFX906: atomicrmw.end:
318 ; GFX906-NEXT: ret float [[TMP5]]
320 ; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory(
321 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
322 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
323 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
324 ; GFX908: atomicrmw.start:
325 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
326 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
327 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
328 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
329 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
330 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
331 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
332 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
333 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
334 ; GFX908: atomicrmw.end:
335 ; GFX908-NEXT: ret float [[TMP5]]
337 ; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory(
338 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
339 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
340 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
341 ; GFX90A: atomicrmw.start:
342 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
343 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
344 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
345 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
346 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
347 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
348 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
349 ; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
350 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
351 ; GFX90A: atomicrmw.end:
352 ; GFX90A-NEXT: ret float [[TMP5]]
354 ; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory(
355 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
356 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
357 ; GFX940-NEXT: ret float [[RES]]
359 ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory(
360 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
361 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
362 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
363 ; GFX10: atomicrmw.start:
364 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
365 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
366 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
367 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
368 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
369 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
370 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
371 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
372 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
373 ; GFX10: atomicrmw.end:
374 ; GFX10-NEXT: ret float [[TMP5]]
376 ; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory(
377 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
378 ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
379 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
380 ; GFX11: atomicrmw.start:
381 ; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
382 ; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
383 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
384 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
385 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
386 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
387 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
388 ; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
389 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
390 ; GFX11: atomicrmw.end:
391 ; GFX11-NEXT: ret float [[TMP5]]
393 ; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory(
394 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
395 ; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
396 ; GFX12-NEXT: ret float [[TMP5]]
398 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
402 define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
403 ; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
404 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
405 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
406 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
407 ; GFX803: atomicrmw.start:
408 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
409 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
410 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
411 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
412 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
413 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
414 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
415 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
416 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
417 ; GFX803: atomicrmw.end:
418 ; GFX803-NEXT: ret float [[TMP5]]
420 ; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
421 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
422 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
423 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
424 ; GFX906: atomicrmw.start:
425 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
426 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
427 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
428 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
429 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
430 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
431 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
432 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
433 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
434 ; GFX906: atomicrmw.end:
435 ; GFX906-NEXT: ret float [[TMP5]]
437 ; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
438 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
439 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
440 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
441 ; GFX908: atomicrmw.start:
442 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
443 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
444 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
445 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
446 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
447 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
448 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
449 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
450 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
451 ; GFX908: atomicrmw.end:
452 ; GFX908-NEXT: ret float [[TMP5]]
454 ; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
455 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
456 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
457 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
458 ; GFX90A: atomicrmw.start:
459 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
460 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
461 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
462 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
463 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
464 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
465 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
466 ; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
467 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
468 ; GFX90A: atomicrmw.end:
469 ; GFX90A-NEXT: ret float [[TMP5]]
471 ; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
472 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
473 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
474 ; GFX940-NEXT: ret float [[RES]]
476 ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
477 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
478 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
479 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
480 ; GFX10: atomicrmw.start:
481 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
482 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
483 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
484 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
485 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
486 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
487 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
488 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
489 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
490 ; GFX10: atomicrmw.end:
491 ; GFX10-NEXT: ret float [[TMP5]]
493 ; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
494 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
495 ; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
496 ; GFX11-NEXT: ret float [[TMP5]]
498 ; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
499 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
500 ; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
501 ; GFX12-NEXT: ret float [[TMP5]]
503 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
; fadd f32 on a global (addrspace 1) pointer at agent scope, seq_cst, tagged
; with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory, in a
; function that uses attribute group #0 (presumably the f32 DAZ denormal mode
; named in the function name; the attribute definition is outside this section).
; Expected: GFX803/GFX906/GFX908/GFX90A/GFX10 expand to a cmpxchg loop whose
; cmpxchg keeps both metadata nodes; GFX940/GFX11/GFX12 keep the atomicrmw.
507 define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(ptr addrspace(1) %ptr, float %value) #0 {
508 ; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
509 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
510 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
511 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
512 ; GFX803: atomicrmw.start:
513 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
514 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
515 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
516 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
517 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
518 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
519 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
520 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
521 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
522 ; GFX803: atomicrmw.end:
523 ; GFX803-NEXT: ret float [[TMP5]]
525 ; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
526 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
527 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
528 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
529 ; GFX906: atomicrmw.start:
530 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
531 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
532 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
533 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
534 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
535 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
536 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
537 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
538 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
539 ; GFX906: atomicrmw.end:
540 ; GFX906-NEXT: ret float [[TMP5]]
542 ; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
543 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
544 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
545 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
546 ; GFX908: atomicrmw.start:
547 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
548 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
549 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
550 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
551 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
552 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
553 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
554 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
555 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
556 ; GFX908: atomicrmw.end:
557 ; GFX908-NEXT: ret float [[TMP5]]
559 ; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
560 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
561 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
562 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
563 ; GFX90A: atomicrmw.start:
564 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
565 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
566 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
567 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
568 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
569 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
570 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
571 ; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
572 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
573 ; GFX90A: atomicrmw.end:
574 ; GFX90A-NEXT: ret float [[TMP5]]
576 ; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
577 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
578 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
579 ; GFX940-NEXT: ret float [[RES]]
581 ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
582 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
583 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
584 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
585 ; GFX10: atomicrmw.start:
586 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
587 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
588 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
589 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
590 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
591 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
592 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
593 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
594 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
595 ; GFX10: atomicrmw.end:
596 ; GFX10-NEXT: ret float [[TMP5]]
598 ; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
599 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
600 ; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
601 ; GFX11-NEXT: ret float [[TMP5]]
603 ; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
604 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
605 ; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
606 ; GFX12-NEXT: ret float [[TMP5]]
608 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
; fadd f32 on a global (addrspace 1) pointer at agent scope, seq_cst, tagged
; with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory, in a
; function that uses attribute group #1 (presumably the dynamic f32 denormal
; mode named in the function name; the attribute definition is outside this
; section).
; Expected: GFX803/GFX906/GFX908/GFX90A/GFX10 expand to a cmpxchg loop whose
; cmpxchg keeps both metadata nodes; GFX940/GFX11/GFX12 keep the atomicrmw.
612 define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(ptr addrspace(1) %ptr, float %value) #1 {
613 ; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
614 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
615 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
616 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
617 ; GFX803: atomicrmw.start:
618 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
619 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
620 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
621 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
622 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
623 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
624 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
625 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
626 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
627 ; GFX803: atomicrmw.end:
628 ; GFX803-NEXT: ret float [[TMP5]]
630 ; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
631 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
632 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
633 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
634 ; GFX906: atomicrmw.start:
635 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
636 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
637 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
638 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
639 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
640 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
641 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
642 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
643 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
644 ; GFX906: atomicrmw.end:
645 ; GFX906-NEXT: ret float [[TMP5]]
647 ; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
648 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
649 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
650 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
651 ; GFX908: atomicrmw.start:
652 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
653 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
654 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
655 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
656 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
657 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
658 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
659 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
660 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
661 ; GFX908: atomicrmw.end:
662 ; GFX908-NEXT: ret float [[TMP5]]
664 ; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
665 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
666 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
667 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
668 ; GFX90A: atomicrmw.start:
669 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
670 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
671 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
672 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
673 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
674 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
675 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
676 ; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
677 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
678 ; GFX90A: atomicrmw.end:
679 ; GFX90A-NEXT: ret float [[TMP5]]
681 ; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
682 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
683 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
684 ; GFX940-NEXT: ret float [[RES]]
686 ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
687 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
688 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
689 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
690 ; GFX10: atomicrmw.start:
691 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
692 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
693 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
694 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
695 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
696 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
697 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
698 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
699 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
700 ; GFX10: atomicrmw.end:
701 ; GFX10-NEXT: ret float [[TMP5]]
703 ; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
704 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
705 ; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
706 ; GFX11-NEXT: ret float [[TMP5]]
708 ; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
709 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
710 ; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
711 ; GFX12-NEXT: ret float [[TMP5]]
713 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
; fadd f32 on a global (addrspace 1) pointer at agent scope, seq_cst, tagged
; only with !amdgpu.ignore.denormal.mode (no memory-kind metadata).
; Expected: GFX803/GFX906/GFX908/GFX90A/GFX10/GFX11 expand to a cmpxchg loop
; whose cmpxchg carries no metadata; GFX940 and GFX12 keep the atomicrmw with
; the !amdgpu.ignore.denormal.mode metadata intact.
717 define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, float %value) {
718 ; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode(
719 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
720 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
721 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
722 ; GFX803: atomicrmw.start:
723 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
724 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
725 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
726 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
727 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
728 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
729 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
730 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
731 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
732 ; GFX803: atomicrmw.end:
733 ; GFX803-NEXT: ret float [[TMP5]]
735 ; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode(
736 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
737 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
738 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
739 ; GFX906: atomicrmw.start:
740 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
741 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
742 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
743 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
744 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
745 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
746 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
747 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
748 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
749 ; GFX906: atomicrmw.end:
750 ; GFX906-NEXT: ret float [[TMP5]]
752 ; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode(
753 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
754 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
755 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
756 ; GFX908: atomicrmw.start:
757 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
758 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
759 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
760 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
761 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
762 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
763 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
764 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
765 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
766 ; GFX908: atomicrmw.end:
767 ; GFX908-NEXT: ret float [[TMP5]]
769 ; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode(
770 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
771 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
772 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
773 ; GFX90A: atomicrmw.start:
774 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
775 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
776 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
777 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
778 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
779 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
780 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
781 ; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
782 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
783 ; GFX90A: atomicrmw.end:
784 ; GFX90A-NEXT: ret float [[TMP5]]
786 ; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode(
787 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
788 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]]
789 ; GFX940-NEXT: ret float [[RES]]
791 ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode(
792 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
793 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
794 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
795 ; GFX10: atomicrmw.start:
796 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
797 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
798 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
799 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
800 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
801 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
802 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
803 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
804 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
805 ; GFX10: atomicrmw.end:
806 ; GFX10-NEXT: ret float [[TMP5]]
808 ; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode(
809 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
810 ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
811 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
812 ; GFX11: atomicrmw.start:
813 ; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
814 ; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
815 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
816 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
817 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
818 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
819 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
820 ; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
821 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
822 ; GFX11: atomicrmw.end:
823 ; GFX11-NEXT: ret float [[TMP5]]
825 ; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode(
826 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
827 ; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]]
828 ; GFX12-NEXT: ret float [[TMP5]]
830 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
834 define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) {
835 ; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
836 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
837 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
838 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
839 ; GFX803: atomicrmw.start:
840 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
841 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
842 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
843 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
844 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
845 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
846 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
847 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
848 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
849 ; GFX803: atomicrmw.end:
850 ; GFX803-NEXT: ret float [[TMP5]]
852 ; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
853 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
854 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
855 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
856 ; GFX906: atomicrmw.start:
857 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
858 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
859 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
860 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
861 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
862 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
863 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
864 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
865 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
866 ; GFX906: atomicrmw.end:
867 ; GFX906-NEXT: ret float [[TMP5]]
869 ; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
870 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
871 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
872 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
873 ; GFX908: atomicrmw.start:
874 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
875 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
876 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
877 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
878 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
879 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
880 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
881 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
882 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
883 ; GFX908: atomicrmw.end:
884 ; GFX908-NEXT: ret float [[TMP5]]
886 ; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
887 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
888 ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
889 ; GFX90A-NEXT: ret float [[TMP5]]
891 ; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
892 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
893 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
894 ; GFX940-NEXT: ret float [[RES]]
896 ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
897 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
898 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
899 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
900 ; GFX10: atomicrmw.start:
901 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
902 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
903 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
904 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
905 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
906 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
907 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
908 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
909 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
910 ; GFX10: atomicrmw.end:
911 ; GFX10-NEXT: ret float [[TMP5]]
913 ; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
914 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
915 ; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
916 ; GFX11-NEXT: ret float [[TMP5]]
918 ; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
919 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
920 ; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
921 ; GFX12-NEXT: ret float [[TMP5]]
923 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
; fadd f32 on a global (addrspace(1)) pointer at agent scope, tagged with
; !amdgpu.no.remote.memory and !amdgpu.ignore.denormal.mode (no
; !amdgpu.no.fine.grained.memory). Per the checks below, only gfx940 and
; gfx1200 keep the atomicrmw; gfx803, gfx906, gfx908, gfx90a, gfx1030 and
; gfx1100 expand it to a cmpxchg loop, and that cmpxchg carries
; !amdgpu.no.remote.memory but not !amdgpu.ignore.denormal.mode.
927 define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
928 ; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
929 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
930 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
931 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
932 ; GFX803: atomicrmw.start:
933 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
934 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
935 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
936 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
937 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
938 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
939 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
940 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
941 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
942 ; GFX803: atomicrmw.end:
943 ; GFX803-NEXT: ret float [[TMP5]]
945 ; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
946 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
947 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
948 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
949 ; GFX906: atomicrmw.start:
950 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
951 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
952 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
953 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
954 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
955 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
956 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
957 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
958 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
959 ; GFX906: atomicrmw.end:
960 ; GFX906-NEXT: ret float [[TMP5]]
962 ; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
963 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
964 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
965 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
966 ; GFX908: atomicrmw.start:
967 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
968 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
969 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
970 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
971 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
972 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
973 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
974 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
975 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
976 ; GFX908: atomicrmw.end:
977 ; GFX908-NEXT: ret float [[TMP5]]
979 ; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
980 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
981 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
982 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
983 ; GFX90A: atomicrmw.start:
984 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
985 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
986 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
987 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
988 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
989 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
990 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
991 ; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
992 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
993 ; GFX90A: atomicrmw.end:
994 ; GFX90A-NEXT: ret float [[TMP5]]
996 ; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
997 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
998 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
999 ; GFX940-NEXT: ret float [[RES]]
1001 ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
1002 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1003 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1004 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
1005 ; GFX10: atomicrmw.start:
1006 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1007 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1008 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1009 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1010 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
1011 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1012 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1013 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1014 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1015 ; GFX10: atomicrmw.end:
1016 ; GFX10-NEXT: ret float [[TMP5]]
1018 ; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
1019 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1020 ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1021 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
1022 ; GFX11: atomicrmw.start:
1023 ; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1024 ; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1025 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1026 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1027 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
1028 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1029 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1030 ; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1031 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1032 ; GFX11: atomicrmw.end:
1033 ; GFX11-NEXT: ret float [[TMP5]]
1035 ; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
1036 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1037 ; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
1038 ; GFX12-NEXT: ret float [[TMP5]]
1040 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
; fadd f32 on a global (addrspace(1)) pointer at agent scope, tagged with all
; three of !amdgpu.no.fine.grained.memory, !amdgpu.no.remote.memory and
; !amdgpu.ignore.denormal.mode. Per the checks below, gfx90a, gfx940, gfx1100
; and gfx1200 keep the atomicrmw with its metadata; gfx803, gfx906, gfx908 and
; gfx1030 expand it to a cmpxchg loop, and that cmpxchg carries the two memory
; annotations but not !amdgpu.ignore.denormal.mode.
1044 define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
1045 ; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1046 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1047 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1048 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
1049 ; GFX803: atomicrmw.start:
1050 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1051 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1052 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1053 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1054 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1055 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1056 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1057 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1058 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1059 ; GFX803: atomicrmw.end:
1060 ; GFX803-NEXT: ret float [[TMP5]]
1062 ; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1063 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1064 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1065 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
1066 ; GFX906: atomicrmw.start:
1067 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1068 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1069 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1070 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1071 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1072 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1073 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1074 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1075 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1076 ; GFX906: atomicrmw.end:
1077 ; GFX906-NEXT: ret float [[TMP5]]
1079 ; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1080 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1081 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1082 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
1083 ; GFX908: atomicrmw.start:
1084 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1085 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1086 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1087 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1088 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1089 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1090 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1091 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1092 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1093 ; GFX908: atomicrmw.end:
1094 ; GFX908-NEXT: ret float [[TMP5]]
1096 ; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1097 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1098 ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
1099 ; GFX90A-NEXT: ret float [[TMP5]]
1101 ; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1102 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1103 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
1104 ; GFX940-NEXT: ret float [[RES]]
1106 ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1107 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1108 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1109 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
1110 ; GFX10: atomicrmw.start:
1111 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1112 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1113 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1114 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1115 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1116 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1117 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1118 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1119 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1120 ; GFX10: atomicrmw.end:
1121 ; GFX10-NEXT: ret float [[TMP5]]
1123 ; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1124 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1125 ; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
1126 ; GFX11-NEXT: ret float [[TMP5]]
1128 ; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1129 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1130 ; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
1131 ; GFX12-NEXT: ret float [[TMP5]]
1133 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
; Same fadd f32 instruction and metadata (!amdgpu.no.fine.grained.memory,
; !amdgpu.no.remote.memory, !amdgpu.ignore.denormal.mode) as the preceding
; test, but the function carries attribute group #0.
; NOTE(review): #0 is defined outside this part of the file; the "_daz" suffix
; suggests it selects a denormals-are-zero f32 mode -- confirm.
; Per the checks below, gfx90a, gfx940, gfx1100 and gfx1200 keep the
; atomicrmw; gfx803, gfx906, gfx908 and gfx1030 expand it to a cmpxchg loop
; whose cmpxchg carries the two memory annotations only.
1137 define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(ptr addrspace(1) %ptr, float %value) #0 {
1138 ; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
1139 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
1140 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1141 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
1142 ; GFX803: atomicrmw.start:
1143 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1144 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1145 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1146 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1147 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1148 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1149 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1150 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1151 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1152 ; GFX803: atomicrmw.end:
1153 ; GFX803-NEXT: ret float [[TMP5]]
1155 ; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
1156 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
1157 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1158 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
1159 ; GFX906: atomicrmw.start:
1160 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1161 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1162 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1163 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1164 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1165 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1166 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1167 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1168 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1169 ; GFX906: atomicrmw.end:
1170 ; GFX906-NEXT: ret float [[TMP5]]
1172 ; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
1173 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
1174 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1175 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
1176 ; GFX908: atomicrmw.start:
1177 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1178 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1179 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1180 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1181 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1182 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1183 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1184 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1185 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1186 ; GFX908: atomicrmw.end:
1187 ; GFX908-NEXT: ret float [[TMP5]]
1189 ; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
1190 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
1191 ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
1192 ; GFX90A-NEXT: ret float [[TMP5]]
1194 ; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
1195 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
1196 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
1197 ; GFX940-NEXT: ret float [[RES]]
1199 ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
1200 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
1201 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1202 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
1203 ; GFX10: atomicrmw.start:
1204 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1205 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1206 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1207 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1208 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1209 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1210 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1211 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1212 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1213 ; GFX10: atomicrmw.end:
1214 ; GFX10-NEXT: ret float [[TMP5]]
1216 ; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
1217 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
1218 ; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
1219 ; GFX11-NEXT: ret float [[TMP5]]
1221 ; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
1222 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
1223 ; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
1224 ; GFX12-NEXT: ret float [[TMP5]]
1226 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
1230 define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(ptr addrspace(1) %ptr, float %value) #1 {
1231 ; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
1232 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
1233 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1234 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
1235 ; GFX803: atomicrmw.start:
1236 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1237 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1238 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1239 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1240 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1241 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1242 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1243 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1244 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1245 ; GFX803: atomicrmw.end:
1246 ; GFX803-NEXT: ret float [[TMP5]]
1248 ; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
1249 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
1250 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1251 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
1252 ; GFX906: atomicrmw.start:
1253 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1254 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1255 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1256 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1257 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1258 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1259 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1260 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1261 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1262 ; GFX906: atomicrmw.end:
1263 ; GFX906-NEXT: ret float [[TMP5]]
1265 ; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
1266 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
1267 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1268 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
1269 ; GFX908: atomicrmw.start:
1270 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1271 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1272 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1273 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1274 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1275 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1276 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1277 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1278 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1279 ; GFX908: atomicrmw.end:
1280 ; GFX908-NEXT: ret float [[TMP5]]
1282 ; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
1283 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
1284 ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
1285 ; GFX90A-NEXT: ret float [[TMP5]]
1287 ; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
1288 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
1289 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
1290 ; GFX940-NEXT: ret float [[RES]]
1292 ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
1293 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
1294 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1295 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
1296 ; GFX10: atomicrmw.start:
1297 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1298 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1299 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1300 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1301 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1302 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1303 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1304 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1305 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1306 ; GFX10: atomicrmw.end:
1307 ; GFX10-NEXT: ret float [[TMP5]]
1309 ; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
1310 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
1311 ; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
1312 ; GFX11-NEXT: ret float [[TMP5]]
1314 ; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
1315 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
1316 ; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
1317 ; GFX12-NEXT: ret float [[TMP5]]
1319 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
1323 ;---------------------------------------------------------------------
1324 ; atomicrmw fadd (no return)
1325 ;---------------------------------------------------------------------
; No-return f32 fadd on a global pointer at agent scope with no metadata
; attached. Per the checks below, gfx940 and gfx1200 keep the atomicrmw as is,
; while every other tested target expands it into a cmpxchg loop.
1327 define void @test_atomicrmw_fadd_noret_f32_global_agent(ptr addrspace(1) %ptr, float %value) {
1328 ; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent(
1329 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1330 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1331 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
1332 ; GFX803: atomicrmw.start:
1333 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1334 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1335 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1336 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1337 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
1338 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1339 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1340 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1341 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1342 ; GFX803: atomicrmw.end:
1343 ; GFX803-NEXT: ret void
1345 ; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent(
1346 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1347 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1348 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
1349 ; GFX906: atomicrmw.start:
1350 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1351 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1352 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1353 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1354 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
1355 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1356 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1357 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1358 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1359 ; GFX906: atomicrmw.end:
1360 ; GFX906-NEXT: ret void
1362 ; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent(
1363 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1364 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1365 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
1366 ; GFX908: atomicrmw.start:
1367 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1368 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1369 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1370 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1371 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
1372 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1373 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1374 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1375 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1376 ; GFX908: atomicrmw.end:
1377 ; GFX908-NEXT: ret void
1379 ; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent(
1380 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1381 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1382 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
1383 ; GFX90A: atomicrmw.start:
1384 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1385 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1386 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1387 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1388 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
1389 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1390 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1391 ; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1392 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1393 ; GFX90A: atomicrmw.end:
1394 ; GFX90A-NEXT: ret void
1396 ; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent(
1397 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1398 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4
1399 ; GFX940-NEXT: ret void
1401 ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent(
1402 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1403 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1404 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
1405 ; GFX10: atomicrmw.start:
1406 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1407 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1408 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1409 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1410 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
1411 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1412 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1413 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1414 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1415 ; GFX10: atomicrmw.end:
1416 ; GFX10-NEXT: ret void
1418 ; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent(
1419 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1420 ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1421 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
1422 ; GFX11: atomicrmw.start:
1423 ; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1424 ; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1425 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1426 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1427 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
1428 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1429 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1430 ; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1431 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1432 ; GFX11: atomicrmw.end:
1433 ; GFX11-NEXT: ret void
1435 ; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent(
1436 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1437 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4
1438 ; GFX12-NEXT: ret void
1440 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst
; No-return f32 fadd on a global pointer at agent scope, tagged with
; !amdgpu.no.fine.grained.memory. Per the checks below, gfx940, gfx1100 and
; gfx1200 keep the atomicrmw as is, while the other tested targets expand it
; into a cmpxchg loop whose cmpxchg carries the same metadata.
1444 define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) {
1445 ; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory(
1446 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1447 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1448 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
1449 ; GFX803: atomicrmw.start:
1450 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1451 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1452 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1453 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1454 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
1455 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1456 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1457 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1458 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1459 ; GFX803: atomicrmw.end:
1460 ; GFX803-NEXT: ret void
1462 ; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory(
1463 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1464 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1465 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
1466 ; GFX906: atomicrmw.start:
1467 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1468 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1469 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1470 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1471 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
1472 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1473 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1474 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1475 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1476 ; GFX906: atomicrmw.end:
1477 ; GFX906-NEXT: ret void
1479 ; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory(
1480 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1481 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1482 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
1483 ; GFX908: atomicrmw.start:
1484 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1485 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1486 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1487 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1488 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
1489 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1490 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1491 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1492 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1493 ; GFX908: atomicrmw.end:
1494 ; GFX908-NEXT: ret void
1496 ; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory(
1497 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1498 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1499 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
1500 ; GFX90A: atomicrmw.start:
1501 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1502 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1503 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1504 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1505 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
1506 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1507 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1508 ; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1509 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1510 ; GFX90A: atomicrmw.end:
1511 ; GFX90A-NEXT: ret void
1513 ; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory(
1514 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1515 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
1516 ; GFX940-NEXT: ret void
1518 ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory(
1519 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1520 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1521 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
1522 ; GFX10: atomicrmw.start:
1523 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1524 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1525 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1526 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1527 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
1528 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1529 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1530 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1531 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1532 ; GFX10: atomicrmw.end:
1533 ; GFX10-NEXT: ret void
1535 ; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory(
1536 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1537 ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
1538 ; GFX11-NEXT: ret void
1540 ; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory(
1541 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1542 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
1543 ; GFX12-NEXT: ret void
1545 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
; No-return f32 fadd on a global pointer at agent scope, tagged with
; !amdgpu.no.remote.memory only. Per the checks below, gfx940 and gfx1200 keep
; the atomicrmw as is, while the other tested targets (gfx1100 included)
; expand it into a cmpxchg loop whose cmpxchg carries the same metadata.
1549 define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
1550 ; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory(
1551 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1552 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1553 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
1554 ; GFX803: atomicrmw.start:
1555 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1556 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1557 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1558 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1559 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
1560 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1561 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1562 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1563 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1564 ; GFX803: atomicrmw.end:
1565 ; GFX803-NEXT: ret void
1567 ; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory(
1568 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1569 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1570 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
1571 ; GFX906: atomicrmw.start:
1572 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1573 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1574 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1575 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1576 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
1577 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1578 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1579 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1580 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1581 ; GFX906: atomicrmw.end:
1582 ; GFX906-NEXT: ret void
1584 ; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory(
1585 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1586 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1587 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
1588 ; GFX908: atomicrmw.start:
1589 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1590 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1591 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1592 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1593 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
1594 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1595 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1596 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1597 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1598 ; GFX908: atomicrmw.end:
1599 ; GFX908-NEXT: ret void
1601 ; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory(
1602 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1603 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1604 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
1605 ; GFX90A: atomicrmw.start:
1606 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1607 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1608 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1609 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1610 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
1611 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1612 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1613 ; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1614 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1615 ; GFX90A: atomicrmw.end:
1616 ; GFX90A-NEXT: ret void
1618 ; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory(
1619 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1620 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
1621 ; GFX940-NEXT: ret void
1623 ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory(
1624 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1625 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1626 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
1627 ; GFX10: atomicrmw.start:
1628 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1629 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1630 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1631 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1632 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
1633 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1634 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1635 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1636 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1637 ; GFX10: atomicrmw.end:
1638 ; GFX10-NEXT: ret void
1640 ; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory(
1641 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1642 ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1643 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
1644 ; GFX11: atomicrmw.start:
1645 ; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1646 ; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1647 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1648 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1649 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
1650 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1651 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1652 ; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1653 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1654 ; GFX11: atomicrmw.end:
1655 ; GFX11-NEXT: ret void
1657 ; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory(
1658 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1659 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
1660 ; GFX12-NEXT: ret void
1662 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
1666 define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
1667 ; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1668 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1669 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1670 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
1671 ; GFX803: atomicrmw.start:
1672 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1673 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1674 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1675 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1676 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1677 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1678 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1679 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1680 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1681 ; GFX803: atomicrmw.end:
1682 ; GFX803-NEXT: ret void
1684 ; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1685 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1686 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1687 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
1688 ; GFX906: atomicrmw.start:
1689 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1690 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1691 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1692 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1693 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1694 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1695 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1696 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1697 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1698 ; GFX906: atomicrmw.end:
1699 ; GFX906-NEXT: ret void
1701 ; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1702 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1703 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1704 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
1705 ; GFX908: atomicrmw.start:
1706 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1707 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1708 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1709 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1710 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1711 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1712 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1713 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1714 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1715 ; GFX908: atomicrmw.end:
1716 ; GFX908-NEXT: ret void
1718 ; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1719 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1720 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1721 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
1722 ; GFX90A: atomicrmw.start:
1723 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1724 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1725 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1726 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1727 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1728 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1729 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1730 ; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1731 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1732 ; GFX90A: atomicrmw.end:
1733 ; GFX90A-NEXT: ret void
1735 ; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1736 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1737 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1738 ; GFX940-NEXT: ret void
1740 ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1741 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1742 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1743 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
1744 ; GFX10: atomicrmw.start:
1745 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1746 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1747 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1748 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1749 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1750 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1751 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1752 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1753 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1754 ; GFX10: atomicrmw.end:
1755 ; GFX10-NEXT: ret void
1757 ; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1758 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1759 ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1760 ; GFX11-NEXT: ret void
1762 ; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1763 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1764 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1765 ; GFX12-NEXT: ret void
1767 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
; No-return (void) f32 fadd on a global (addrspace(1)) pointer at agent scope,
; seq_cst, tagged with both !amdgpu.no.fine.grained.memory and
; !amdgpu.no.remote.memory. The function uses attribute group #0, presumably
; the f32 DAZ denormal mode the name refers to (its definition is outside this
; part of the file, so confirm there).
; Expected results per the checks below: gfx803, gfx906, gfx908, gfx90a and
; gfx1030 expand into a load + cmpxchg retry loop whose cmpxchg keeps both
; metadata nodes, while gfx940, gfx1100 and gfx1200 leave the atomicrmw as is.
1771 define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(ptr addrspace(1) %ptr, float %value) #0 {
1772 ; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
1773 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
1774 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1775 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
1776 ; GFX803: atomicrmw.start:
1777 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1778 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1779 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1780 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1781 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1782 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1783 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1784 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1785 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1786 ; GFX803: atomicrmw.end:
1787 ; GFX803-NEXT: ret void
1789 ; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
1790 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
1791 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1792 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
1793 ; GFX906: atomicrmw.start:
1794 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1795 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1796 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1797 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1798 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1799 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1800 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1801 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1802 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1803 ; GFX906: atomicrmw.end:
1804 ; GFX906-NEXT: ret void
1806 ; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
1807 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
1808 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1809 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
1810 ; GFX908: atomicrmw.start:
1811 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1812 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1813 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1814 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1815 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1816 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1817 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1818 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1819 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1820 ; GFX908: atomicrmw.end:
1821 ; GFX908-NEXT: ret void
1823 ; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
1824 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
1825 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1826 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
1827 ; GFX90A: atomicrmw.start:
1828 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1829 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1830 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1831 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1832 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1833 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1834 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1835 ; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1836 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1837 ; GFX90A: atomicrmw.end:
1838 ; GFX90A-NEXT: ret void
1840 ; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
1841 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
1842 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1843 ; GFX940-NEXT: ret void
1845 ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
1846 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
1847 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1848 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
1849 ; GFX10: atomicrmw.start:
1850 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1851 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1852 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1853 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1854 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1855 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1856 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1857 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1858 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1859 ; GFX10: atomicrmw.end:
1860 ; GFX10-NEXT: ret void
1862 ; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
1863 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
1864 ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1865 ; GFX11-NEXT: ret void
1867 ; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
1868 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
1869 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1870 ; GFX12-NEXT: ret void
1872 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
; No-return (void) f32 fadd on a global (addrspace(1)) pointer at agent scope,
; seq_cst, tagged with both !amdgpu.no.fine.grained.memory and
; !amdgpu.no.remote.memory. The function uses attribute group #1, presumably
; the dynamic f32 denormal mode the name refers to (its definition is outside
; this part of the file, so confirm there).
; Expected results per the checks below: gfx803, gfx906, gfx908, gfx90a and
; gfx1030 expand into a load + cmpxchg retry loop whose cmpxchg keeps both
; metadata nodes, while gfx940, gfx1100 and gfx1200 leave the atomicrmw as is.
1876 define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(ptr addrspace(1) %ptr, float %value) #1 {
1877 ; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
1878 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
1879 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1880 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
1881 ; GFX803: atomicrmw.start:
1882 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1883 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1884 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1885 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1886 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1887 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1888 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1889 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1890 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1891 ; GFX803: atomicrmw.end:
1892 ; GFX803-NEXT: ret void
1894 ; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
1895 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
1896 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1897 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
1898 ; GFX906: atomicrmw.start:
1899 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1900 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1901 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1902 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1903 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1904 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1905 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1906 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1907 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1908 ; GFX906: atomicrmw.end:
1909 ; GFX906-NEXT: ret void
1911 ; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
1912 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
1913 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1914 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
1915 ; GFX908: atomicrmw.start:
1916 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1917 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1918 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1919 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1920 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1921 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1922 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1923 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1924 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1925 ; GFX908: atomicrmw.end:
1926 ; GFX908-NEXT: ret void
1928 ; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
1929 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
1930 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1931 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
1932 ; GFX90A: atomicrmw.start:
1933 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1934 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1935 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1936 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1937 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1938 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1939 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1940 ; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1941 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1942 ; GFX90A: atomicrmw.end:
1943 ; GFX90A-NEXT: ret void
1945 ; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
1946 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
1947 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1948 ; GFX940-NEXT: ret void
1950 ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
1951 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
1952 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1953 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
1954 ; GFX10: atomicrmw.start:
1955 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1956 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1957 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1958 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1959 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1960 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1961 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1962 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1963 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1964 ; GFX10: atomicrmw.end:
1965 ; GFX10-NEXT: ret void
1967 ; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
1968 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
1969 ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1970 ; GFX11-NEXT: ret void
1972 ; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
1973 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
1974 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1975 ; GFX12-NEXT: ret void
1977 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
; No-return (void) f32 fadd on a global (addrspace(1)) pointer at agent scope,
; seq_cst, align 4, tagged only with !amdgpu.ignore.denormal.mode (no memory
; metadata, no function attribute group).
; Expected results per the checks below: only gfx940 and gfx1200 leave the
; atomicrmw (and its metadata) untouched. gfx803, gfx906, gfx908, gfx90a,
; gfx1030 and gfx1100 expand into a load + cmpxchg retry loop, and the
; generated cmpxchg carries no metadata.
1981 define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, float %value) {
1982 ; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode(
1983 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
1984 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
1985 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
1986 ; GFX803: atomicrmw.start:
1987 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1988 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
1989 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
1990 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
1991 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
1992 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1993 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
1994 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
1995 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1996 ; GFX803: atomicrmw.end:
1997 ; GFX803-NEXT: ret void
1999 ; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode(
2000 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2001 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2002 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
2003 ; GFX906: atomicrmw.start:
2004 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2005 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2006 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2007 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2008 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
2009 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2010 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2011 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2012 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2013 ; GFX906: atomicrmw.end:
2014 ; GFX906-NEXT: ret void
2016 ; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode(
2017 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2018 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2019 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
2020 ; GFX908: atomicrmw.start:
2021 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2022 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2023 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2024 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2025 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
2026 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2027 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2028 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2029 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2030 ; GFX908: atomicrmw.end:
2031 ; GFX908-NEXT: ret void
2033 ; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode(
2034 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2035 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2036 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
2037 ; GFX90A: atomicrmw.start:
2038 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2039 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2040 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2041 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2042 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
2043 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2044 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2045 ; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2046 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2047 ; GFX90A: atomicrmw.end:
2048 ; GFX90A-NEXT: ret void
2050 ; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode(
2051 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2052 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]]
2053 ; GFX940-NEXT: ret void
2055 ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode(
2056 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2057 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2058 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
2059 ; GFX10: atomicrmw.start:
2060 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2061 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2062 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2063 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2064 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
2065 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2066 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2067 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2068 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2069 ; GFX10: atomicrmw.end:
2070 ; GFX10-NEXT: ret void
2072 ; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode(
2073 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2074 ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2075 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
2076 ; GFX11: atomicrmw.start:
2077 ; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2078 ; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2079 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2080 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2081 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
2082 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2083 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2084 ; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2085 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2086 ; GFX11: atomicrmw.end:
2087 ; GFX11-NEXT: ret void
2089 ; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode(
2090 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2091 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]]
2092 ; GFX12-NEXT: ret void
2094 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
; No-return (void) f32 fadd on a global (addrspace(1)) pointer at agent scope,
; seq_cst, align 4, tagged with both !amdgpu.no.fine.grained.memory and
; !amdgpu.ignore.denormal.mode.
; Expected results per the checks below: gfx908, gfx90a, gfx940, gfx1100 and
; gfx1200 leave the atomicrmw untouched with both metadata nodes. gfx803,
; gfx906 and gfx1030 expand into a load + cmpxchg retry loop whose cmpxchg
; keeps only !amdgpu.no.fine.grained.memory.
2098 define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) {
2099 ; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
2100 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2101 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2102 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
2103 ; GFX803: atomicrmw.start:
2104 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2105 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2106 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2107 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2108 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
2109 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2110 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2111 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2112 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2113 ; GFX803: atomicrmw.end:
2114 ; GFX803-NEXT: ret void
2116 ; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
2117 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2118 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2119 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
2120 ; GFX906: atomicrmw.start:
2121 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2122 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2123 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2124 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2125 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
2126 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2127 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2128 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2129 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2130 ; GFX906: atomicrmw.end:
2131 ; GFX906-NEXT: ret void
2133 ; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
2134 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2135 ; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2136 ; GFX908-NEXT: ret void
2138 ; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
2139 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2140 ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2141 ; GFX90A-NEXT: ret void
2143 ; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
2144 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2145 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2146 ; GFX940-NEXT: ret void
2148 ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
2149 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2150 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2151 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
2152 ; GFX10: atomicrmw.start:
2153 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2154 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2155 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2156 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2157 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
2158 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2159 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2160 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2161 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2162 ; GFX10: atomicrmw.end:
2163 ; GFX10-NEXT: ret void
2165 ; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
2166 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2167 ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2168 ; GFX11-NEXT: ret void
2170 ; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
2171 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2172 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2173 ; GFX12-NEXT: ret void
2175 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
; fadd with !amdgpu.no.remote.memory and !amdgpu.ignore.denormal.mode, result unused.
; Per the checks below, the gfx940 and gfx1200 runs keep the atomicrmw as-is,
; while the gfx803, gfx906, gfx908, gfx90a, gfx1030 and gfx1100 runs expand it
; into a load + cmpxchg loop. The cmpxchg keeps !amdgpu.no.remote.memory but
; does not carry !amdgpu.ignore.denormal.mode.
2179 define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
2180 ; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
2181 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2182 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2183 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
2184 ; GFX803: atomicrmw.start:
2185 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2186 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2187 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2188 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2189 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
2190 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2191 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2192 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2193 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2194 ; GFX803: atomicrmw.end:
2195 ; GFX803-NEXT: ret void
2197 ; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
2198 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2199 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2200 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
2201 ; GFX906: atomicrmw.start:
2202 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2203 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2204 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2205 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2206 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
2207 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2208 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2209 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2210 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2211 ; GFX906: atomicrmw.end:
2212 ; GFX906-NEXT: ret void
2214 ; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
2215 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2216 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2217 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
2218 ; GFX908: atomicrmw.start:
2219 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2220 ; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2221 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2222 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2223 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
2224 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2225 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2226 ; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2227 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2228 ; GFX908: atomicrmw.end:
2229 ; GFX908-NEXT: ret void
2231 ; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
2232 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2233 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2234 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
2235 ; GFX90A: atomicrmw.start:
2236 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2237 ; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2238 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2239 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2240 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
2241 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2242 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2243 ; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2244 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2245 ; GFX90A: atomicrmw.end:
2246 ; GFX90A-NEXT: ret void
2248 ; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
2249 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2250 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2251 ; GFX940-NEXT: ret void
2253 ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
2254 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2255 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2256 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
2257 ; GFX10: atomicrmw.start:
2258 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2259 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2260 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2261 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2262 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
2263 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2264 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2265 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2266 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2267 ; GFX10: atomicrmw.end:
2268 ; GFX10-NEXT: ret void
2270 ; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
2271 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2272 ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2273 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
2274 ; GFX11: atomicrmw.start:
2275 ; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2276 ; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2277 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2278 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2279 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
2280 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2281 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2282 ; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2283 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2284 ; GFX11: atomicrmw.end:
2285 ; GFX11-NEXT: ret void
2287 ; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
2288 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2289 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2290 ; GFX12-NEXT: ret void
2292 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
; fadd with !amdgpu.no.fine.grained.memory, !amdgpu.no.remote.memory and
; !amdgpu.ignore.denormal.mode, result unused.
; Per the checks below, the gfx908, gfx90a, gfx940, gfx1100 and gfx1200 runs
; keep the atomicrmw as-is, while the gfx803, gfx906 and gfx1030 runs expand it
; into a load + cmpxchg loop. The cmpxchg keeps both memory metadata nodes but
; does not carry !amdgpu.ignore.denormal.mode.
2296 define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
2297 ; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
2298 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2299 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2300 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
2301 ; GFX803: atomicrmw.start:
2302 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2303 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2304 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2305 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2306 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
2307 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2308 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2309 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2310 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2311 ; GFX803: atomicrmw.end:
2312 ; GFX803-NEXT: ret void
2314 ; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
2315 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2316 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2317 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
2318 ; GFX906: atomicrmw.start:
2319 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2320 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2321 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2322 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2323 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
2324 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2325 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2326 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2327 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2328 ; GFX906: atomicrmw.end:
2329 ; GFX906-NEXT: ret void
2331 ; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
2332 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2333 ; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2334 ; GFX908-NEXT: ret void
2336 ; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
2337 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2338 ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2339 ; GFX90A-NEXT: ret void
2341 ; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
2342 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2343 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2344 ; GFX940-NEXT: ret void
2346 ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
2347 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2348 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2349 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
2350 ; GFX10: atomicrmw.start:
2351 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2352 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2353 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2354 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2355 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
2356 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2357 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2358 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2359 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2360 ; GFX10: atomicrmw.end:
2361 ; GFX10-NEXT: ret void
2363 ; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
2364 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2365 ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2366 ; GFX11-NEXT: ret void
2368 ; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
2369 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2370 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2371 ; GFX12-NEXT: ret void
2373 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
; Same fadd and metadata as the preceding no_fine_grained_memory +
; no_remote_memory test, but the function carries attribute group #0.
; NOTE(review): #0 is defined outside this part of the file; the function name
; suggests it selects a DAZ denormal mode - confirm against the attribute list.
; Per the checks below, the gfx908, gfx90a, gfx940, gfx1100 and gfx1200 runs
; keep the atomicrmw as-is, while the gfx803, gfx906 and gfx1030 runs expand it
; into a load + cmpxchg loop.
2377 define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(ptr addrspace(1) %ptr, float %value) #0 {
2378 ; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
2379 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
2380 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2381 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
2382 ; GFX803: atomicrmw.start:
2383 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2384 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2385 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2386 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2387 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
2388 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2389 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2390 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2391 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2392 ; GFX803: atomicrmw.end:
2393 ; GFX803-NEXT: ret void
2395 ; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
2396 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
2397 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2398 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
2399 ; GFX906: atomicrmw.start:
2400 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2401 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2402 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2403 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2404 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
2405 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2406 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2407 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2408 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2409 ; GFX906: atomicrmw.end:
2410 ; GFX906-NEXT: ret void
2412 ; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
2413 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
2414 ; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2415 ; GFX908-NEXT: ret void
2417 ; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
2418 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
2419 ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2420 ; GFX90A-NEXT: ret void
2422 ; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
2423 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
2424 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2425 ; GFX940-NEXT: ret void
2427 ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
2428 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
2429 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2430 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
2431 ; GFX10: atomicrmw.start:
2432 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2433 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2434 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2435 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2436 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
2437 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2438 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2439 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2440 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2441 ; GFX10: atomicrmw.end:
2442 ; GFX10-NEXT: ret void
2444 ; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
2445 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
2446 ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2447 ; GFX11-NEXT: ret void
2449 ; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
2450 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] {
2451 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2452 ; GFX12-NEXT: ret void
2454 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
; Same fadd and metadata as the preceding no_fine_grained_memory +
; no_remote_memory tests, but the function carries attribute group #1.
; NOTE(review): #1 is defined outside this part of the file; the function name
; suggests it selects a dynamic denormal mode - confirm against the attribute
; list.
; Per the checks below, the gfx908, gfx90a, gfx940, gfx1100 and gfx1200 runs
; keep the atomicrmw as-is, while the gfx803, gfx906 and gfx1030 runs expand it
; into a load + cmpxchg loop.
2458 define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(ptr addrspace(1) %ptr, float %value) #1 {
2459 ; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
2460 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
2461 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2462 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
2463 ; GFX803: atomicrmw.start:
2464 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2465 ; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2466 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2467 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2468 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
2469 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2470 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2471 ; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2472 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2473 ; GFX803: atomicrmw.end:
2474 ; GFX803-NEXT: ret void
2476 ; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
2477 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
2478 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2479 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
2480 ; GFX906: atomicrmw.start:
2481 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2482 ; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2483 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2484 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2485 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
2486 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2487 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2488 ; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2489 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2490 ; GFX906: atomicrmw.end:
2491 ; GFX906-NEXT: ret void
2493 ; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
2494 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
2495 ; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2496 ; GFX908-NEXT: ret void
2498 ; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
2499 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
2500 ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2501 ; GFX90A-NEXT: ret void
2503 ; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
2504 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
2505 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2506 ; GFX940-NEXT: ret void
2508 ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
2509 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
2510 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2511 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
2512 ; GFX10: atomicrmw.start:
2513 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2514 ; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
2515 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2516 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2517 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
2518 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2519 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2520 ; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2521 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2522 ; GFX10: atomicrmw.end:
2523 ; GFX10-NEXT: ret void
2525 ; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
2526 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
2527 ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2528 ; GFX11-NEXT: ret void
2530 ; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
2531 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
2532 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
2533 ; GFX12-NEXT: ret void
2535 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
2539 ;---------------------------------------------------------------------
2541 ;---------------------------------------------------------------------
; fsub with no metadata attached. The checks below use the prefix shared by
; all RUN lines, so every tested -mcpu is expected to expand the operation
; into a load + cmpxchg loop and return the value loaded by the final cmpxchg,
; bitcast back to float.
2543 define float @test_atomicrmw_fsub_f32_global_agent(ptr addrspace(1) %ptr, float %value) {
2544 ; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_agent(
2545 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2546 ; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2547 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
2548 ; COMMON: atomicrmw.start:
2549 ; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
2550 ; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]]
2551 ; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2552 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2553 ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
2554 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2555 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2556 ; COMMON-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float
2557 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2558 ; COMMON: atomicrmw.end:
2559 ; COMMON-NEXT: ret float [[RES]]
2561 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst
; fsub with !amdgpu.no.fine.grained.memory on the atomicrmw. The shared
; assertions still expect a cmpxchg loop, and the expected cmpxchg line carries
; the same metadata kind that was attached to the input instruction.
2565 define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) {
2566 ; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_no_fine_grained_memory(
2567 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2568 ; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2569 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
2570 ; COMMON: atomicrmw.start:
2571 ; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
2572 ; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]]
2573 ; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2574 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2575 ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
2576 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2577 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2578 ; COMMON-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float
2579 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2580 ; COMMON: atomicrmw.end:
2581 ; COMMON-NEXT: ret float [[RES]]
2583 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
; fsub with !amdgpu.no.remote.memory on the atomicrmw. The shared assertions
; expect a cmpxchg loop whose cmpxchg line carries !amdgpu.no.remote.memory.
2587 define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
2588 ; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_no_remote_memory(
2589 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2590 ; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2591 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
2592 ; COMMON: atomicrmw.start:
2593 ; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
2594 ; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]]
2595 ; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2596 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2597 ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
2598 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2599 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2600 ; COMMON-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float
2601 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2602 ; COMMON: atomicrmw.end:
2603 ; COMMON-NEXT: ret float [[RES]]
2605 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
; fsub with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory on
; the atomicrmw. The shared assertions expect a cmpxchg loop whose cmpxchg line
; carries both metadata kinds.
2609 define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
2610 ; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
2611 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2612 ; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2613 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
2614 ; COMMON: atomicrmw.start:
2615 ; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
2616 ; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]]
2617 ; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2618 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2619 ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
2620 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2621 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2622 ; COMMON-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float
2623 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2624 ; COMMON: atomicrmw.end:
2625 ; COMMON-NEXT: ret float [[RES]]
2627 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
; fsub with only !amdgpu.ignore.denormal.mode on the atomicrmw. The shared
; assertions expect the same cmpxchg loop as the baseline case; the expected
; cmpxchg line does not list any metadata.
2631 define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, float %value) {
2632 ; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_ignore_denormal_mode(
2633 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2634 ; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2635 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
2636 ; COMMON: atomicrmw.start:
2637 ; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2638 ; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]]
2639 ; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2640 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2641 ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
2642 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2643 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2644 ; COMMON-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2645 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2646 ; COMMON: atomicrmw.end:
2647 ; COMMON-NEXT: ret float [[TMP5]]
2649 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
; fsub with !amdgpu.ignore.denormal.mode and !amdgpu.no.fine.grained.memory on
; the atomicrmw. The shared assertions expect a cmpxchg loop; the expected
; cmpxchg line lists only !amdgpu.no.fine.grained.memory.
2653 define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) {
2654 ; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
2655 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2656 ; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2657 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
2658 ; COMMON: atomicrmw.start:
2659 ; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2660 ; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]]
2661 ; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2662 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2663 ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
2664 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2665 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2666 ; COMMON-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2667 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2668 ; COMMON: atomicrmw.end:
2669 ; COMMON-NEXT: ret float [[TMP5]]
2671 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
; fsub with !amdgpu.ignore.denormal.mode and !amdgpu.no.remote.memory on the
; atomicrmw. The shared assertions expect a cmpxchg loop; the expected cmpxchg
; line lists only !amdgpu.no.remote.memory.
2675 define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
2676 ; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
2677 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2678 ; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2679 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
2680 ; COMMON: atomicrmw.start:
2681 ; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2682 ; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]]
2683 ; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2684 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2685 ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
2686 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2687 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2688 ; COMMON-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2689 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2690 ; COMMON: atomicrmw.end:
2691 ; COMMON-NEXT: ret float [[TMP5]]
2693 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
; fsub with all three metadata kinds on the atomicrmw
; (!amdgpu.no.fine.grained.memory, !amdgpu.no.remote.memory and
; !amdgpu.ignore.denormal.mode). The shared assertions expect a cmpxchg loop;
; the expected cmpxchg line lists the two memory-related kinds only.
2697 define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
2698 ; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
2699 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2700 ; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2701 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
2702 ; COMMON: atomicrmw.start:
2703 ; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
2704 ; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]]
2705 ; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
2706 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
2707 ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
2708 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
2709 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
2710 ; COMMON-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
2711 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2712 ; COMMON: atomicrmw.end:
2713 ; COMMON-NEXT: ret float [[TMP5]]
2715 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
2719 ;---------------------------------------------------------------------
2721 ;---------------------------------------------------------------------
; Baseline fmax case: a float atomicrmw fmax on a global (addrspace(1)) pointer
; at agent scope, seq_cst, with no extra metadata. Expectations are split per
; target prefix: the gfx803, gfx906, gfx908, gfx90a, gfx940, gfx10 and gfx11
; configurations expect a cmpxchg loop whose new value comes from
; llvm.maxnum.f32, while the gfx12 configuration expects the atomicrmw fmax to
; remain in the output.
2723 define float @test_atomicrmw_fmax_f32_global_agent(ptr addrspace(1) %ptr, float %value) {
2724 ; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_agent(
2725 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2726 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2727 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
2728 ; GFX803: atomicrmw.start:
2729 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2730 ; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
2731 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
2732 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
2733 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
2734 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
2735 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
2736 ; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
2737 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2738 ; GFX803: atomicrmw.end:
2739 ; GFX803-NEXT: ret float [[TMP6]]
2741 ; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_agent(
2742 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2743 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2744 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
2745 ; GFX906: atomicrmw.start:
2746 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2747 ; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
2748 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
2749 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
2750 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
2751 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
2752 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
2753 ; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
2754 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2755 ; GFX906: atomicrmw.end:
2756 ; GFX906-NEXT: ret float [[TMP6]]
2758 ; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_agent(
2759 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2760 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2761 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
2762 ; GFX908: atomicrmw.start:
2763 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2764 ; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
2765 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
2766 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
2767 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
2768 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
2769 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
2770 ; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
2771 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2772 ; GFX908: atomicrmw.end:
2773 ; GFX908-NEXT: ret float [[TMP6]]
2775 ; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_agent(
2776 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2777 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2778 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
2779 ; GFX90A: atomicrmw.start:
2780 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2781 ; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
2782 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
2783 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
2784 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
2785 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
2786 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
2787 ; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
2788 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2789 ; GFX90A: atomicrmw.end:
2790 ; GFX90A-NEXT: ret float [[TMP6]]
2792 ; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent(
2793 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2794 ; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2795 ; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
2796 ; GFX940: atomicrmw.start:
2797 ; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2798 ; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
2799 ; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
2800 ; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
2801 ; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
2802 ; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
2803 ; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
2804 ; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
2805 ; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2806 ; GFX940: atomicrmw.end:
2807 ; GFX940-NEXT: ret float [[TMP6]]
2809 ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent(
2810 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2811 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2812 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
2813 ; GFX10: atomicrmw.start:
2814 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2815 ; GFX10-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
2816 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
2817 ; GFX10-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
2818 ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
2819 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
2820 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
2821 ; GFX10-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
2822 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2823 ; GFX10: atomicrmw.end:
2824 ; GFX10-NEXT: ret float [[TMP6]]
2826 ; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_agent(
2827 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2828 ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2829 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
2830 ; GFX11: atomicrmw.start:
2831 ; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2832 ; GFX11-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
2833 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
2834 ; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
2835 ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
2836 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
2837 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
2838 ; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
2839 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2840 ; GFX11: atomicrmw.end:
2841 ; GFX11-NEXT: ret float [[TMP6]]
2843 ; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_agent(
2844 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2845 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4
2846 ; GFX12-NEXT: ret float [[RES]]
2848 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst
; fmax with !amdgpu.no.fine.grained.memory on the atomicrmw. Expectations are
; split per target prefix: the gfx803, gfx906, gfx908, gfx90a and gfx940
; configurations expect a cmpxchg loop (new value from llvm.maxnum.f32) with the
; metadata on the cmpxchg, while the gfx10, gfx11 and gfx12 configurations
; expect the atomicrmw fmax to remain in the output with its metadata.
2852 define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) {
2853 ; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory(
2854 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2855 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2856 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
2857 ; GFX803: atomicrmw.start:
2858 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2859 ; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
2860 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
2861 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
2862 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
2863 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
2864 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
2865 ; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
2866 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2867 ; GFX803: atomicrmw.end:
2868 ; GFX803-NEXT: ret float [[TMP6]]
2870 ; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory(
2871 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2872 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2873 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
2874 ; GFX906: atomicrmw.start:
2875 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2876 ; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
2877 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
2878 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
2879 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
2880 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
2881 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
2882 ; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
2883 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2884 ; GFX906: atomicrmw.end:
2885 ; GFX906-NEXT: ret float [[TMP6]]
2887 ; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory(
2888 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2889 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2890 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
2891 ; GFX908: atomicrmw.start:
2892 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2893 ; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
2894 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
2895 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
2896 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
2897 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
2898 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
2899 ; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
2900 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2901 ; GFX908: atomicrmw.end:
2902 ; GFX908-NEXT: ret float [[TMP6]]
2904 ; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory(
2905 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2906 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2907 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
2908 ; GFX90A: atomicrmw.start:
2909 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2910 ; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
2911 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
2912 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
2913 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
2914 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
2915 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
2916 ; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
2917 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2918 ; GFX90A: atomicrmw.end:
2919 ; GFX90A-NEXT: ret float [[TMP6]]
2921 ; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory(
2922 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2923 ; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2924 ; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
2925 ; GFX940: atomicrmw.start:
2926 ; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2927 ; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
2928 ; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
2929 ; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
2930 ; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
2931 ; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
2932 ; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
2933 ; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
2934 ; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2935 ; GFX940: atomicrmw.end:
2936 ; GFX940-NEXT: ret float [[TMP6]]
2938 ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory(
2939 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2940 ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
2941 ; GFX10-NEXT: ret float [[RES]]
2943 ; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory(
2944 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2945 ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
2946 ; GFX11-NEXT: ret float [[RES]]
2948 ; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory(
2949 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2950 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
2951 ; GFX12-NEXT: ret float [[RES]]
2953 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
2957 define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
2958 ; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory(
2959 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2960 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2961 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
2962 ; GFX803: atomicrmw.start:
2963 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2964 ; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
2965 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
2966 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
2967 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
2968 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
2969 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
2970 ; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
2971 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2972 ; GFX803: atomicrmw.end:
2973 ; GFX803-NEXT: ret float [[TMP6]]
2975 ; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory(
2976 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2977 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2978 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
2979 ; GFX906: atomicrmw.start:
2980 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2981 ; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
2982 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
2983 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
2984 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
2985 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
2986 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
2987 ; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
2988 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2989 ; GFX906: atomicrmw.end:
2990 ; GFX906-NEXT: ret float [[TMP6]]
2992 ; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory(
2993 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
2994 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
2995 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
2996 ; GFX908: atomicrmw.start:
2997 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2998 ; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
2999 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3000 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3001 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
3002 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3003 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3004 ; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3005 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3006 ; GFX908: atomicrmw.end:
3007 ; GFX908-NEXT: ret float [[TMP6]]
3009 ; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory(
3010 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3011 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3012 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
3013 ; GFX90A: atomicrmw.start:
3014 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3015 ; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3016 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3017 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3018 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
3019 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3020 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3021 ; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3022 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3023 ; GFX90A: atomicrmw.end:
3024 ; GFX90A-NEXT: ret float [[TMP6]]
3026 ; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory(
3027 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3028 ; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3029 ; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
3030 ; GFX940: atomicrmw.start:
3031 ; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3032 ; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3033 ; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3034 ; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3035 ; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
3036 ; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3037 ; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3038 ; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3039 ; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3040 ; GFX940: atomicrmw.end:
3041 ; GFX940-NEXT: ret float [[TMP6]]
3043 ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory(
3044 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3045 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3046 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
3047 ; GFX10: atomicrmw.start:
3048 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
3049 ; GFX10-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3050 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3051 ; GFX10-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3052 ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
3053 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3054 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3055 ; GFX10-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float
3056 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3057 ; GFX10: atomicrmw.end:
3058 ; GFX10-NEXT: ret float [[RES]]
3060 ; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory(
3061 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3062 ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3063 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
3064 ; GFX11: atomicrmw.start:
3065 ; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
3066 ; GFX11-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3067 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3068 ; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3069 ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
3070 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3071 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3072 ; GFX11-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float
3073 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3074 ; GFX11: atomicrmw.end:
3075 ; GFX11-NEXT: ret float [[RES]]
3077 ; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory(
3078 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3079 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
3080 ; GFX12-NEXT: ret float [[RES]]
3082 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
; fmax on a global (addrspace 1) pointer at agent scope, tagged with both
; !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory.
; Expected result per -mcpu:
;   gfx803, gfx906, gfx908, gfx90a, gfx940 - expanded into a load + cmpxchg
;     loop around llvm.maxnum.f32; the cmpxchg carries both metadata kinds.
;   gfx1030, gfx1100, gfx1200 - the atomicrmw fmax is left unexpanded with
;     both metadata kinds still attached.
3086 define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
3087 ; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
3088 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3089 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3090 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
3091 ; GFX803: atomicrmw.start:
3092 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3093 ; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3094 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3095 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3096 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
3097 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3098 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3099 ; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3100 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3101 ; GFX803: atomicrmw.end:
3102 ; GFX803-NEXT: ret float [[TMP6]]
3104 ; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
3105 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3106 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3107 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
3108 ; GFX906: atomicrmw.start:
3109 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3110 ; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3111 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3112 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3113 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
3114 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3115 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3116 ; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3117 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3118 ; GFX906: atomicrmw.end:
3119 ; GFX906-NEXT: ret float [[TMP6]]
3121 ; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
3122 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3123 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3124 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
3125 ; GFX908: atomicrmw.start:
3126 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3127 ; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3128 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3129 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3130 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
3131 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3132 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3133 ; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3134 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3135 ; GFX908: atomicrmw.end:
3136 ; GFX908-NEXT: ret float [[TMP6]]
3138 ; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
3139 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3140 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3141 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
3142 ; GFX90A: atomicrmw.start:
3143 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3144 ; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3145 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3146 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3147 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
3148 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3149 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3150 ; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3151 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3152 ; GFX90A: atomicrmw.end:
3153 ; GFX90A-NEXT: ret float [[TMP6]]
3155 ; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
3156 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3157 ; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3158 ; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
3159 ; GFX940: atomicrmw.start:
3160 ; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3161 ; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3162 ; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3163 ; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3164 ; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
3165 ; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3166 ; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3167 ; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3168 ; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3169 ; GFX940: atomicrmw.end:
3170 ; GFX940-NEXT: ret float [[TMP6]]
3172 ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
3173 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3174 ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
3175 ; GFX10-NEXT: ret float [[RES]]
3177 ; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
3178 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3179 ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
3180 ; GFX11-NEXT: ret float [[RES]]
3182 ; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
3183 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3184 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
3185 ; GFX12-NEXT: ret float [[RES]]
3187 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
; fmax on a global (addrspace 1) pointer at agent scope, tagged only with
; !amdgpu.ignore.denormal.mode.
; Expected result per -mcpu:
;   gfx803, gfx906, gfx908, gfx90a, gfx940, gfx1030, gfx1100 - expanded into a
;     load + cmpxchg loop around llvm.maxnum.f32; the expected cmpxchg has no
;     metadata attached.
;   gfx1200 - the atomicrmw fmax is left unexpanded and keeps
;     !amdgpu.ignore.denormal.mode.
3191 define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, float %value) {
3192 ; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode(
3193 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3194 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3195 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
3196 ; GFX803: atomicrmw.start:
3197 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3198 ; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3199 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3200 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3201 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
3202 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3203 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3204 ; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3205 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3206 ; GFX803: atomicrmw.end:
3207 ; GFX803-NEXT: ret float [[TMP6]]
3209 ; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode(
3210 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3211 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3212 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
3213 ; GFX906: atomicrmw.start:
3214 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3215 ; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3216 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3217 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3218 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
3219 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3220 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3221 ; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3222 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3223 ; GFX906: atomicrmw.end:
3224 ; GFX906-NEXT: ret float [[TMP6]]
3226 ; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode(
3227 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3228 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3229 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
3230 ; GFX908: atomicrmw.start:
3231 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3232 ; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3233 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3234 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3235 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
3236 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3237 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3238 ; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3239 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3240 ; GFX908: atomicrmw.end:
3241 ; GFX908-NEXT: ret float [[TMP6]]
3243 ; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode(
3244 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3245 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3246 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
3247 ; GFX90A: atomicrmw.start:
3248 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3249 ; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3250 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3251 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3252 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
3253 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3254 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3255 ; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3256 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3257 ; GFX90A: atomicrmw.end:
3258 ; GFX90A-NEXT: ret float [[TMP6]]
3260 ; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode(
3261 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3262 ; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3263 ; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
3264 ; GFX940: atomicrmw.start:
3265 ; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3266 ; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3267 ; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3268 ; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3269 ; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
3270 ; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3271 ; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3272 ; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3273 ; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3274 ; GFX940: atomicrmw.end:
3275 ; GFX940-NEXT: ret float [[TMP6]]
3277 ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode(
3278 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3279 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3280 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
3281 ; GFX10: atomicrmw.start:
3282 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3283 ; GFX10-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3284 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3285 ; GFX10-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3286 ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
3287 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3288 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3289 ; GFX10-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3290 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3291 ; GFX10: atomicrmw.end:
3292 ; GFX10-NEXT: ret float [[TMP6]]
3294 ; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode(
3295 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3296 ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3297 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
3298 ; GFX11: atomicrmw.start:
3299 ; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3300 ; GFX11-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3301 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3302 ; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3303 ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
3304 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3305 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3306 ; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3307 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3308 ; GFX11: atomicrmw.end:
3309 ; GFX11-NEXT: ret float [[TMP6]]
3311 ; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode(
3312 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3313 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]]
3314 ; GFX12-NEXT: ret float [[RES]]
3316 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
; fmax on a global (addrspace 1) pointer at agent scope, tagged with both
; !amdgpu.no.fine.grained.memory and !amdgpu.ignore.denormal.mode.
; Expected result per -mcpu:
;   gfx803, gfx906, gfx908, gfx90a, gfx940 - expanded into a load + cmpxchg
;     loop around llvm.maxnum.f32; the expected cmpxchg keeps only
;     !amdgpu.no.fine.grained.memory.
;   gfx1030, gfx1100, gfx1200 - the atomicrmw fmax is left unexpanded with
;     both metadata kinds still attached.
3320 define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) {
3321 ; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
3322 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3323 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3324 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
3325 ; GFX803: atomicrmw.start:
3326 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3327 ; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3328 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3329 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3330 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
3331 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3332 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3333 ; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3334 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3335 ; GFX803: atomicrmw.end:
3336 ; GFX803-NEXT: ret float [[TMP6]]
3338 ; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
3339 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3340 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3341 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
3342 ; GFX906: atomicrmw.start:
3343 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3344 ; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3345 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3346 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3347 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
3348 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3349 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3350 ; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3351 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3352 ; GFX906: atomicrmw.end:
3353 ; GFX906-NEXT: ret float [[TMP6]]
3355 ; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
3356 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3357 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3358 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
3359 ; GFX908: atomicrmw.start:
3360 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3361 ; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3362 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3363 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3364 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
3365 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3366 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3367 ; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3368 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3369 ; GFX908: atomicrmw.end:
3370 ; GFX908-NEXT: ret float [[TMP6]]
3372 ; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
3373 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3374 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3375 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
3376 ; GFX90A: atomicrmw.start:
3377 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3378 ; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3379 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3380 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3381 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
3382 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3383 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3384 ; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3385 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3386 ; GFX90A: atomicrmw.end:
3387 ; GFX90A-NEXT: ret float [[TMP6]]
3389 ; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
3390 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3391 ; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3392 ; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
3393 ; GFX940: atomicrmw.start:
3394 ; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3395 ; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3396 ; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3397 ; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3398 ; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
3399 ; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3400 ; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3401 ; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3402 ; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3403 ; GFX940: atomicrmw.end:
3404 ; GFX940-NEXT: ret float [[TMP6]]
3406 ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
3407 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3408 ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
3409 ; GFX10-NEXT: ret float [[RES]]
3411 ; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
3412 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3413 ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
3414 ; GFX11-NEXT: ret float [[RES]]
3416 ; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
3417 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3418 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
3419 ; GFX12-NEXT: ret float [[RES]]
3421 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
; fmax on an addrspace(1) float at agent scope (seq_cst) carrying both
; !amdgpu.no.remote.memory and !amdgpu.ignore.denormal.mode.
; Per the assertions below: gfx803, gfx906, gfx908, gfx90a, gfx940, gfx1030 and
; gfx1100 expect a load + llvm.maxnum + i32 cmpxchg retry loop, with only
; !amdgpu.no.remote.memory present on the cmpxchg; gfx1200 expects the
; atomicrmw to be left in place with both metadata kinds attached.
3425 define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
3426 ; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
3427 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3428 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3429 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
3430 ; GFX803: atomicrmw.start:
3431 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3432 ; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3433 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3434 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3435 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
3436 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3437 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3438 ; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3439 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3440 ; GFX803: atomicrmw.end:
3441 ; GFX803-NEXT: ret float [[TMP6]]
3443 ; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
3444 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3445 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3446 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
3447 ; GFX906: atomicrmw.start:
3448 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3449 ; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3450 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3451 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3452 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
3453 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3454 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3455 ; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3456 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3457 ; GFX906: atomicrmw.end:
3458 ; GFX906-NEXT: ret float [[TMP6]]
3460 ; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
3461 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3462 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3463 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
3464 ; GFX908: atomicrmw.start:
3465 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3466 ; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3467 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3468 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3469 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
3470 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3471 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3472 ; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3473 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3474 ; GFX908: atomicrmw.end:
3475 ; GFX908-NEXT: ret float [[TMP6]]
3477 ; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
3478 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3479 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3480 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
3481 ; GFX90A: atomicrmw.start:
3482 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3483 ; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3484 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3485 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3486 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
3487 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3488 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3489 ; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3490 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3491 ; GFX90A: atomicrmw.end:
3492 ; GFX90A-NEXT: ret float [[TMP6]]
3494 ; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
3495 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3496 ; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3497 ; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
3498 ; GFX940: atomicrmw.start:
3499 ; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3500 ; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3501 ; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3502 ; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3503 ; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
3504 ; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3505 ; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3506 ; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3507 ; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3508 ; GFX940: atomicrmw.end:
3509 ; GFX940-NEXT: ret float [[TMP6]]
3511 ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
3512 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3513 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3514 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
3515 ; GFX10: atomicrmw.start:
3516 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
3517 ; GFX10-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3518 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3519 ; GFX10-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3520 ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
3521 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3522 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3523 ; GFX10-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float
3524 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3525 ; GFX10: atomicrmw.end:
3526 ; GFX10-NEXT: ret float [[RES]]
3528 ; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
3529 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3530 ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3531 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
3532 ; GFX11: atomicrmw.start:
3533 ; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
3534 ; GFX11-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3535 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3536 ; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3537 ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
3538 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3539 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3540 ; GFX11-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float
3541 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3542 ; GFX11: atomicrmw.end:
3543 ; GFX11-NEXT: ret float [[RES]]
3545 ; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
3546 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3547 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
3548 ; GFX12-NEXT: ret float [[RES]]
3550 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
; fmax on an addrspace(1) float at agent scope (seq_cst) carrying all three of
; !amdgpu.no.fine.grained.memory, !amdgpu.no.remote.memory and
; !amdgpu.ignore.denormal.mode.
; Per the assertions below: gfx803, gfx906, gfx908, gfx90a and gfx940 expect a
; load + llvm.maxnum + i32 cmpxchg retry loop, with the two memory-kind
; annotations (but not the denormal one) present on the cmpxchg; gfx1030,
; gfx1100 and gfx1200 expect the atomicrmw to be left in place with all three
; metadata kinds attached.
3554 define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
3555 ; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
3556 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3557 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3558 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
3559 ; GFX803: atomicrmw.start:
3560 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3561 ; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3562 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3563 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3564 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
3565 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3566 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3567 ; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3568 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3569 ; GFX803: atomicrmw.end:
3570 ; GFX803-NEXT: ret float [[TMP6]]
3572 ; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
3573 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3574 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3575 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
3576 ; GFX906: atomicrmw.start:
3577 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3578 ; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3579 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3580 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3581 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
3582 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3583 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3584 ; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3585 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3586 ; GFX906: atomicrmw.end:
3587 ; GFX906-NEXT: ret float [[TMP6]]
3589 ; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
3590 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3591 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3592 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
3593 ; GFX908: atomicrmw.start:
3594 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3595 ; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3596 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3597 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3598 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
3599 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3600 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3601 ; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3602 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3603 ; GFX908: atomicrmw.end:
3604 ; GFX908-NEXT: ret float [[TMP6]]
3606 ; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
3607 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3608 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3609 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
3610 ; GFX90A: atomicrmw.start:
3611 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3612 ; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3613 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3614 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3615 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
3616 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3617 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3618 ; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3619 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3620 ; GFX90A: atomicrmw.end:
3621 ; GFX90A-NEXT: ret float [[TMP6]]
3623 ; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
3624 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3625 ; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3626 ; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
3627 ; GFX940: atomicrmw.start:
3628 ; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3629 ; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
3630 ; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3631 ; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3632 ; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
3633 ; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3634 ; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3635 ; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3636 ; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3637 ; GFX940: atomicrmw.end:
3638 ; GFX940-NEXT: ret float [[TMP6]]
3640 ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
3641 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3642 ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
3643 ; GFX10-NEXT: ret float [[RES]]
3645 ; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
3646 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3647 ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
3648 ; GFX11-NEXT: ret float [[RES]]
3650 ; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
3651 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3652 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
3653 ; GFX12-NEXT: ret float [[RES]]
3655 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
3659 ;---------------------------------------------------------------------
3661 ;---------------------------------------------------------------------
; Baseline fmin case: addrspace(1) float, agent scope, seq_cst, with no
; metadata on the atomicrmw and no explicit alignment in the input.
; Per the assertions below: gfx803, gfx906, gfx908, gfx90a, gfx940, gfx1030 and
; gfx1100 expect a load + llvm.minnum + i32 cmpxchg retry loop; gfx1200 expects
; the atomicrmw to be left in place (printed with an explicit "align 4").
3663 define float @test_atomicrmw_fmin_f32_global_agent(ptr addrspace(1) %ptr, float %value) {
3664 ; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_agent(
3665 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3666 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3667 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
3668 ; GFX803: atomicrmw.start:
3669 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3670 ; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
3671 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3672 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3673 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
3674 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3675 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3676 ; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3677 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3678 ; GFX803: atomicrmw.end:
3679 ; GFX803-NEXT: ret float [[TMP6]]
3681 ; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_agent(
3682 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3683 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3684 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
3685 ; GFX906: atomicrmw.start:
3686 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3687 ; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
3688 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3689 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3690 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
3691 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3692 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3693 ; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3694 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3695 ; GFX906: atomicrmw.end:
3696 ; GFX906-NEXT: ret float [[TMP6]]
3698 ; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_agent(
3699 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3700 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3701 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
3702 ; GFX908: atomicrmw.start:
3703 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3704 ; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
3705 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3706 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3707 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
3708 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3709 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3710 ; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3711 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3712 ; GFX908: atomicrmw.end:
3713 ; GFX908-NEXT: ret float [[TMP6]]
3715 ; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_agent(
3716 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3717 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3718 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
3719 ; GFX90A: atomicrmw.start:
3720 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3721 ; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
3722 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3723 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3724 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
3725 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3726 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3727 ; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3728 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3729 ; GFX90A: atomicrmw.end:
3730 ; GFX90A-NEXT: ret float [[TMP6]]
3732 ; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent(
3733 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3734 ; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3735 ; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
3736 ; GFX940: atomicrmw.start:
3737 ; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3738 ; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
3739 ; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3740 ; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3741 ; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
3742 ; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3743 ; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3744 ; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3745 ; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3746 ; GFX940: atomicrmw.end:
3747 ; GFX940-NEXT: ret float [[TMP6]]
3749 ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent(
3750 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3751 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3752 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
3753 ; GFX10: atomicrmw.start:
3754 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3755 ; GFX10-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
3756 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3757 ; GFX10-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3758 ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
3759 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3760 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3761 ; GFX10-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3762 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3763 ; GFX10: atomicrmw.end:
3764 ; GFX10-NEXT: ret float [[TMP6]]
3766 ; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_agent(
3767 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3768 ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3769 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
3770 ; GFX11: atomicrmw.start:
3771 ; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3772 ; GFX11-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
3773 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3774 ; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3775 ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
3776 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3777 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3778 ; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3779 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3780 ; GFX11: atomicrmw.end:
3781 ; GFX11-NEXT: ret float [[TMP6]]
3783 ; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_agent(
3784 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3785 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4
3786 ; GFX12-NEXT: ret float [[RES]]
3788 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst
3792 define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) {
3793 ; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory(
3794 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3795 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3796 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
3797 ; GFX803: atomicrmw.start:
3798 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3799 ; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
3800 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3801 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3802 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
3803 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3804 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3805 ; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3806 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3807 ; GFX803: atomicrmw.end:
3808 ; GFX803-NEXT: ret float [[TMP6]]
3810 ; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory(
3811 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3812 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3813 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
3814 ; GFX906: atomicrmw.start:
3815 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3816 ; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
3817 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3818 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3819 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
3820 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3821 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3822 ; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3823 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3824 ; GFX906: atomicrmw.end:
3825 ; GFX906-NEXT: ret float [[TMP6]]
3827 ; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory(
3828 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3829 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3830 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
3831 ; GFX908: atomicrmw.start:
3832 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3833 ; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
3834 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3835 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3836 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
3837 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3838 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3839 ; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3840 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3841 ; GFX908: atomicrmw.end:
3842 ; GFX908-NEXT: ret float [[TMP6]]
3844 ; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory(
3845 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3846 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3847 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
3848 ; GFX90A: atomicrmw.start:
3849 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3850 ; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
3851 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3852 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3853 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
3854 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3855 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3856 ; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3857 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3858 ; GFX90A: atomicrmw.end:
3859 ; GFX90A-NEXT: ret float [[TMP6]]
3861 ; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory(
3862 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3863 ; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3864 ; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
3865 ; GFX940: atomicrmw.start:
3866 ; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3867 ; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
3868 ; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3869 ; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3870 ; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
3871 ; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3872 ; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3873 ; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3874 ; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3875 ; GFX940: atomicrmw.end:
3876 ; GFX940-NEXT: ret float [[TMP6]]
3878 ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory(
3879 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3880 ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
3881 ; GFX10-NEXT: ret float [[RES]]
3883 ; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory(
3884 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3885 ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
3886 ; GFX11-NEXT: ret float [[RES]]
3888 ; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory(
3889 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3890 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
3891 ; GFX12-NEXT: ret float [[RES]]
3893 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
; fmin on a global f32 at agent scope, tagged only with !amdgpu.no.remote.memory.
; Per the checks below, GFX803 through GFX11 rewrite the atomicrmw into a
; load + cmpxchg loop (the metadata is carried over onto the cmpxchg), while
; GFX12 leaves the atomicrmw fmin in place.
3897 define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
3898 ; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory(
3899 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3900 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3901 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
3902 ; GFX803: atomicrmw.start:
3903 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3904 ; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
3905 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3906 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3907 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
3908 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3909 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3910 ; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3911 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3912 ; GFX803: atomicrmw.end:
3913 ; GFX803-NEXT: ret float [[TMP6]]
3915 ; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory(
3916 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3917 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3918 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
3919 ; GFX906: atomicrmw.start:
3920 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3921 ; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
3922 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3923 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3924 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
3925 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3926 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3927 ; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3928 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3929 ; GFX906: atomicrmw.end:
3930 ; GFX906-NEXT: ret float [[TMP6]]
3932 ; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory(
3933 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3934 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3935 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
3936 ; GFX908: atomicrmw.start:
3937 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3938 ; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
3939 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3940 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3941 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
3942 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3943 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3944 ; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3945 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3946 ; GFX908: atomicrmw.end:
3947 ; GFX908-NEXT: ret float [[TMP6]]
3949 ; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory(
3950 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3951 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3952 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
3953 ; GFX90A: atomicrmw.start:
3954 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3955 ; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
3956 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3957 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3958 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
3959 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3960 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3961 ; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3962 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3963 ; GFX90A: atomicrmw.end:
3964 ; GFX90A-NEXT: ret float [[TMP6]]
3966 ; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory(
3967 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3968 ; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3969 ; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
3970 ; GFX940: atomicrmw.start:
3971 ; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
3972 ; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
3973 ; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3974 ; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3975 ; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
3976 ; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3977 ; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3978 ; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
3979 ; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3980 ; GFX940: atomicrmw.end:
3981 ; GFX940-NEXT: ret float [[TMP6]]
3983 ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory(
3984 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
3985 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
3986 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
3987 ; GFX10: atomicrmw.start:
3988 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
3989 ; GFX10-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
3990 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
3991 ; GFX10-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
3992 ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
3993 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
3994 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
3995 ; GFX10-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float
3996 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
3997 ; GFX10: atomicrmw.end:
3998 ; GFX10-NEXT: ret float [[RES]]
4000 ; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory(
4001 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4002 ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4003 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
4004 ; GFX11: atomicrmw.start:
4005 ; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
4006 ; GFX11-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4007 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4008 ; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4009 ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
4010 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4011 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4012 ; GFX11-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float
4013 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4014 ; GFX11: atomicrmw.end:
4015 ; GFX11-NEXT: ret float [[RES]]
4017 ; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory(
4018 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4019 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
4020 ; GFX12-NEXT: ret float [[RES]]
4022 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
; fmin on a global f32 at agent scope, tagged with both
; !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory.
; Per the checks below, GFX803/GFX906/GFX908/GFX90A/GFX940 still rewrite the
; atomicrmw into a load + cmpxchg loop (both metadata kinds are carried over
; onto the cmpxchg), while GFX10, GFX11 and GFX12 leave the atomicrmw fmin in
; place.
4026 define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
4027 ; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
4028 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4029 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4030 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
4031 ; GFX803: atomicrmw.start:
4032 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4033 ; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4034 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4035 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4036 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
4037 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4038 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4039 ; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4040 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4041 ; GFX803: atomicrmw.end:
4042 ; GFX803-NEXT: ret float [[TMP6]]
4044 ; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
4045 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4046 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4047 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
4048 ; GFX906: atomicrmw.start:
4049 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4050 ; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4051 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4052 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4053 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
4054 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4055 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4056 ; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4057 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4058 ; GFX906: atomicrmw.end:
4059 ; GFX906-NEXT: ret float [[TMP6]]
4061 ; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
4062 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4063 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4064 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
4065 ; GFX908: atomicrmw.start:
4066 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4067 ; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4068 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4069 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4070 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
4071 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4072 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4073 ; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4074 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4075 ; GFX908: atomicrmw.end:
4076 ; GFX908-NEXT: ret float [[TMP6]]
4078 ; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
4079 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4080 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4081 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
4082 ; GFX90A: atomicrmw.start:
4083 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4084 ; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4085 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4086 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4087 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
4088 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4089 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4090 ; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4091 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4092 ; GFX90A: atomicrmw.end:
4093 ; GFX90A-NEXT: ret float [[TMP6]]
4095 ; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
4096 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4097 ; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4098 ; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
4099 ; GFX940: atomicrmw.start:
4100 ; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4101 ; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4102 ; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4103 ; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4104 ; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
4105 ; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4106 ; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4107 ; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4108 ; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4109 ; GFX940: atomicrmw.end:
4110 ; GFX940-NEXT: ret float [[TMP6]]
4112 ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
4113 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4114 ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
4115 ; GFX10-NEXT: ret float [[RES]]
4117 ; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
4118 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4119 ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
4120 ; GFX11-NEXT: ret float [[RES]]
4122 ; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
4123 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4124 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
4125 ; GFX12-NEXT: ret float [[RES]]
4127 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
; fmin on a global f32 at agent scope, tagged only with
; !amdgpu.ignore.denormal.mode.
; Per the checks below, GFX803 through GFX11 rewrite the atomicrmw into a
; load + cmpxchg loop, and the resulting cmpxchg carries no metadata; GFX12
; leaves the atomicrmw fmin in place with the metadata still attached.
4131 define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, float %value) {
4132 ; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode(
4133 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4134 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4135 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
4136 ; GFX803: atomicrmw.start:
4137 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4138 ; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4139 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4140 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4141 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
4142 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4143 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4144 ; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4145 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4146 ; GFX803: atomicrmw.end:
4147 ; GFX803-NEXT: ret float [[TMP6]]
4149 ; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode(
4150 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4151 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4152 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
4153 ; GFX906: atomicrmw.start:
4154 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4155 ; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4156 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4157 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4158 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
4159 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4160 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4161 ; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4162 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4163 ; GFX906: atomicrmw.end:
4164 ; GFX906-NEXT: ret float [[TMP6]]
4166 ; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode(
4167 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4168 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4169 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
4170 ; GFX908: atomicrmw.start:
4171 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4172 ; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4173 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4174 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4175 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
4176 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4177 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4178 ; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4179 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4180 ; GFX908: atomicrmw.end:
4181 ; GFX908-NEXT: ret float [[TMP6]]
4183 ; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode(
4184 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4185 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4186 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
4187 ; GFX90A: atomicrmw.start:
4188 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4189 ; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4190 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4191 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4192 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
4193 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4194 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4195 ; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4196 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4197 ; GFX90A: atomicrmw.end:
4198 ; GFX90A-NEXT: ret float [[TMP6]]
4200 ; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode(
4201 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4202 ; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4203 ; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
4204 ; GFX940: atomicrmw.start:
4205 ; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4206 ; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4207 ; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4208 ; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4209 ; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
4210 ; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4211 ; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4212 ; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4213 ; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4214 ; GFX940: atomicrmw.end:
4215 ; GFX940-NEXT: ret float [[TMP6]]
4217 ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode(
4218 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4219 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4220 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
4221 ; GFX10: atomicrmw.start:
4222 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4223 ; GFX10-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4224 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4225 ; GFX10-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4226 ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
4227 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4228 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4229 ; GFX10-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4230 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4231 ; GFX10: atomicrmw.end:
4232 ; GFX10-NEXT: ret float [[TMP6]]
4234 ; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode(
4235 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4236 ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4237 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
4238 ; GFX11: atomicrmw.start:
4239 ; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4240 ; GFX11-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4241 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4242 ; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4243 ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
4244 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4245 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4246 ; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4247 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4248 ; GFX11: atomicrmw.end:
4249 ; GFX11-NEXT: ret float [[TMP6]]
4251 ; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode(
4252 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4253 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]]
4254 ; GFX12-NEXT: ret float [[RES]]
4256 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
4260 define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) {
4261 ; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
4262 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4263 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4264 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
4265 ; GFX803: atomicrmw.start:
4266 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4267 ; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4268 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4269 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4270 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
4271 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4272 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4273 ; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4274 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4275 ; GFX803: atomicrmw.end:
4276 ; GFX803-NEXT: ret float [[TMP6]]
4278 ; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
4279 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4280 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4281 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
4282 ; GFX906: atomicrmw.start:
4283 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4284 ; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4285 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4286 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4287 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
4288 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4289 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4290 ; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4291 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4292 ; GFX906: atomicrmw.end:
4293 ; GFX906-NEXT: ret float [[TMP6]]
4295 ; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
4296 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4297 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4298 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
4299 ; GFX908: atomicrmw.start:
4300 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4301 ; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4302 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4303 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4304 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
4305 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4306 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4307 ; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4308 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4309 ; GFX908: atomicrmw.end:
4310 ; GFX908-NEXT: ret float [[TMP6]]
4312 ; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
4313 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4314 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4315 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
4316 ; GFX90A: atomicrmw.start:
4317 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4318 ; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4319 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4320 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4321 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
4322 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4323 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4324 ; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4325 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4326 ; GFX90A: atomicrmw.end:
4327 ; GFX90A-NEXT: ret float [[TMP6]]
4329 ; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
4330 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4331 ; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4332 ; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
4333 ; GFX940: atomicrmw.start:
4334 ; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4335 ; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4336 ; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4337 ; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4338 ; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
4339 ; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4340 ; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4341 ; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4342 ; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4343 ; GFX940: atomicrmw.end:
4344 ; GFX940-NEXT: ret float [[TMP6]]
4346 ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
4347 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4348 ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
4349 ; GFX10-NEXT: ret float [[RES]]
4351 ; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
4352 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4353 ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
4354 ; GFX11-NEXT: ret float [[RES]]
4356 ; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
4357 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4358 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
4359 ; GFX12-NEXT: ret float [[RES]]
4361 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
; fmin on f32 in the global address space at agent scope, annotated with
; !amdgpu.no.remote.memory and !amdgpu.ignore.denormal.mode.
; Per the checks below, only GFX12 keeps the atomicrmw. GFX803, GFX906, GFX908,
; GFX90A, GFX940, GFX10 and GFX11 expand it into a cmpxchg loop; the cmpxchg
; carries !amdgpu.no.remote.memory but not !amdgpu.ignore.denormal.mode.
4365 define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
4366 ; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
4367 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4368 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4369 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
4370 ; GFX803: atomicrmw.start:
4371 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4372 ; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4373 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4374 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4375 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
4376 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4377 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4378 ; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4379 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4380 ; GFX803: atomicrmw.end:
4381 ; GFX803-NEXT: ret float [[TMP6]]
4383 ; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
4384 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4385 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4386 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
4387 ; GFX906: atomicrmw.start:
4388 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4389 ; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4390 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4391 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4392 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
4393 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4394 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4395 ; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4396 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4397 ; GFX906: atomicrmw.end:
4398 ; GFX906-NEXT: ret float [[TMP6]]
4400 ; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
4401 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4402 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4403 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
4404 ; GFX908: atomicrmw.start:
4405 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4406 ; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4407 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4408 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4409 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
4410 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4411 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4412 ; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4413 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4414 ; GFX908: atomicrmw.end:
4415 ; GFX908-NEXT: ret float [[TMP6]]
4417 ; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
4418 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4419 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4420 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
4421 ; GFX90A: atomicrmw.start:
4422 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4423 ; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4424 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4425 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4426 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
4427 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4428 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4429 ; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4430 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4431 ; GFX90A: atomicrmw.end:
4432 ; GFX90A-NEXT: ret float [[TMP6]]
4434 ; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
4435 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4436 ; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4437 ; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
4438 ; GFX940: atomicrmw.start:
4439 ; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4440 ; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4441 ; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4442 ; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4443 ; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
4444 ; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4445 ; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4446 ; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4447 ; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4448 ; GFX940: atomicrmw.end:
4449 ; GFX940-NEXT: ret float [[TMP6]]
4451 ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
4452 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4453 ; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4454 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
4455 ; GFX10: atomicrmw.start:
4456 ; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
4457 ; GFX10-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4458 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4459 ; GFX10-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4460 ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
4461 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4462 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4463 ; GFX10-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float
4464 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4465 ; GFX10: atomicrmw.end:
4466 ; GFX10-NEXT: ret float [[RES]]
4468 ; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
4469 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4470 ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4471 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
4472 ; GFX11: atomicrmw.start:
4473 ; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
4474 ; GFX11-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4475 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4476 ; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4477 ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
4478 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4479 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4480 ; GFX11-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float
4481 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4482 ; GFX11: atomicrmw.end:
4483 ; GFX11-NEXT: ret float [[RES]]
4485 ; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
4486 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4487 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
4488 ; GFX12-NEXT: ret float [[RES]]
4490 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
; fmin on f32 in the global address space at agent scope, annotated with
; !amdgpu.no.fine.grained.memory, !amdgpu.no.remote.memory and
; !amdgpu.ignore.denormal.mode.
; Per the checks below, GFX10, GFX11 and GFX12 keep the atomicrmw with all three
; annotations. GFX803, GFX906, GFX908, GFX90A and GFX940 expand it into a cmpxchg
; loop; the cmpxchg carries both memory annotations but not
; !amdgpu.ignore.denormal.mode.
4494 define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
4495 ; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
4496 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4497 ; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4498 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
4499 ; GFX803: atomicrmw.start:
4500 ; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4501 ; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4502 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4503 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4504 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
4505 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4506 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4507 ; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4508 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4509 ; GFX803: atomicrmw.end:
4510 ; GFX803-NEXT: ret float [[TMP6]]
4512 ; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
4513 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4514 ; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4515 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
4516 ; GFX906: atomicrmw.start:
4517 ; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4518 ; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4519 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4520 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4521 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
4522 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4523 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4524 ; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4525 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4526 ; GFX906: atomicrmw.end:
4527 ; GFX906-NEXT: ret float [[TMP6]]
4529 ; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
4530 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4531 ; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4532 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
4533 ; GFX908: atomicrmw.start:
4534 ; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4535 ; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4536 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4537 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4538 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
4539 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4540 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4541 ; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4542 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4543 ; GFX908: atomicrmw.end:
4544 ; GFX908-NEXT: ret float [[TMP6]]
4546 ; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
4547 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4548 ; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4549 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
4550 ; GFX90A: atomicrmw.start:
4551 ; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4552 ; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4553 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4554 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4555 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
4556 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4557 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4558 ; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4559 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4560 ; GFX90A: atomicrmw.end:
4561 ; GFX90A-NEXT: ret float [[TMP6]]
4563 ; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
4564 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4565 ; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
4566 ; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]]
4567 ; GFX940: atomicrmw.start:
4568 ; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
4569 ; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
4570 ; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
4571 ; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
4572 ; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
4573 ; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
4574 ; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
4575 ; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
4576 ; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
4577 ; GFX940: atomicrmw.end:
4578 ; GFX940-NEXT: ret float [[TMP6]]
4580 ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
4581 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4582 ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
4583 ; GFX10-NEXT: ret float [[RES]]
4585 ; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
4586 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4587 ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
4588 ; GFX11-NEXT: ret float [[RES]]
4590 ; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
4591 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
4592 ; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
4593 ; GFX12-NEXT: ret float [[RES]]
4595 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
; Attribute groups that set the "denormal-fp-mode-f32" function attribute:
; #0 uses preserve-sign for both components, #1 uses dynamic for both.
4599 attributes #0 = { "denormal-fp-mode-f32"="preserve-sign,preserve-sign" }
4600 attributes #1 = { "denormal-fp-mode-f32"="dynamic,dynamic" }
; One check per RUN prefix that the metadata node captured as [[META0]] is the
; empty node !{}.
4604 ; GFX803: [[META0]] = !{}
4606 ; GFX906: [[META0]] = !{}
4608 ; GFX908: [[META0]] = !{}
4610 ; GFX90A: [[META0]] = !{}
4612 ; GFX940: [[META0]] = !{}
4614 ; GFX10: [[META0]] = !{}
4616 ; GFX11: [[META0]] = !{}
4618 ; GFX12: [[META0]] = !{}