1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX803 %s
3 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s
4 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s
5 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s
6 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX940 %s
7 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s
8 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s
9 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s
11 ;---------------------------------------------------------------------
13 ;---------------------------------------------------------------------
15 ; xchg is supported over PCIe, so no expansion is necessary
; Every RUN line shares the COMMON prefix for this function, so all listed
; -mcpu values are expected to keep the f64 atomicrmw xchg as is. The only
; difference from the input visible in the checks is the printed "align 8".
16 define double @test_atomicrmw_xchg_f64_global_agent(ptr addrspace(1) %ptr, double %value) {
17 ; COMMON-LABEL: define double @test_atomicrmw_xchg_f64_global_agent(
18 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
19 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8
20 ; COMMON-NEXT: ret double [[RES]]
22 %res = atomicrmw xchg ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst
26 ; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored.
; "Ignored" refers to the expansion decision only - the checks still expect
; !amdgpu.no.fine.grained.memory to stay attached to the unexpanded
; atomicrmw xchg on every -mcpu in the RUN lines.
27 define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
28 ; COMMON-LABEL: define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_fine_grained_memory(
29 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
30 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]]
31 ; COMMON-NEXT: ret double [[RES]]
33 %res = atomicrmw xchg ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
37 ; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored.
; "Ignored" refers to the expansion decision only - the checks still expect
; !amdgpu.no.remote.memory to stay attached to the unexpanded atomicrmw xchg
; on every -mcpu in the RUN lines.
38 define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
39 ; COMMON-LABEL: define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_remote_memory(
40 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
41 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
42 ; COMMON-NEXT: ret double [[RES]]
44 %res = atomicrmw xchg ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
48 ; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored.
; "Ignored" refers to the expansion decision only - the checks still expect
; both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory to stay
; attached to the unexpanded atomicrmw xchg on every -mcpu in the RUN lines.
49 define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
50 ; COMMON-LABEL: define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
51 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
52 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
53 ; COMMON-NEXT: ret double [[RES]]
55 %res = atomicrmw xchg ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
59 ;---------------------------------------------------------------------
61 ;---------------------------------------------------------------------
; f64 fadd at agent scope on a global pointer, with no extra annotations.
; Per the checks below, only the gfx940 run keeps the atomicrmw fadd. The
; gfx803, gfx906, gfx908, gfx90a, gfx1030, gfx1100 and gfx1200 runs each expect
; an initial load followed by a cmpxchg retry loop that compares and swaps the
; value bitcast to i64.
63 define double @test_atomicrmw_fadd_f64_global_agent(ptr addrspace(1) %ptr, double %value) {
64 ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent(
65 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
66 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
67 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
68 ; GFX803: atomicrmw.start:
69 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
70 ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
71 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
72 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
73 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
74 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
75 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
76 ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
77 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
78 ; GFX803: atomicrmw.end:
79 ; GFX803-NEXT: ret double [[TMP5]]
81 ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent(
82 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
83 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
84 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
85 ; GFX906: atomicrmw.start:
86 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
87 ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
88 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
89 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
90 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
91 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
92 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
93 ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
94 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
95 ; GFX906: atomicrmw.end:
96 ; GFX906-NEXT: ret double [[TMP5]]
98 ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent(
99 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
100 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
101 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
102 ; GFX908: atomicrmw.start:
103 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
104 ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
105 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
106 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
107 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
108 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
109 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
110 ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
111 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
112 ; GFX908: atomicrmw.end:
113 ; GFX908-NEXT: ret double [[TMP5]]
115 ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent(
116 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
117 ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
118 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
119 ; GFX90A: atomicrmw.start:
120 ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
121 ; GFX90A-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
122 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
123 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
124 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
125 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
126 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
127 ; GFX90A-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
128 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
129 ; GFX90A: atomicrmw.end:
130 ; GFX90A-NEXT: ret double [[TMP5]]
132 ; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_agent(
133 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
134 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8
135 ; GFX940-NEXT: ret double [[RES]]
137 ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent(
138 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
139 ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
140 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
141 ; GFX10: atomicrmw.start:
142 ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
143 ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
144 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
145 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
146 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
147 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
148 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
149 ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
150 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
151 ; GFX10: atomicrmw.end:
152 ; GFX10-NEXT: ret double [[TMP5]]
154 ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent(
155 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
156 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
157 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
158 ; GFX11: atomicrmw.start:
159 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
160 ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
161 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
162 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
163 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
164 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
165 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
166 ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
167 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
168 ; GFX11: atomicrmw.end:
169 ; GFX11-NEXT: ret double [[TMP5]]
171 ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent(
172 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
173 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
174 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
175 ; GFX12: atomicrmw.start:
176 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
177 ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
178 ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
179 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
180 ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
181 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
182 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
183 ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
184 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
185 ; GFX12: atomicrmw.end:
186 ; GFX12-NEXT: ret double [[TMP5]]
188 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst
; f64 fadd at agent scope on a global pointer, annotated with
; !amdgpu.no.fine.grained.memory.
; Per the checks below, the gfx90a and gfx940 runs keep the atomicrmw fadd,
; while the gfx803, gfx906, gfx908, gfx1030, gfx1100 and gfx1200 runs expect a
; cmpxchg retry loop with the annotation carried over onto the cmpxchg.
; NOTE(review) the gfx90a checks capture the result as TMP5 where the gfx940
; checks use RES. Both are FileCheck captures, so matching is unaffected.
192 define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
193 ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory(
194 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
195 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
196 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
197 ; GFX803: atomicrmw.start:
198 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
199 ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
200 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
201 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
202 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
203 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
204 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
205 ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
206 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
207 ; GFX803: atomicrmw.end:
208 ; GFX803-NEXT: ret double [[TMP5]]
210 ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory(
211 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
212 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
213 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
214 ; GFX906: atomicrmw.start:
215 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
216 ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
217 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
218 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
219 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
220 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
221 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
222 ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
223 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
224 ; GFX906: atomicrmw.end:
225 ; GFX906-NEXT: ret double [[TMP5]]
227 ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory(
228 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
229 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
230 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
231 ; GFX908: atomicrmw.start:
232 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
233 ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
234 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
235 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
236 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
237 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
238 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
239 ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
240 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
241 ; GFX908: atomicrmw.end:
242 ; GFX908-NEXT: ret double [[TMP5]]
244 ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory(
245 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
246 ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
247 ; GFX90A-NEXT: ret double [[TMP5]]
249 ; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory(
250 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
251 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
252 ; GFX940-NEXT: ret double [[RES]]
254 ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory(
255 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
256 ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
257 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
258 ; GFX10: atomicrmw.start:
259 ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
260 ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
261 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
262 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
263 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
264 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
265 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
266 ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
267 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
268 ; GFX10: atomicrmw.end:
269 ; GFX10-NEXT: ret double [[TMP5]]
271 ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory(
272 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
273 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
274 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
275 ; GFX11: atomicrmw.start:
276 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
277 ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
278 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
279 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
280 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
281 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
282 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
283 ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
284 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
285 ; GFX11: atomicrmw.end:
286 ; GFX11-NEXT: ret double [[TMP5]]
288 ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory(
289 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
290 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
291 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
292 ; GFX12: atomicrmw.start:
293 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
294 ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
295 ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
296 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
297 ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
298 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
299 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
300 ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
301 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
302 ; GFX12: atomicrmw.end:
303 ; GFX12-NEXT: ret double [[TMP5]]
305 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
; f64 fadd at agent scope on a global pointer, annotated with
; !amdgpu.no.remote.memory only.
; Per the checks below, only the gfx940 run keeps the atomicrmw fadd. The
; gfx803, gfx906, gfx908, gfx90a, gfx1030, gfx1100 and gfx1200 runs expect a
; cmpxchg retry loop with the annotation carried over onto the cmpxchg.
309 define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
310 ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory(
311 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
312 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
313 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
314 ; GFX803: atomicrmw.start:
315 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
316 ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
317 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
318 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
319 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
320 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
321 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
322 ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
323 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
324 ; GFX803: atomicrmw.end:
325 ; GFX803-NEXT: ret double [[TMP5]]
327 ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory(
328 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
329 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
330 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
331 ; GFX906: atomicrmw.start:
332 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
333 ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
334 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
335 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
336 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
337 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
338 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
339 ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
340 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
341 ; GFX906: atomicrmw.end:
342 ; GFX906-NEXT: ret double [[TMP5]]
344 ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory(
345 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
346 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
347 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
348 ; GFX908: atomicrmw.start:
349 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
350 ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
351 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
352 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
353 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
354 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
355 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
356 ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
357 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
358 ; GFX908: atomicrmw.end:
359 ; GFX908-NEXT: ret double [[TMP5]]
361 ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory(
362 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
363 ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
364 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
365 ; GFX90A: atomicrmw.start:
366 ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
367 ; GFX90A-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
368 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
369 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
370 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
371 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
372 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
373 ; GFX90A-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
374 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
375 ; GFX90A: atomicrmw.end:
376 ; GFX90A-NEXT: ret double [[TMP5]]
378 ; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory(
379 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
380 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
381 ; GFX940-NEXT: ret double [[RES]]
383 ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory(
384 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
385 ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
386 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
387 ; GFX10: atomicrmw.start:
388 ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
389 ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
390 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
391 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
392 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
393 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
394 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
395 ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
396 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
397 ; GFX10: atomicrmw.end:
398 ; GFX10-NEXT: ret double [[TMP5]]
400 ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory(
401 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
402 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
403 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
404 ; GFX11: atomicrmw.start:
405 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
406 ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
407 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
408 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
409 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
410 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
411 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
412 ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
413 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
414 ; GFX11: atomicrmw.end:
415 ; GFX11-NEXT: ret double [[TMP5]]
417 ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory(
418 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
419 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
420 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
421 ; GFX12: atomicrmw.start:
422 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
423 ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
424 ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
425 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
426 ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
427 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
428 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
429 ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
430 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
431 ; GFX12: atomicrmw.end:
432 ; GFX12-NEXT: ret double [[TMP5]]
434 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
; atomicrmw fadd on double, global address space, agent scope, seq_cst, tagged
; with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory.
; Expected output according to the assertions that follow:
;   - gfx90a and gfx940 keep the atomicrmw fadd unchanged, metadata included.
;   - gfx803, gfx906, gfx908, gfx1030, gfx1100 and gfx1200 get an initial load
;     followed by a retry loop around an i64 cmpxchg (operands bitcast from
;     double); both metadata kinds are expected on that cmpxchg.
438 define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
439 ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
440 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
441 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
442 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
443 ; GFX803: atomicrmw.start:
444 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
445 ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
446 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
447 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
448 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
449 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
450 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
451 ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
452 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
453 ; GFX803: atomicrmw.end:
454 ; GFX803-NEXT: ret double [[TMP5]]
456 ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
457 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
458 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
459 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
460 ; GFX906: atomicrmw.start:
461 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
462 ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
463 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
464 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
465 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
466 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
467 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
468 ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
469 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
470 ; GFX906: atomicrmw.end:
471 ; GFX906-NEXT: ret double [[TMP5]]
473 ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
474 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
475 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
476 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
477 ; GFX908: atomicrmw.start:
478 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
479 ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
480 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
481 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
482 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
483 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
484 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
485 ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
486 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
487 ; GFX908: atomicrmw.end:
488 ; GFX908-NEXT: ret double [[TMP5]]
490 ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
491 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
492 ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
493 ; GFX90A-NEXT: ret double [[TMP5]]
495 ; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
496 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
497 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
498 ; GFX940-NEXT: ret double [[RES]]
500 ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
501 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
502 ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
503 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
504 ; GFX10: atomicrmw.start:
505 ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
506 ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
507 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
508 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
509 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
510 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
511 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
512 ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
513 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
514 ; GFX10: atomicrmw.end:
515 ; GFX10-NEXT: ret double [[TMP5]]
517 ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
518 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
519 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
520 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
521 ; GFX11: atomicrmw.start:
522 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
523 ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
524 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
525 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
526 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
527 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
528 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
529 ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
530 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
531 ; GFX11: atomicrmw.end:
532 ; GFX11-NEXT: ret double [[TMP5]]
534 ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
535 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
536 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
537 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
538 ; GFX12: atomicrmw.start:
539 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
540 ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
541 ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
542 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
543 ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
544 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
545 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
546 ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
547 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
548 ; GFX12: atomicrmw.end:
549 ; GFX12-NEXT: ret double [[TMP5]]
551 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
; atomicrmw fadd on double, global address space, agent scope, seq_cst, tagged
; with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory, in a
; function that carries attribute group #0.
; NOTE(review): attribute group #0 is defined outside this excerpt; going by the
; function name it presumably selects a denormals-are-zero mode for f64 -
; confirm against the attribute definitions at the end of the file.
; Expected output according to the assertions that follow:
;   - gfx90a and gfx940 keep the atomicrmw fadd unchanged, metadata included.
;   - gfx803, gfx906, gfx908, gfx1030, gfx1100 and gfx1200 get an initial load
;     followed by a retry loop around an i64 cmpxchg (operands bitcast from
;     double); both metadata kinds are expected on that cmpxchg.
555 define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz(ptr addrspace(1) %ptr, double %value) #0 {
556 ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz(
557 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
558 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
559 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
560 ; GFX803: atomicrmw.start:
561 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
562 ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
563 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
564 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
565 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
566 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
567 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
568 ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
569 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
570 ; GFX803: atomicrmw.end:
571 ; GFX803-NEXT: ret double [[TMP5]]
573 ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz(
574 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
575 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
576 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
577 ; GFX906: atomicrmw.start:
578 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
579 ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
580 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
581 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
582 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
583 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
584 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
585 ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
586 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
587 ; GFX906: atomicrmw.end:
588 ; GFX906-NEXT: ret double [[TMP5]]
590 ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz(
591 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
592 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
593 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
594 ; GFX908: atomicrmw.start:
595 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
596 ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
597 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
598 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
599 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
600 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
601 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
602 ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
603 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
604 ; GFX908: atomicrmw.end:
605 ; GFX908-NEXT: ret double [[TMP5]]
607 ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz(
608 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
609 ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
610 ; GFX90A-NEXT: ret double [[TMP5]]
612 ; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz(
613 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
614 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
615 ; GFX940-NEXT: ret double [[RES]]
617 ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz(
618 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
619 ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
620 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
621 ; GFX10: atomicrmw.start:
622 ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
623 ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
624 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
625 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
626 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
627 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
628 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
629 ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
630 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
631 ; GFX10: atomicrmw.end:
632 ; GFX10-NEXT: ret double [[TMP5]]
634 ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz(
635 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
636 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
637 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
638 ; GFX11: atomicrmw.start:
639 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
640 ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
641 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
642 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
643 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
644 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
645 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
646 ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
647 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
648 ; GFX11: atomicrmw.end:
649 ; GFX11-NEXT: ret double [[TMP5]]
651 ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz(
652 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
653 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
654 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
655 ; GFX12: atomicrmw.start:
656 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
657 ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
658 ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
659 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
660 ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
661 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
662 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
663 ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
664 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
665 ; GFX12: atomicrmw.end:
666 ; GFX12-NEXT: ret double [[TMP5]]
668 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
; atomicrmw fadd on double, global address space, agent scope, seq_cst, tagged
; with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory, in a
; function that carries attribute group #1.
; NOTE(review): attribute group #1 is defined outside this excerpt; going by the
; function name it presumably selects a dynamic denormal mode for f64 -
; confirm against the attribute definitions at the end of the file.
; Expected output according to the assertions that follow:
;   - gfx90a and gfx940 keep the atomicrmw fadd unchanged, metadata included.
;   - gfx803, gfx906, gfx908, gfx1030, gfx1100 and gfx1200 get an initial load
;     followed by a retry loop around an i64 cmpxchg (operands bitcast from
;     double); both metadata kinds are expected on that cmpxchg.
672 define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic(ptr addrspace(1) %ptr, double %value) #1 {
673 ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic(
674 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
675 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
676 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
677 ; GFX803: atomicrmw.start:
678 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
679 ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
680 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
681 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
682 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
683 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
684 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
685 ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
686 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
687 ; GFX803: atomicrmw.end:
688 ; GFX803-NEXT: ret double [[TMP5]]
690 ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic(
691 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
692 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
693 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
694 ; GFX906: atomicrmw.start:
695 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
696 ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
697 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
698 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
699 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
700 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
701 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
702 ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
703 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
704 ; GFX906: atomicrmw.end:
705 ; GFX906-NEXT: ret double [[TMP5]]
707 ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic(
708 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
709 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
710 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
711 ; GFX908: atomicrmw.start:
712 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
713 ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
714 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
715 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
716 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
717 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
718 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
719 ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
720 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
721 ; GFX908: atomicrmw.end:
722 ; GFX908-NEXT: ret double [[TMP5]]
724 ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic(
725 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
726 ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
727 ; GFX90A-NEXT: ret double [[TMP5]]
729 ; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic(
730 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
731 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
732 ; GFX940-NEXT: ret double [[RES]]
734 ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic(
735 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
736 ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
737 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
738 ; GFX10: atomicrmw.start:
739 ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
740 ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
741 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
742 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
743 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
744 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
745 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
746 ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
747 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
748 ; GFX10: atomicrmw.end:
749 ; GFX10-NEXT: ret double [[TMP5]]
751 ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic(
752 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
753 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
754 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
755 ; GFX11: atomicrmw.start:
756 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
757 ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
758 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
759 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
760 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
761 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
762 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
763 ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
764 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
765 ; GFX11: atomicrmw.end:
766 ; GFX11-NEXT: ret double [[TMP5]]
768 ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic(
769 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
770 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
771 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
772 ; GFX12: atomicrmw.start:
773 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
774 ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
775 ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
776 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
777 ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
778 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
779 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
780 ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
781 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
782 ; GFX12: atomicrmw.end:
783 ; GFX12-NEXT: ret double [[TMP5]]
785 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
; atomicrmw fadd on double, global address space, agent scope, seq_cst, tagged
; with !amdgpu.ignore.denormal.mode only. The input atomicrmw is written with
; an explicit align 4.
; Expected output is the same for every target in the RUN lines: two private
; (addrspace 5) double allocas, an align 4 load of the initial value, and a
; retry loop that casts the global pointer to a flat pointer and calls
; @__atomic_compare_exchange with size 8 and orderings 5/5.
; NOTE(review): align 4 is below the 8-byte size of double; that under-alignment
; is presumably what selects the libcall path, regardless of the metadata.
; Confirm the align 4 is intentional and not carried over from an f32 test.
789 define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
790 ; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(
791 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
792 ; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
793 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
794 ; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
795 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
796 ; COMMON: atomicrmw.start:
797 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
798 ; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
799 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
800 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
801 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
802 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
803 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
804 ; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
805 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
806 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
807 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
808 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
809 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
810 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
811 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
812 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
813 ; COMMON: atomicrmw.end:
814 ; COMMON-NEXT: ret double [[NEWLOADED]]
816 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
; atomicrmw fadd on double, global address space, agent scope, seq_cst, tagged
; with !amdgpu.no.fine.grained.memory and !amdgpu.ignore.denormal.mode. The
; input atomicrmw is written with an explicit align 4.
; Expected output is the same for every target in the RUN lines: two private
; (addrspace 5) double allocas, an align 4 load of the initial value, and a
; retry loop that casts the global pointer to a flat pointer and calls
; @__atomic_compare_exchange with size 8 and orderings 5/5.
; NOTE(review): align 4 is below the 8-byte size of double; that under-alignment
; is presumably what selects the libcall path, regardless of the metadata.
; Confirm the align 4 is intentional and not carried over from an f32 test.
820 define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
821 ; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
822 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
823 ; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
824 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
825 ; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
826 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
827 ; COMMON: atomicrmw.start:
828 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
829 ; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
830 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
831 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
832 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
833 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
834 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
835 ; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
836 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
837 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
838 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
839 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
840 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
841 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
842 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
843 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
844 ; COMMON: atomicrmw.end:
845 ; COMMON-NEXT: ret double [[NEWLOADED]]
847 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
; atomicrmw fadd on double, global address space, agent scope, seq_cst, tagged
; with !amdgpu.no.remote.memory and !amdgpu.ignore.denormal.mode. The input
; atomicrmw is written with an explicit align 4.
; Expected output is the same for every target in the RUN lines: two private
; (addrspace 5) double allocas, an align 4 load of the initial value, and a
; retry loop that casts the global pointer to a flat pointer and calls
; @__atomic_compare_exchange with size 8 and orderings 5/5.
; NOTE(review): align 4 is below the 8-byte size of double; that under-alignment
; is presumably what selects the libcall path, regardless of the metadata.
; Confirm the align 4 is intentional and not carried over from an f32 test.
851 define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
852 ; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
853 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
854 ; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
855 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
856 ; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
857 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
858 ; COMMON: atomicrmw.start:
859 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
860 ; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
861 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
862 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
863 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
864 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
865 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
866 ; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
867 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
868 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
869 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
870 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
871 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
872 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
873 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
874 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
875 ; COMMON: atomicrmw.end:
876 ; COMMON-NEXT: ret double [[NEWLOADED]]
878 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
; fadd on double in addrspace(1) at agent scope carrying all three metadata
; kinds (!amdgpu.no.fine.grained.memory, !amdgpu.no.remote.memory,
; !amdgpu.ignore.denormal.mode) on an explicitly `align 4` atomicrmw.
; All run lines share one expected output: a retry loop around
; @__atomic_compare_exchange, with none of the metadata present in it.
; NOTE(review): the libcall form is presumably a consequence of `align 4`
; under-aligning the 8-byte double, so the metadata combination is not what
; drives the expected output here. Confirm that is the intended coverage.
882 define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
883 ; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
884 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
885 ; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
886 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
887 ; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
888 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
889 ; COMMON: atomicrmw.start:
890 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
891 ; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
892 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
893 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
894 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
895 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
896 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
897 ; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
898 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
899 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
900 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
901 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
902 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
903 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
904 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
905 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
906 ; COMMON: atomicrmw.end:
907 ; COMMON-NEXT: ret double [[NEWLOADED]]
909 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
; Same atomicrmw as the preceding all-metadata fadd test (all three metadata
; kinds, explicit `align 4`), but the function carries attribute group #0.
; The checks capture the printed attribute group as ATTR1 and otherwise expect
; the same @__atomic_compare_exchange retry loop on every run line.
; NOTE(review): attribute group #0 is defined outside this part of the file;
; the `denormal_mode_daz` suffix suggests it selects a denormals-are-zero
; mode -- verify against the attribute definition. As with the other
; `align 4` cases, the libcall form is presumably due to the under-aligned
; access rather than the denormal mode.
913 define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(ptr addrspace(1) %ptr, double %value) #0 {
914 ; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
915 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
916 ; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
917 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
918 ; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
919 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
920 ; COMMON: atomicrmw.start:
921 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
922 ; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
923 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
924 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
925 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
926 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
927 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
928 ; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
929 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
930 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
931 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
932 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
933 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
934 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
935 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
936 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
937 ; COMMON: atomicrmw.end:
938 ; COMMON-NEXT: ret double [[NEWLOADED]]
940 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
; Same atomicrmw as the all-metadata fadd test (all three metadata kinds,
; explicit `align 4`), but the function carries attribute group #1.
; The checks capture the printed attribute group as ATTR2 and otherwise expect
; the same @__atomic_compare_exchange retry loop on every run line.
; NOTE(review): attribute group #1 is defined outside this part of the file;
; the `denormal_mode_dynamic` suffix suggests it selects a dynamic denormal
; mode -- verify against the attribute definition. As with the other
; `align 4` cases, the libcall form is presumably due to the under-aligned
; access rather than the denormal mode.
944 define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(ptr addrspace(1) %ptr, double %value) #1 {
945 ; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
946 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
947 ; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
948 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
949 ; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
950 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
951 ; COMMON: atomicrmw.start:
952 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
953 ; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
954 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
955 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
956 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
957 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
958 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
959 ; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
960 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
961 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
962 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
963 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
964 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
965 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
966 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
967 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
968 ; COMMON: atomicrmw.end:
969 ; COMMON-NEXT: ret double [[NEWLOADED]]
971 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
975 ;---------------------------------------------------------------------
977 ;---------------------------------------------------------------------
; Baseline fsub on double in addrspace(1) at agent scope: no metadata and no
; explicit alignment (the checks show the access as `align 8`).
; All run lines share one expected output: a retry loop that computes the
; fsub, bitcasts the old and new values to i64, and issues an i64 cmpxchg at
; agent scope with seq_cst ordering for both success and failure.
979 define double @test_atomicrmw_fsub_f64_global_agent(ptr addrspace(1) %ptr, double %value) {
980 ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent(
981 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
982 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
983 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
984 ; COMMON: atomicrmw.start:
985 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
986 ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
987 ; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
988 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
989 ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
990 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
991 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
992 ; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
993 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
994 ; COMMON: atomicrmw.end:
995 ; COMMON-NEXT: ret double [[RES]]
997 %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst
; fsub on double in addrspace(1) at agent scope with
; !amdgpu.no.fine.grained.memory and default alignment.
; All run lines share one expected output: the same i64 cmpxchg retry loop as
; the baseline fsub test, with the metadata carried over onto the cmpxchg.
1001 define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
1002 ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_fine_grained_memory(
1003 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1004 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1005 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1006 ; COMMON: atomicrmw.start:
1007 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
1008 ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
1009 ; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
1010 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
1011 ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
1012 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
1013 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
1014 ; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
1015 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1016 ; COMMON: atomicrmw.end:
1017 ; COMMON-NEXT: ret double [[RES]]
1019 %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
; fsub on double in addrspace(1) at agent scope with
; !amdgpu.no.remote.memory and default alignment.
; All run lines share one expected output: the same i64 cmpxchg retry loop as
; the baseline fsub test, with the metadata carried over onto the cmpxchg.
1023 define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
1024 ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_remote_memory(
1025 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1026 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1027 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1028 ; COMMON: atomicrmw.start:
1029 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
1030 ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
1031 ; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
1032 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
1033 ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
1034 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
1035 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
1036 ; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
1037 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1038 ; COMMON: atomicrmw.end:
1039 ; COMMON-NEXT: ret double [[RES]]
1041 %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
; fsub on double in addrspace(1) at agent scope with both
; !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory, default
; alignment. All run lines share one expected output: the i64 cmpxchg retry
; loop, with both metadata kinds carried over onto the cmpxchg.
1045 define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
1046 ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1047 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1048 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1049 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1050 ; COMMON: atomicrmw.start:
1051 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
1052 ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
1053 ; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
1054 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
1055 ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1056 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
1057 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
1058 ; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
1059 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1060 ; COMMON: atomicrmw.end:
1061 ; COMMON-NEXT: ret double [[RES]]
1063 %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
; fsub on double in addrspace(1) at agent scope with
; !amdgpu.ignore.denormal.mode on an explicitly `align 4` atomicrmw.
; All run lines share one expected output: a retry loop around a call to
; @__atomic_compare_exchange using two addrspace(5) allocas, with no metadata
; in the expected output.
; NOTE(review): unlike the fsub tests without `align 4`, which expect an i64
; cmpxchg loop, this expects a libcall. That is presumably caused by the
; under-aligned access rather than by the metadata. Confirm `align 4` is
; intentional for an f64 test.
1067 define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
1068 ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode(
1069 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1070 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
1071 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
1072 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
1073 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1074 ; COMMON: atomicrmw.start:
1075 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1076 ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
1077 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1078 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
1079 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
1080 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
1081 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
1082 ; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
1083 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
1084 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
1085 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
1086 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
1087 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
1088 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
1089 ; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
1090 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1091 ; COMMON: atomicrmw.end:
1092 ; COMMON-NEXT: ret double [[TMP5]]
1094 %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
; fsub on double in addrspace(1) at agent scope with
; !amdgpu.no.fine.grained.memory and !amdgpu.ignore.denormal.mode on an
; explicitly `align 4` atomicrmw. All run lines share one expected output: a
; retry loop around @__atomic_compare_exchange, with neither metadata kind
; present in it.
; NOTE(review): the libcall form is presumably a consequence of `align 4`
; under-aligning the 8-byte double -- confirm the alignment is intentional.
1098 define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
1099 ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
1100 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1101 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
1102 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
1103 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
1104 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1105 ; COMMON: atomicrmw.start:
1106 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1107 ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
1108 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1109 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
1110 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
1111 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
1112 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
1113 ; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
1114 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
1115 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
1116 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
1117 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
1118 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
1119 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
1120 ; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
1121 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1122 ; COMMON: atomicrmw.end:
1123 ; COMMON-NEXT: ret double [[TMP5]]
1125 %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
; fsub on double in addrspace(1) at agent scope with
; !amdgpu.no.remote.memory and !amdgpu.ignore.denormal.mode on an explicitly
; `align 4` atomicrmw. All run lines share one expected output: a retry loop
; around @__atomic_compare_exchange, with neither metadata kind present in it.
; NOTE(review): the libcall form is presumably a consequence of `align 4`
; under-aligning the 8-byte double -- confirm the alignment is intentional.
1129 define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
1130 ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
1131 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1132 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
1133 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
1134 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
1135 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1136 ; COMMON: atomicrmw.start:
1137 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1138 ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
1139 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1140 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
1141 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
1142 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
1143 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
1144 ; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
1145 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
1146 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
1147 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
1148 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
1149 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
1150 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
1151 ; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
1152 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1153 ; COMMON: atomicrmw.end:
1154 ; COMMON-NEXT: ret double [[TMP5]]
1156 %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
; fsub on double in addrspace(1) at agent scope carrying all three metadata
; kinds (!amdgpu.no.fine.grained.memory, !amdgpu.no.remote.memory,
; !amdgpu.ignore.denormal.mode) on an explicitly `align 4` atomicrmw.
; All run lines share one expected output: a retry loop around
; @__atomic_compare_exchange, with none of the metadata present in it.
; NOTE(review): the libcall form is presumably a consequence of `align 4`
; under-aligning the 8-byte double -- confirm the alignment is intentional.
1160 define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
1161 ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1162 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1163 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
1164 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
1165 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
1166 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1167 ; COMMON: atomicrmw.start:
1168 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1169 ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
1170 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1171 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
1172 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
1173 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
1174 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
1175 ; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
1176 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
1177 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
1178 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
1179 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
1180 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
1181 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
1182 ; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
1183 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1184 ; COMMON: atomicrmw.end:
1185 ; COMMON-NEXT: ret double [[TMP5]]
1187 %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
1191 ;---------------------------------------------------------------------
1193 ;---------------------------------------------------------------------
; Baseline fmax on double in addrspace(1) at agent scope: no metadata and no
; explicit alignment (the checks show the access as `align 8`).
; Expectations are split per target instead of using the shared prefix:
;   - gfx940 keeps the `atomicrmw fmax` instruction unchanged.
;   - gfx803, gfx906, gfx908, gfx90a, gfx1030, gfx1100 and gfx1200 all expect
;     a retry loop that calls @llvm.maxnum.f64, bitcasts the old and new
;     values to i64, and issues an i64 cmpxchg at agent scope.
1195 define double @test_atomicrmw_fmax_f64_global_agent(ptr addrspace(1) %ptr, double %value) {
1196 ; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
1197 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1198 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1199 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
1200 ; GFX803: atomicrmw.start:
1201 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1202 ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1203 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1204 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1205 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
1206 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1207 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1208 ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1209 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1210 ; GFX803: atomicrmw.end:
1211 ; GFX803-NEXT: ret double [[TMP6]]
1213 ; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
1214 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1215 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1216 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
1217 ; GFX906: atomicrmw.start:
1218 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1219 ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1220 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1221 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1222 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
1223 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1224 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1225 ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1226 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1227 ; GFX906: atomicrmw.end:
1228 ; GFX906-NEXT: ret double [[TMP6]]
1230 ; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
1231 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1232 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1233 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
1234 ; GFX908: atomicrmw.start:
1235 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1236 ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1237 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1238 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1239 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
1240 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1241 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1242 ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1243 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1244 ; GFX908: atomicrmw.end:
1245 ; GFX908-NEXT: ret double [[TMP6]]
1247 ; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
1248 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1249 ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1250 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
1251 ; GFX90A: atomicrmw.start:
1252 ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1253 ; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1254 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1255 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1256 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
1257 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1258 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1259 ; GFX90A-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1260 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1261 ; GFX90A: atomicrmw.end:
1262 ; GFX90A-NEXT: ret double [[TMP6]]
1264 ; GFX940-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
1265 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1266 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8
1267 ; GFX940-NEXT: ret double [[RES]]
1269 ; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
1270 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1271 ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1272 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
1273 ; GFX10: atomicrmw.start:
1274 ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1275 ; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1276 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1277 ; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1278 ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
1279 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1280 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1281 ; GFX10-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1282 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1283 ; GFX10: atomicrmw.end:
1284 ; GFX10-NEXT: ret double [[TMP6]]
1286 ; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
1287 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1288 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1289 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
1290 ; GFX11: atomicrmw.start:
1291 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1292 ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1293 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1294 ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1295 ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
1296 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1297 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1298 ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1299 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1300 ; GFX11: atomicrmw.end:
1301 ; GFX11-NEXT: ret double [[TMP6]]
1303 ; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent(
1304 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1305 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1306 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
1307 ; GFX12: atomicrmw.start:
1308 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1309 ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1310 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1311 ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1312 ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
1313 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1314 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1315 ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1316 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1317 ; GFX12: atomicrmw.end:
1318 ; GFX12-NEXT: ret double [[TMP6]]
1320 %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst
; f64 fmax on a global (addrspace 1) pointer at agent scope, tagged with
; !amdgpu.no.fine.grained.memory. Per the checks below, the gfx90a, gfx940 and
; gfx1030 runs leave the atomicrmw in place, while the gfx803, gfx906, gfx908,
; gfx1100 and gfx1200 runs expand it into a load + cmpxchg retry loop whose
; cmpxchg still carries the metadata.
1324 define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
1325 ; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
1326 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1327 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1328 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
1329 ; GFX803: atomicrmw.start:
1330 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1331 ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1332 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1333 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1334 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
1335 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1336 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1337 ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1338 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1339 ; GFX803: atomicrmw.end:
1340 ; GFX803-NEXT: ret double [[TMP6]]
1342 ; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
1343 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1344 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1345 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
1346 ; GFX906: atomicrmw.start:
1347 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1348 ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1349 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1350 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1351 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
1352 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1353 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1354 ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1355 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1356 ; GFX906: atomicrmw.end:
1357 ; GFX906-NEXT: ret double [[TMP6]]
1359 ; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
1360 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1361 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1362 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
1363 ; GFX908: atomicrmw.start:
1364 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1365 ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1366 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1367 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1368 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
1369 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1370 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1371 ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1372 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1373 ; GFX908: atomicrmw.end:
1374 ; GFX908-NEXT: ret double [[TMP6]]
1376 ; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
1377 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1378 ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
1379 ; GFX90A-NEXT: ret double [[RES]]
1381 ; GFX940-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
1382 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1383 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
1384 ; GFX940-NEXT: ret double [[RES]]
1386 ; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
1387 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1388 ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
1389 ; GFX10-NEXT: ret double [[RES]]
1391 ; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
1392 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1393 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1394 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
1395 ; GFX11: atomicrmw.start:
1396 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1397 ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1398 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1399 ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1400 ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
1401 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1402 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1403 ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1404 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1405 ; GFX11: atomicrmw.end:
1406 ; GFX11-NEXT: ret double [[TMP6]]
1408 ; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(
1409 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1410 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1411 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
1412 ; GFX12: atomicrmw.start:
1413 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1414 ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1415 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1416 ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1417 ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
1418 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1419 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1420 ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1421 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1422 ; GFX12: atomicrmw.end:
1423 ; GFX12-NEXT: ret double [[TMP6]]
1425 %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
; f64 fmax on a global (addrspace 1) pointer at agent scope, tagged only with
; !amdgpu.no.remote.memory. Per the checks below, only the gfx940 run keeps the
; atomicrmw; every other run (including gfx90a and gfx1030) expands it into a
; load + cmpxchg retry loop whose cmpxchg still carries the metadata.
1429 define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
1430 ; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
1431 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1432 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1433 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
1434 ; GFX803: atomicrmw.start:
1435 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1436 ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1437 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1438 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1439 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
1440 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1441 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1442 ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1443 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1444 ; GFX803: atomicrmw.end:
1445 ; GFX803-NEXT: ret double [[TMP6]]
1447 ; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
1448 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1449 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1450 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
1451 ; GFX906: atomicrmw.start:
1452 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1453 ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1454 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1455 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1456 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
1457 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1458 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1459 ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1460 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1461 ; GFX906: atomicrmw.end:
1462 ; GFX906-NEXT: ret double [[TMP6]]
1464 ; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
1465 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1466 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1467 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
1468 ; GFX908: atomicrmw.start:
1469 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1470 ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1471 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1472 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1473 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
1474 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1475 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1476 ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1477 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1478 ; GFX908: atomicrmw.end:
1479 ; GFX908-NEXT: ret double [[TMP6]]
1481 ; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
1482 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1483 ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1484 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
1485 ; GFX90A: atomicrmw.start:
1486 ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
1487 ; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1488 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1489 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1490 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
1491 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1492 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1493 ; GFX90A-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
1494 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1495 ; GFX90A: atomicrmw.end:
1496 ; GFX90A-NEXT: ret double [[RES]]
1498 ; GFX940-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
1499 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1500 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
1501 ; GFX940-NEXT: ret double [[RES]]
1503 ; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
1504 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1505 ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1506 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
1507 ; GFX10: atomicrmw.start:
1508 ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
1509 ; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1510 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1511 ; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1512 ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
1513 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1514 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1515 ; GFX10-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
1516 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1517 ; GFX10: atomicrmw.end:
1518 ; GFX10-NEXT: ret double [[RES]]
1520 ; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
1521 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1522 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1523 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
1524 ; GFX11: atomicrmw.start:
1525 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1526 ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1527 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1528 ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1529 ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
1530 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1531 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1532 ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1533 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1534 ; GFX11: atomicrmw.end:
1535 ; GFX11-NEXT: ret double [[TMP6]]
1537 ; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(
1538 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1539 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1540 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
1541 ; GFX12: atomicrmw.start:
1542 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1543 ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1544 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1545 ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1546 ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
1547 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1548 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1549 ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1550 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1551 ; GFX12: atomicrmw.end:
1552 ; GFX12-NEXT: ret double [[TMP6]]
1554 %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
; f64 fmax on a global (addrspace 1) pointer at agent scope, tagged with both
; !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory. Per the checks
; below, the gfx90a, gfx940 and gfx1030 runs leave the atomicrmw in place; the
; gfx803, gfx906, gfx908, gfx1100 and gfx1200 runs expand it into a load +
; cmpxchg retry loop, and both metadata kinds are carried over to the cmpxchg.
1558 define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
1559 ; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1560 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1561 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1562 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
1563 ; GFX803: atomicrmw.start:
1564 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1565 ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1566 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1567 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1568 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1569 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1570 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1571 ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1572 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1573 ; GFX803: atomicrmw.end:
1574 ; GFX803-NEXT: ret double [[TMP6]]
1576 ; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1577 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1578 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1579 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
1580 ; GFX906: atomicrmw.start:
1581 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1582 ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1583 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1584 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1585 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1586 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1587 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1588 ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1589 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1590 ; GFX906: atomicrmw.end:
1591 ; GFX906-NEXT: ret double [[TMP6]]
1593 ; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1594 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1595 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1596 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
1597 ; GFX908: atomicrmw.start:
1598 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1599 ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1600 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1601 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1602 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1603 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1604 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1605 ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1606 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1607 ; GFX908: atomicrmw.end:
1608 ; GFX908-NEXT: ret double [[TMP6]]
1610 ; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1611 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1612 ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1613 ; GFX90A-NEXT: ret double [[RES]]
1615 ; GFX940-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1616 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1617 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1618 ; GFX940-NEXT: ret double [[RES]]
1620 ; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1621 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1622 ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1623 ; GFX10-NEXT: ret double [[RES]]
1625 ; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1626 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1627 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1628 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
1629 ; GFX11: atomicrmw.start:
1630 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1631 ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1632 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1633 ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1634 ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1635 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1636 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1637 ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1638 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1639 ; GFX11: atomicrmw.end:
1640 ; GFX11-NEXT: ret double [[TMP6]]
1642 ; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1643 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1644 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1645 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
1646 ; GFX12: atomicrmw.start:
1647 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1648 ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1649 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1650 ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1651 ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
1652 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1653 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1654 ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1655 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1656 ; GFX12: atomicrmw.end:
1657 ; GFX12-NEXT: ret double [[TMP6]]
1659 %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
; f64 fmax tagged with !amdgpu.ignore.denormal.mode. Unlike the tests without
; this tag, the input atomicrmw is written with an explicit "align 4". All runs
; share one set of checks: a retry loop around a call to
; @__atomic_compare_exchange, with the expected and desired values passed
; through two addrspace(5) allocas. The metadata does not appear in the output.
; NOTE(review): presumably the 4-byte alignment of the 8-byte access is what
; selects the libcall expansion, independent of the metadata - confirm.
1663 define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
1664 ; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode(
1665 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1666 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
1667 ; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
1668 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
1669 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1670 ; COMMON: atomicrmw.start:
1671 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1672 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1673 ; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1674 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
1675 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
1676 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
1677 ; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
1678 ; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
1679 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
1680 ; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
1681 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
1682 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
1683 ; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
1684 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
1685 ; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
1686 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1687 ; COMMON: atomicrmw.end:
1688 ; COMMON-NEXT: ret double [[TMP6]]
1690 %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
; f64 fmax tagged with !amdgpu.no.fine.grained.memory and
; !amdgpu.ignore.denormal.mode, written with an explicit "align 4". All runs
; share one set of checks: a retry loop around a call to
; @__atomic_compare_exchange using two addrspace(5) allocas; neither metadata
; kind appears in the expanded output.
; NOTE(review): presumably the 4-byte alignment of the 8-byte access is what
; selects the libcall expansion, independent of the metadata - confirm.
1694 define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
1695 ; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
1696 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1697 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
1698 ; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
1699 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
1700 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1701 ; COMMON: atomicrmw.start:
1702 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1703 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1704 ; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1705 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
1706 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
1707 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
1708 ; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
1709 ; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
1710 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
1711 ; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
1712 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
1713 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
1714 ; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
1715 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
1716 ; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
1717 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1718 ; COMMON: atomicrmw.end:
1719 ; COMMON-NEXT: ret double [[TMP6]]
1721 %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
; f64 fmax tagged with !amdgpu.no.remote.memory and
; !amdgpu.ignore.denormal.mode, written with an explicit "align 4". All runs
; share one set of checks: a retry loop around a call to
; @__atomic_compare_exchange using two addrspace(5) allocas; neither metadata
; kind appears in the expanded output.
; NOTE(review): presumably the 4-byte alignment of the 8-byte access is what
; selects the libcall expansion, independent of the metadata - confirm.
1725 define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
1726 ; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
1727 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1728 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
1729 ; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
1730 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
1731 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1732 ; COMMON: atomicrmw.start:
1733 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1734 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1735 ; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1736 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
1737 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
1738 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
1739 ; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
1740 ; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
1741 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
1742 ; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
1743 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
1744 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
1745 ; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
1746 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
1747 ; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
1748 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1749 ; COMMON: atomicrmw.end:
1750 ; COMMON-NEXT: ret double [[TMP6]]
1752 %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
; fmax on a global (addrspace(1)) f64 at agent scope, tagged with all three of
; !amdgpu.no.fine.grained.memory, !amdgpu.no.remote.memory and
; !amdgpu.ignore.denormal.mode. The atomicrmw below uses "align 4", which is
; less than the 8-byte size of double, and all RUN lines share one
; expectation: a retry loop that computes llvm.maxnum.f64 and calls
; __atomic_compare_exchange through addrspace(5) temporaries.
; NOTE(review): the function name does not mention the reduced alignment;
; confirm that "align 4" is intended here.
1756 define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
1757 ; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1758 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1759 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
1760 ; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
1761 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
1762 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1763 ; COMMON: atomicrmw.start:
1764 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1765 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1766 ; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1767 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
1768 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
1769 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
1770 ; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
1771 ; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
1772 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
1773 ; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
1774 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
1775 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
1776 ; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
1777 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
1778 ; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
1779 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1780 ; COMMON: atomicrmw.end:
1781 ; COMMON-NEXT: ret double [[TMP6]]
1783 %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
1787 ;---------------------------------------------------------------------
1789 ;---------------------------------------------------------------------
; fmin on a global (addrspace(1)) f64 at agent scope with no extra metadata.
; Expected result per RUN line: the gfx940 run leaves the atomicrmw fmin
; untouched; every other run rewrites it into a cmpxchg retry loop that
; computes llvm.minnum.f64 and compares/exchanges the value bitcast to i64.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_test_checks.py rather than editing by hand.
1791 define double @test_atomicrmw_fmin_f64_global_agent(ptr addrspace(1) %ptr, double %value) {
1792 ; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
1793 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1794 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1795 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
1796 ; GFX803: atomicrmw.start:
1797 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1798 ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
1799 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1800 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1801 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
1802 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1803 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1804 ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1805 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1806 ; GFX803: atomicrmw.end:
1807 ; GFX803-NEXT: ret double [[TMP6]]
1809 ; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
1810 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1811 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1812 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
1813 ; GFX906: atomicrmw.start:
1814 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1815 ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
1816 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1817 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1818 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
1819 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1820 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1821 ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1822 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1823 ; GFX906: atomicrmw.end:
1824 ; GFX906-NEXT: ret double [[TMP6]]
1826 ; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
1827 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1828 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1829 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
1830 ; GFX908: atomicrmw.start:
1831 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1832 ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
1833 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1834 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1835 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
1836 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1837 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1838 ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1839 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1840 ; GFX908: atomicrmw.end:
1841 ; GFX908-NEXT: ret double [[TMP6]]
1843 ; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
1844 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1845 ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1846 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
1847 ; GFX90A: atomicrmw.start:
1848 ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1849 ; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
1850 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1851 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1852 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
1853 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1854 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1855 ; GFX90A-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1856 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1857 ; GFX90A: atomicrmw.end:
1858 ; GFX90A-NEXT: ret double [[TMP6]]
1860 ; GFX940-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
1861 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1862 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8
1863 ; GFX940-NEXT: ret double [[RES]]
1865 ; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
1866 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1867 ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1868 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
1869 ; GFX10: atomicrmw.start:
1870 ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1871 ; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
1872 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1873 ; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1874 ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
1875 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1876 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1877 ; GFX10-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1878 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1879 ; GFX10: atomicrmw.end:
1880 ; GFX10-NEXT: ret double [[TMP6]]
1882 ; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
1883 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1884 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1885 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
1886 ; GFX11: atomicrmw.start:
1887 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1888 ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
1889 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1890 ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1891 ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
1892 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1893 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1894 ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1895 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1896 ; GFX11: atomicrmw.end:
1897 ; GFX11-NEXT: ret double [[TMP6]]
1899 ; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent(
1900 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1901 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1902 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
1903 ; GFX12: atomicrmw.start:
1904 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1905 ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
1906 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1907 ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1908 ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
1909 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1910 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1911 ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1912 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1913 ; GFX12: atomicrmw.end:
1914 ; GFX12-NEXT: ret double [[TMP6]]
1916 %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst
; fmin on a global (addrspace(1)) f64 at agent scope, tagged with
; !amdgpu.no.fine.grained.memory. Expected result per RUN line: the gfx90a,
; gfx940 and gfx1030 runs keep the atomicrmw fmin with its metadata; the
; gfx803, gfx906, gfx908, gfx1100 and gfx1200 runs rewrite it into a cmpxchg
; retry loop around llvm.minnum.f64, with the metadata attached to the cmpxchg.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_test_checks.py rather than editing by hand.
1920 define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
1921 ; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
1922 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1923 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1924 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
1925 ; GFX803: atomicrmw.start:
1926 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1927 ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
1928 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1929 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1930 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
1931 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1932 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1933 ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1934 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1935 ; GFX803: atomicrmw.end:
1936 ; GFX803-NEXT: ret double [[TMP6]]
1938 ; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
1939 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1940 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1941 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
1942 ; GFX906: atomicrmw.start:
1943 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1944 ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
1945 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1946 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1947 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
1948 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1949 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1950 ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1951 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1952 ; GFX906: atomicrmw.end:
1953 ; GFX906-NEXT: ret double [[TMP6]]
1955 ; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
1956 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1957 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1958 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
1959 ; GFX908: atomicrmw.start:
1960 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1961 ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
1962 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1963 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1964 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
1965 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1966 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1967 ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1968 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1969 ; GFX908: atomicrmw.end:
1970 ; GFX908-NEXT: ret double [[TMP6]]
1972 ; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
1973 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1974 ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
1975 ; GFX90A-NEXT: ret double [[RES]]
1977 ; GFX940-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
1978 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1979 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
1980 ; GFX940-NEXT: ret double [[RES]]
1982 ; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
1983 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1984 ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
1985 ; GFX10-NEXT: ret double [[RES]]
1987 ; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
1988 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1989 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1990 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
1991 ; GFX11: atomicrmw.start:
1992 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1993 ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
1994 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1995 ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1996 ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
1997 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1998 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1999 ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
2000 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2001 ; GFX11: atomicrmw.end:
2002 ; GFX11-NEXT: ret double [[TMP6]]
2004 ; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(
2005 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2006 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
2007 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
2008 ; GFX12: atomicrmw.start:
2009 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2010 ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
2011 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
2012 ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
2013 ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
2014 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
2015 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
2016 ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
2017 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2018 ; GFX12: atomicrmw.end:
2019 ; GFX12-NEXT: ret double [[TMP6]]
2021 %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
; fmin on a global (addrspace(1)) f64 at agent scope, tagged with
; !amdgpu.no.remote.memory. Expected result per RUN line: only the gfx940 run
; keeps the atomicrmw fmin with its metadata; every other run rewrites it into
; a cmpxchg retry loop around llvm.minnum.f64, with the metadata attached to
; the cmpxchg.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_test_checks.py rather than editing by hand.
2025 define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
2026 ; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
2027 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2028 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
2029 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
2030 ; GFX803: atomicrmw.start:
2031 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2032 ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
2033 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
2034 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
2035 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
2036 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
2037 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
2038 ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
2039 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2040 ; GFX803: atomicrmw.end:
2041 ; GFX803-NEXT: ret double [[TMP6]]
2043 ; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
2044 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2045 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
2046 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
2047 ; GFX906: atomicrmw.start:
2048 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2049 ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
2050 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
2051 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
2052 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
2053 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
2054 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
2055 ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
2056 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2057 ; GFX906: atomicrmw.end:
2058 ; GFX906-NEXT: ret double [[TMP6]]
2060 ; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
2061 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2062 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
2063 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
2064 ; GFX908: atomicrmw.start:
2065 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2066 ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
2067 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
2068 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
2069 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
2070 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
2071 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
2072 ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
2073 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2074 ; GFX908: atomicrmw.end:
2075 ; GFX908-NEXT: ret double [[TMP6]]
2077 ; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
2078 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2079 ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
2080 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
2081 ; GFX90A: atomicrmw.start:
2082 ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
2083 ; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
2084 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
2085 ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
2086 ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
2087 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
2088 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
2089 ; GFX90A-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
2090 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2091 ; GFX90A: atomicrmw.end:
2092 ; GFX90A-NEXT: ret double [[RES]]
2094 ; GFX940-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
2095 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2096 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
2097 ; GFX940-NEXT: ret double [[RES]]
2099 ; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
2100 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2101 ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
2102 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
2103 ; GFX10: atomicrmw.start:
2104 ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
2105 ; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
2106 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
2107 ; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
2108 ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
2109 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
2110 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
2111 ; GFX10-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
2112 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2113 ; GFX10: atomicrmw.end:
2114 ; GFX10-NEXT: ret double [[RES]]
2116 ; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
2117 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2118 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
2119 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
2120 ; GFX11: atomicrmw.start:
2121 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2122 ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
2123 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
2124 ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
2125 ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
2126 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
2127 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
2128 ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
2129 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2130 ; GFX11: atomicrmw.end:
2131 ; GFX11-NEXT: ret double [[TMP6]]
2133 ; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(
2134 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2135 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
2136 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
2137 ; GFX12: atomicrmw.start:
2138 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2139 ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
2140 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
2141 ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
2142 ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
2143 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
2144 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
2145 ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
2146 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2147 ; GFX12: atomicrmw.end:
2148 ; GFX12-NEXT: ret double [[TMP6]]
2150 %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
2154 define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
2155 ; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
2156 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2157 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
2158 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
2159 ; GFX803: atomicrmw.start:
2160 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2161 ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
2162 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
2163 ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
2164 ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
2165 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
2166 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
2167 ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
2168 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2169 ; GFX803: atomicrmw.end:
2170 ; GFX803-NEXT: ret double [[TMP6]]
2172 ; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
2173 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2174 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
2175 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
2176 ; GFX906: atomicrmw.start:
2177 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2178 ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
2179 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
2180 ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
2181 ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
2182 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
2183 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
2184 ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
2185 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2186 ; GFX906: atomicrmw.end:
2187 ; GFX906-NEXT: ret double [[TMP6]]
2189 ; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
2190 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2191 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
2192 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
2193 ; GFX908: atomicrmw.start:
2194 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2195 ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
2196 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
2197 ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
2198 ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
2199 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
2200 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
2201 ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
2202 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2203 ; GFX908: atomicrmw.end:
2204 ; GFX908-NEXT: ret double [[TMP6]]
2206 ; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
2207 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2208 ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
2209 ; GFX90A-NEXT: ret double [[RES]]
2211 ; GFX940-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
2212 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2213 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
2214 ; GFX940-NEXT: ret double [[RES]]
2216 ; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
2217 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2218 ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
2219 ; GFX10-NEXT: ret double [[RES]]
2221 ; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
2222 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2223 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
2224 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
2225 ; GFX11: atomicrmw.start:
2226 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2227 ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
2228 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
2229 ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
2230 ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
2231 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
2232 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
2233 ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
2234 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2235 ; GFX11: atomicrmw.end:
2236 ; GFX11-NEXT: ret double [[TMP6]]
2238 ; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
2239 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2240 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
2241 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
2242 ; GFX12: atomicrmw.start:
2243 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2244 ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
2245 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
2246 ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
2247 ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
2248 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
2249 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
2250 ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
2251 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2252 ; GFX12: atomicrmw.end:
2253 ; GFX12-NEXT: ret double [[TMP6]]
2255 %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
; fmin on a double in global memory (addrspace(1)) at agent scope, seq_cst,
; tagged only with !amdgpu.ignore.denormal.mode. The atomicrmw is written with
; an explicit align 4, and the checks under the COMMON prefix (used by every
; RUN line) expect a retry loop that computes llvm.minnum.f64 and calls
; @__atomic_compare_exchange on two alloca'd addrspace(5) temporaries instead
; of a native atomicrmw or cmpxchg. The metadata does not appear anywhere in
; the expected output.
; NOTE(review): align 4 is below the 8-byte size of double, which is presumably
; what forces the libcall path on all subtargets; confirm the under-alignment
; is intentional and not carried over from an f32 variant of this test.
2259 define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
2260 ; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode(
2261 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2262 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
2263 ; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
2264 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
2265 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
2266 ; COMMON: atomicrmw.start:
2267 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2268 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
2269 ; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
2270 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
2271 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
2272 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
2273 ; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
2274 ; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
2275 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
2276 ; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
2277 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
2278 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
2279 ; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
2280 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
2281 ; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
2282 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2283 ; COMMON: atomicrmw.end:
2284 ; COMMON-NEXT: ret double [[TMP6]]
2286 %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
; fmin on a double in global memory (addrspace(1)) at agent scope, seq_cst,
; tagged with both !amdgpu.no.fine.grained.memory and
; !amdgpu.ignore.denormal.mode. The atomicrmw is written with an explicit
; align 4, and the checks under the COMMON prefix (used by every RUN line)
; expect a retry loop that computes llvm.minnum.f64 and calls
; @__atomic_compare_exchange on two alloca'd addrspace(5) temporaries. Neither
; metadata kind appears in the expected output.
; NOTE(review): align 4 is below the 8-byte size of double, which is presumably
; what forces the libcall path regardless of the metadata; confirm the
; under-alignment is intentional, since it hides any per-subtarget effect the
; metadata might otherwise have.
2290 define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
2291 ; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
2292 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2293 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
2294 ; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
2295 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
2296 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
2297 ; COMMON: atomicrmw.start:
2298 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2299 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
2300 ; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
2301 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
2302 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
2303 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
2304 ; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
2305 ; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
2306 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
2307 ; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
2308 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
2309 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
2310 ; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
2311 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
2312 ; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
2313 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2314 ; COMMON: atomicrmw.end:
2315 ; COMMON-NEXT: ret double [[TMP6]]
2317 %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
; fmin on a double in global memory (addrspace(1)) at agent scope, seq_cst,
; tagged with both !amdgpu.no.remote.memory and !amdgpu.ignore.denormal.mode.
; The atomicrmw is written with an explicit align 4, and the checks under the
; COMMON prefix (used by every RUN line) expect a retry loop that computes
; llvm.minnum.f64 and calls @__atomic_compare_exchange on two alloca'd
; addrspace(5) temporaries. Neither metadata kind appears in the expected
; output.
; NOTE(review): align 4 is below the 8-byte size of double, which is presumably
; what forces the libcall path regardless of the metadata; confirm the
; under-alignment is intentional, since it hides any per-subtarget effect the
; metadata might otherwise have.
2321 define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
2322 ; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
2323 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2324 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
2325 ; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
2326 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
2327 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
2328 ; COMMON: atomicrmw.start:
2329 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2330 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
2331 ; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
2332 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
2333 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
2334 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
2335 ; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
2336 ; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
2337 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
2338 ; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
2339 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
2340 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
2341 ; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
2342 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
2343 ; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
2344 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2345 ; COMMON: atomicrmw.end:
2346 ; COMMON-NEXT: ret double [[TMP6]]
2348 %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
; fmin on a double in global memory (addrspace(1)) at agent scope, seq_cst,
; tagged with !amdgpu.no.fine.grained.memory, !amdgpu.no.remote.memory and
; !amdgpu.ignore.denormal.mode together. The atomicrmw is written with an
; explicit align 4, and the checks under the COMMON prefix (used by every RUN
; line) expect a retry loop that computes llvm.minnum.f64 and calls
; @__atomic_compare_exchange on two alloca'd addrspace(5) temporaries. None of
; the three metadata kinds appears in the expected output.
; NOTE(review): align 4 is below the 8-byte size of double, which is presumably
; what forces the libcall path regardless of the metadata; confirm the
; under-alignment is intentional, since it hides any per-subtarget effect the
; metadata might otherwise have.
2352 define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
2353 ; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
2354 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
2355 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
2356 ; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
2357 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
2358 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
2359 ; COMMON: atomicrmw.start:
2360 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
2361 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
2362 ; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
2363 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
2364 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
2365 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
2366 ; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
2367 ; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
2368 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
2369 ; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
2370 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
2371 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
2372 ; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
2373 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
2374 ; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
2375 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
2376 ; COMMON: atomicrmw.end:
2377 ; COMMON-NEXT: ret double [[TMP6]]
2379 %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
; Attribute groups that set the "denormal-fp-mode" function attribute:
; #0 uses preserve-sign for both comma-separated components, #1 uses dynamic
; for both.
; NOTE(review): the f64 fmin functions immediately preceding this point do not
; reference #0 or #1 on their define lines; confirm these groups are still
; used by other functions in the file.
2383 attributes #0 = { "denormal-fp-mode"="preserve-sign,preserve-sign" }
2384 attributes #1 = { "denormal-fp-mode"="dynamic,dynamic" }
2388 ; GFX803: [[META0]] = !{}
2390 ; GFX906: [[META0]] = !{}
2392 ; GFX908: [[META0]] = !{}
2394 ; GFX90A: [[META0]] = !{}
2396 ; GFX940: [[META0]] = !{}
2398 ; GFX10: [[META0]] = !{}
2400 ; GFX11: [[META0]] = !{}
2402 ; GFX12: [[META0]] = !{}