1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX803 %s
3 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s
4 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s
5 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s
6 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX940 %s
7 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s
8 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s
9 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s
11 ;---------------------------------------------------------------------
12 ; atomicrmw xchg
13 ;---------------------------------------------------------------------
15 ; xchg is supported over PCIe, so no expansion is necessary
16 define double @test_atomicrmw_xchg_f64_global_system(ptr addrspace(1) %ptr, double %value) {
17 ; COMMON-LABEL: define double @test_atomicrmw_xchg_f64_global_system(
18 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
19 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8
20 ; COMMON-NEXT: ret double [[RES]]
22 %res = atomicrmw xchg ptr addrspace(1) %ptr, double %value seq_cst
23 ret double %res
24 }
26 ; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored.
27 define double @test_atomicrmw_xchg_f64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
28 ; COMMON-LABEL: define double @test_atomicrmw_xchg_f64_global_system__amdgpu_no_fine_grained_memory(
29 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
30 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]]
31 ; COMMON-NEXT: ret double [[RES]]
33 %res = atomicrmw xchg ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0
34 ret double %res
35 }
37 ; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored.
38 define double @test_atomicrmw_xchg_f64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
39 ; COMMON-LABEL: define double @test_atomicrmw_xchg_f64_global_system__amdgpu_no_remote_memory(
40 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
41 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
42 ; COMMON-NEXT: ret double [[RES]]
44 %res = atomicrmw xchg ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.remote.memory !0
45 ret double %res
46 }
48 ; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored.
49 define double @test_atomicrmw_xchg_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
50 ; COMMON-LABEL: define double @test_atomicrmw_xchg_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
51 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
52 ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
53 ; COMMON-NEXT: ret double [[RES]]
55 %res = atomicrmw xchg ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
56 ret double %res
57 }
59 ;---------------------------------------------------------------------
60 ; atomicrmw fadd
61 ;---------------------------------------------------------------------
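; f64 fadd at system scope: only gfx940 keeps the atomicrmw; all other targets in this test expand it to a cmpxchg loop.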
63 define double @test_atomicrmw_fadd_f64_global_system(ptr addrspace(1) %ptr, double %value) {
64 ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_system(
65 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
66 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
67 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
68 ; GFX803: atomicrmw.start:
69 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
70 ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
71 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
72 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
73 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
74 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
75 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
76 ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
77 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
78 ; GFX803: atomicrmw.end:
79 ; GFX803-NEXT: ret double [[TMP5]]
81 ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_system(
82 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
83 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
84 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
85 ; GFX906: atomicrmw.start:
86 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
87 ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
88 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
89 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
90 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
91 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
92 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
93 ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
94 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
95 ; GFX906: atomicrmw.end:
96 ; GFX906-NEXT: ret double [[TMP5]]
98 ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_system(
99 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
100 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
101 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
102 ; GFX908: atomicrmw.start:
103 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
104 ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
105 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
106 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
107 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
108 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
109 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
110 ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
111 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
112 ; GFX908: atomicrmw.end:
113 ; GFX908-NEXT: ret double [[TMP5]]
115 ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_system(
116 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
117 ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
118 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
119 ; GFX90A: atomicrmw.start:
120 ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
121 ; GFX90A-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
122 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
123 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
124 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
125 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
126 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
127 ; GFX90A-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
128 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
129 ; GFX90A: atomicrmw.end:
130 ; GFX90A-NEXT: ret double [[TMP5]]
132 ; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_system(
133 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
134 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8
135 ; GFX940-NEXT: ret double [[RES]]
137 ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_system(
138 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
139 ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
140 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
141 ; GFX10: atomicrmw.start:
142 ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
143 ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
144 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
145 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
146 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
147 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
148 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
149 ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
150 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
151 ; GFX10: atomicrmw.end:
152 ; GFX10-NEXT: ret double [[TMP5]]
154 ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_system(
155 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
156 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
157 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
158 ; GFX11: atomicrmw.start:
159 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
160 ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
161 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
162 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
163 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
164 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
165 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
166 ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
167 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
168 ; GFX11: atomicrmw.end:
169 ; GFX11-NEXT: ret double [[TMP5]]
171 ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_system(
172 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
173 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
174 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
175 ; GFX12: atomicrmw.start:
176 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
177 ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
178 ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
179 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
180 ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
181 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
182 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
183 ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
184 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
185 ; GFX12: atomicrmw.end:
186 ; GFX12-NEXT: ret double [[TMP5]]
188 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst
189 ret double %res
190 }
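; Same result with !amdgpu.no.fine.grained.memory below: gfx940 keeps the atomic, the remaining targets still expand to a cmpxchg loop.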
192 define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
193 ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory(
194 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
195 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
196 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
197 ; GFX803: atomicrmw.start:
198 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
199 ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
200 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
201 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
202 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
203 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
204 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
205 ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
206 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
207 ; GFX803: atomicrmw.end:
208 ; GFX803-NEXT: ret double [[TMP5]]
210 ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory(
211 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
212 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
213 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
214 ; GFX906: atomicrmw.start:
215 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
216 ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
217 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
218 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
219 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
220 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
221 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
222 ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
223 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
224 ; GFX906: atomicrmw.end:
225 ; GFX906-NEXT: ret double [[TMP5]]
227 ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory(
228 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
229 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
230 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
231 ; GFX908: atomicrmw.start:
232 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
233 ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
234 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
235 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
236 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
237 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
238 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
239 ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
240 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
241 ; GFX908: atomicrmw.end:
242 ; GFX908-NEXT: ret double [[TMP5]]
244 ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory(
245 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
246 ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
247 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
248 ; GFX90A: atomicrmw.start:
249 ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
250 ; GFX90A-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
251 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
252 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
253 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
254 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
255 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
256 ; GFX90A-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
257 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
258 ; GFX90A: atomicrmw.end:
259 ; GFX90A-NEXT: ret double [[TMP5]]
261 ; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory(
262 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
263 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
264 ; GFX940-NEXT: ret double [[RES]]
266 ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory(
267 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
268 ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
269 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
270 ; GFX10: atomicrmw.start:
271 ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
272 ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
273 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
274 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
275 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
276 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
277 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
278 ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
279 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
280 ; GFX10: atomicrmw.end:
281 ; GFX10-NEXT: ret double [[TMP5]]
283 ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory(
284 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
285 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
286 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
287 ; GFX11: atomicrmw.start:
288 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
289 ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
290 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
291 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
292 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
293 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
294 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
295 ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
296 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
297 ; GFX11: atomicrmw.end:
298 ; GFX11-NEXT: ret double [[TMP5]]
300 ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory(
301 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
302 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
303 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
304 ; GFX12: atomicrmw.start:
305 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
306 ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
307 ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
308 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
309 ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
310 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
311 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
312 ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
313 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
314 ; GFX12: atomicrmw.end:
315 ; GFX12-NEXT: ret double [[TMP5]]
317 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0
318 ret double %res
319 }
321 define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
322 ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_remote_memory(
323 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
324 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
325 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
326 ; GFX803: atomicrmw.start:
327 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
328 ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
329 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
330 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
331 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
332 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
333 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
334 ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
335 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
336 ; GFX803: atomicrmw.end:
337 ; GFX803-NEXT: ret double [[TMP5]]
339 ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_remote_memory(
340 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
341 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
342 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
343 ; GFX906: atomicrmw.start:
344 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
345 ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
346 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
347 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
348 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
349 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
350 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
351 ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
352 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
353 ; GFX906: atomicrmw.end:
354 ; GFX906-NEXT: ret double [[TMP5]]
356 ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_remote_memory(
357 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
358 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
359 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
360 ; GFX908: atomicrmw.start:
361 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
362 ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
363 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
364 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
365 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
366 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
367 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
368 ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
369 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
370 ; GFX908: atomicrmw.end:
371 ; GFX908-NEXT: ret double [[TMP5]]
373 ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_remote_memory(
374 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
375 ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
376 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
377 ; GFX90A: atomicrmw.start:
378 ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
379 ; GFX90A-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
380 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
381 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
382 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
383 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
384 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
385 ; GFX90A-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
386 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
387 ; GFX90A: atomicrmw.end:
388 ; GFX90A-NEXT: ret double [[TMP5]]
390 ; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_remote_memory(
391 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
392 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
393 ; GFX940-NEXT: ret double [[RES]]
395 ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_remote_memory(
396 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
397 ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
398 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
399 ; GFX10: atomicrmw.start:
400 ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
401 ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
402 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
403 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
404 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
405 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
406 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
407 ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
408 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
409 ; GFX10: atomicrmw.end:
410 ; GFX10-NEXT: ret double [[TMP5]]
412 ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_remote_memory(
413 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
414 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
415 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
416 ; GFX11: atomicrmw.start:
417 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
418 ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
419 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
420 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
421 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
422 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
423 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
424 ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
425 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
426 ; GFX11: atomicrmw.end:
427 ; GFX11-NEXT: ret double [[TMP5]]
429 ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_remote_memory(
430 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
431 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
432 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
433 ; GFX12: atomicrmw.start:
434 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
435 ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
436 ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
437 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
438 ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
439 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
440 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
441 ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
442 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
443 ; GFX12: atomicrmw.end:
444 ; GFX12-NEXT: ret double [[TMP5]]
446 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.remote.memory !0
447 ret double %res
448 }
450 define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
451 ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
452 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
453 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
454 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
455 ; GFX803: atomicrmw.start:
456 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
457 ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
458 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
459 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
460 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
461 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
462 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
463 ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
464 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
465 ; GFX803: atomicrmw.end:
466 ; GFX803-NEXT: ret double [[TMP5]]
468 ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
469 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
470 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
471 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
472 ; GFX906: atomicrmw.start:
473 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
474 ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
475 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
476 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
477 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
478 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
479 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
480 ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
481 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
482 ; GFX906: atomicrmw.end:
483 ; GFX906-NEXT: ret double [[TMP5]]
485 ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
486 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
487 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
488 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
489 ; GFX908: atomicrmw.start:
490 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
491 ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
492 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
493 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
494 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
495 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
496 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
497 ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
498 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
499 ; GFX908: atomicrmw.end:
500 ; GFX908-NEXT: ret double [[TMP5]]
502 ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
503 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
504 ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
505 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
506 ; GFX90A: atomicrmw.start:
507 ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
508 ; GFX90A-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
509 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
510 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
511 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
512 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
513 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
514 ; GFX90A-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
515 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
516 ; GFX90A: atomicrmw.end:
517 ; GFX90A-NEXT: ret double [[TMP5]]
519 ; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
520 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
521 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
522 ; GFX940-NEXT: ret double [[RES]]
524 ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
525 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
526 ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
527 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
528 ; GFX10: atomicrmw.start:
529 ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
530 ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
531 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
532 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
533 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
534 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
535 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
536 ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
537 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
538 ; GFX10: atomicrmw.end:
539 ; GFX10-NEXT: ret double [[TMP5]]
541 ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
542 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
543 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
544 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
545 ; GFX11: atomicrmw.start:
546 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
547 ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
548 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
549 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
550 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
551 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
552 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
553 ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
554 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
555 ; GFX11: atomicrmw.end:
556 ; GFX11-NEXT: ret double [[TMP5]]
558 ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
559 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
560 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
561 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
562 ; GFX12: atomicrmw.start:
563 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
564 ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
565 ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
566 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
567 ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
568 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
569 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
570 ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
571 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
572 ; GFX12: atomicrmw.end:
573 ; GFX12-NEXT: ret double [[TMP5]]
575 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
576 ret double %res
577 }
579 define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz(ptr addrspace(1) %ptr, double %value) #0 {
580 ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz(
581 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
582 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
583 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
584 ; GFX803: atomicrmw.start:
585 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
586 ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
587 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
588 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
589 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
590 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
591 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
592 ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
593 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
594 ; GFX803: atomicrmw.end:
595 ; GFX803-NEXT: ret double [[TMP5]]
597 ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz(
598 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
599 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
600 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
601 ; GFX906: atomicrmw.start:
602 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
603 ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
604 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
605 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
606 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
607 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
608 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
609 ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
610 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
611 ; GFX906: atomicrmw.end:
612 ; GFX906-NEXT: ret double [[TMP5]]
614 ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz(
615 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
616 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
617 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
618 ; GFX908: atomicrmw.start:
619 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
620 ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
621 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
622 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
623 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
624 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
625 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
626 ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
627 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
628 ; GFX908: atomicrmw.end:
629 ; GFX908-NEXT: ret double [[TMP5]]
631 ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz(
632 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
633 ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
634 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
635 ; GFX90A: atomicrmw.start:
636 ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
637 ; GFX90A-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
638 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
639 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
640 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
641 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
642 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
643 ; GFX90A-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
644 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
645 ; GFX90A: atomicrmw.end:
646 ; GFX90A-NEXT: ret double [[TMP5]]
648 ; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz(
649 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
650 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
651 ; GFX940-NEXT: ret double [[RES]]
653 ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz(
654 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
655 ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
656 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
657 ; GFX10: atomicrmw.start:
658 ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
659 ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
660 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
661 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
662 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
663 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
664 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
665 ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
666 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
667 ; GFX10: atomicrmw.end:
668 ; GFX10-NEXT: ret double [[TMP5]]
670 ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz(
671 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
672 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
673 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
674 ; GFX11: atomicrmw.start:
675 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
676 ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
677 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
678 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
679 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
680 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
681 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
682 ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
683 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
684 ; GFX11: atomicrmw.end:
685 ; GFX11-NEXT: ret double [[TMP5]]
687 ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz(
688 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
689 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
690 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
691 ; GFX12: atomicrmw.start:
692 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
693 ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
694 ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
695 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
696 ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
697 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
698 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
699 ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
700 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
701 ; GFX12: atomicrmw.end:
702 ; GFX12-NEXT: ret double [[TMP5]]
704 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
705 ret double %res
706 }
708 define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic(ptr addrspace(1) %ptr, double %value) #1 {
709 ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic(
710 ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
711 ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
712 ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
713 ; GFX803: atomicrmw.start:
714 ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
715 ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
716 ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
717 ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
718 ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
719 ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
720 ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
721 ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
722 ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
723 ; GFX803: atomicrmw.end:
724 ; GFX803-NEXT: ret double [[TMP5]]
726 ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic(
727 ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
728 ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
729 ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
730 ; GFX906: atomicrmw.start:
731 ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
732 ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
733 ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
734 ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
735 ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
736 ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
737 ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
738 ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
739 ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
740 ; GFX906: atomicrmw.end:
741 ; GFX906-NEXT: ret double [[TMP5]]
743 ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic(
744 ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
745 ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
746 ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
747 ; GFX908: atomicrmw.start:
748 ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
749 ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
750 ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
751 ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
752 ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
753 ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
754 ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
755 ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
756 ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
757 ; GFX908: atomicrmw.end:
758 ; GFX908-NEXT: ret double [[TMP5]]
760 ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic(
761 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
762 ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
763 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
764 ; GFX90A: atomicrmw.start:
765 ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
766 ; GFX90A-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
767 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
768 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
769 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
770 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
771 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
772 ; GFX90A-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
773 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
774 ; GFX90A: atomicrmw.end:
775 ; GFX90A-NEXT: ret double [[TMP5]]
777 ; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic(
778 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
779 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
780 ; GFX940-NEXT: ret double [[RES]]
782 ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic(
783 ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
784 ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
785 ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
786 ; GFX10: atomicrmw.start:
787 ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
788 ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
789 ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
790 ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
791 ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
792 ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
793 ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
794 ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
795 ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
796 ; GFX10: atomicrmw.end:
797 ; GFX10-NEXT: ret double [[TMP5]]
799 ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic(
800 ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
801 ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
802 ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
803 ; GFX11: atomicrmw.start:
804 ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
805 ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
806 ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
807 ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
808 ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
809 ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
810 ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
811 ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
812 ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
813 ; GFX11: atomicrmw.end:
814 ; GFX11-NEXT: ret double [[TMP5]]
816 ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic(
817 ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
818 ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
819 ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]]
820 ; GFX12: atomicrmw.start:
821 ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
822 ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
823 ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
824 ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
825 ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
826 ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
827 ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
828 ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
829 ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
830 ; GFX12: atomicrmw.end:
831 ; GFX12-NEXT: ret double [[TMP5]]
833 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
834 ret double %res
835 }
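; The fadd tests below use an 8-byte double with only 4-byte alignment; because the access is not naturally aligned, every target expands them to a __atomic_compare_exchange libcall loop instead of a native atomic or inline cmpxchg.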
837 define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
838 ; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode(
839 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
840 ; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
841 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
842 ; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
843 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
844 ; COMMON: atomicrmw.start:
845 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
846 ; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
847 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
848 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
849 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
850 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
851 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
852 ; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
853 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
854 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
855 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
856 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
857 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
858 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
859 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
860 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
861 ; COMMON: atomicrmw.end:
862 ; COMMON-NEXT: ret double [[NEWLOADED]]
864 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
865 ret double %res
866 }
868 define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
869 ; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
870 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
871 ; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
872 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
873 ; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
874 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
875 ; COMMON: atomicrmw.start:
876 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
877 ; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
878 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
879 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
880 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
881 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
882 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
883 ; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
884 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
885 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
886 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
887 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
888 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
889 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
890 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
891 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
892 ; COMMON: atomicrmw.end:
893 ; COMMON-NEXT: ret double [[NEWLOADED]]
895 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
896 ret double %res
897 }
899 define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
900 ; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
901 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
902 ; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
903 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
904 ; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
905 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
906 ; COMMON: atomicrmw.start:
907 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
908 ; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
909 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
910 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
911 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
912 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
913 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
914 ; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
915 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
916 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
917 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
918 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
919 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
920 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
921 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
922 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
923 ; COMMON: atomicrmw.end:
924 ; COMMON-NEXT: ret double [[NEWLOADED]]
926 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
927 ret double %res
928 }
930 define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
931 ; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
932 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
933 ; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
934 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
935 ; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
936 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
937 ; COMMON: atomicrmw.start:
938 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
939 ; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
940 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
941 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
942 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
943 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
944 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
945 ; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
946 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
947 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
948 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
949 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
950 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
951 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
952 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
953 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
954 ; COMMON: atomicrmw.end:
955 ; COMMON-NEXT: ret double [[NEWLOADED]]
957 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
958 ret double %res
959 }
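; The next two tests repeat the fully annotated under-aligned case with a daz (#0) and a dynamic (#1) f64 denormal-mode function attribute; only the referenced attribute group in the checks changes, while the libcall expansion stays the same.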
961 define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(ptr addrspace(1) %ptr, double %value) #0 {
962 ; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(
963 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
964 ; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
965 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
966 ; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
967 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
968 ; COMMON: atomicrmw.start:
969 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
970 ; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
971 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
972 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
973 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
974 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
975 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
976 ; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
977 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
978 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
979 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
980 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
981 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
982 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
983 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
984 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
985 ; COMMON: atomicrmw.end:
986 ; COMMON-NEXT: ret double [[NEWLOADED]]
988 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
989 ret double %res
990 }
992 define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(ptr addrspace(1) %ptr, double %value) #1 {
993 ; COMMON-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(
994 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
995 ; COMMON-NEXT: [[TMP1:%.*]] = alloca double, align 8, addrspace(5)
996 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
997 ; COMMON-NEXT: [[TMP3:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
998 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
999 ; COMMON: atomicrmw.start:
1000 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1001 ; COMMON-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]]
1002 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1003 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
1004 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
1005 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
1006 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
1007 ; COMMON-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
1008 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
1009 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8
1010 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
1011 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
1012 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP5]], 1
1013 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
1014 ; COMMON-NEXT: [[NEWLOADED]] = extractvalue { double, i1 } [[TMP8]], 0
1015 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1016 ; COMMON: atomicrmw.end:
1017 ; COMMON-NEXT: ret double [[NEWLOADED]]
1019 %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
1020 ret double %res
1021 }
1023 ;---------------------------------------------------------------------
1024 ; atomicrmw fsub
1025 ;---------------------------------------------------------------------
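; There is no atomic fsub instruction, so every fsub case below is expanded: to an inline cmpxchg loop when naturally aligned, and to the __atomic_compare_exchange libcall when only 4-byte aligned.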
1027 define double @test_atomicrmw_fsub_f64_global_system(ptr addrspace(1) %ptr, double %value) {
1028 ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system(
1029 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1030 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1031 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1032 ; COMMON: atomicrmw.start:
1033 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
1034 ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
1035 ; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
1036 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
1037 ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
1038 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
1039 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
1040 ; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
1041 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1042 ; COMMON: atomicrmw.end:
1043 ; COMMON-NEXT: ret double [[RES]]
1045 %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst
1046 ret double %res
1047 }
1049 define double @test_atomicrmw_fsub_f64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
1050 ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_no_fine_grained_memory(
1051 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1052 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1053 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1054 ; COMMON: atomicrmw.start:
1055 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
1056 ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
1057 ; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
1058 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
1059 ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
1060 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
1061 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
1062 ; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
1063 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1064 ; COMMON: atomicrmw.end:
1065 ; COMMON-NEXT: ret double [[RES]]
1067 %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0
1068 ret double %res
1069 }
1071 define double @test_atomicrmw_fsub_f64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
1072 ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_no_remote_memory(
1073 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1074 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1075 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1076 ; COMMON: atomicrmw.start:
1077 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
1078 ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
1079 ; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
1080 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
1081 ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
1082 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
1083 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
1084 ; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
1085 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1086 ; COMMON: atomicrmw.end:
1087 ; COMMON-NEXT: ret double [[RES]]
1089 %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.remote.memory !0
1090 ret double %res
1091 }
1093 define double @test_atomicrmw_fsub_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
1094 ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1095 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1096 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1097 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1098 ; COMMON: atomicrmw.start:
1099 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
1100 ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
1101 ; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64
1102 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
1103 ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8
1104 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
1105 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
1106 ; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double
1107 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1108 ; COMMON: atomicrmw.end:
1109 ; COMMON-NEXT: ret double [[RES]]
1111 %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
1112 ret double %res
1113 }
1115 define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
1116 ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode(
1117 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1118 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
1119 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
1120 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
1121 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1122 ; COMMON: atomicrmw.start:
1123 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1124 ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
1125 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1126 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
1127 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
1128 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
1129 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
1130 ; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
1131 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
1132 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
1133 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
1134 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
1135 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
1136 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
1137 ; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
1138 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1139 ; COMMON: atomicrmw.end:
1140 ; COMMON-NEXT: ret double [[TMP5]]
1142 %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
1143 ret double %res
1144 }
1146 define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
1147 ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
1148 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1149 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
1150 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
1151 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
1152 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1153 ; COMMON: atomicrmw.start:
1154 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1155 ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
1156 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1157 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
1158 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
1159 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
1160 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
1161 ; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
1162 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
1163 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
1164 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
1165 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
1166 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
1167 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
1168 ; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
1169 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1170 ; COMMON: atomicrmw.end:
1171 ; COMMON-NEXT: ret double [[TMP5]]
1173 %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
1174 ret double %res
1175 }
1177 define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
1178 ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
1179 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1180 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
1181 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
1182 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
1183 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1184 ; COMMON: atomicrmw.start:
1185 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1186 ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
1187 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1188 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
1189 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
1190 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
1191 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
1192 ; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
1193 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
1194 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
1195 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
1196 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
1197 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
1198 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
1199 ; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
1200 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1201 ; COMMON: atomicrmw.end:
1202 ; COMMON-NEXT: ret double [[TMP5]]
1204 %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
1205 ret double %res
1206 }
1208 define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
1209 ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1210 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1211 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
1212 ; COMMON-NEXT: [[TMP2:%.*]] = alloca double, align 8, addrspace(5)
1213 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
1214 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1215 ; COMMON: atomicrmw.start:
1216 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
1217 ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]]
1218 ; COMMON-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1219 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
1220 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
1221 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
1222 ; COMMON-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP2]], align 8
1223 ; COMMON-NEXT: [[TMP9:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
1224 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
1225 ; COMMON-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
1226 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
1227 ; COMMON-NEXT: [[TMP7:%.*]] = insertvalue { double, i1 } poison, double [[TMP6]], 0
1228 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP7]], i1 [[TMP9]], 1
1229 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP8]], 1
1230 ; COMMON-NEXT: [[TMP5]] = extractvalue { double, i1 } [[TMP8]], 0
1231 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1232 ; COMMON: atomicrmw.end:
1233 ; COMMON-NEXT: ret double [[TMP5]]
1235 %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
1236 ret double %res
1237 }
1239 ;---------------------------------------------------------------------
1240 ; atomicrmw fmax
1241 ;---------------------------------------------------------------------
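; fmax is expanded on all tested targets: the new value is computed with llvm.maxnum.f64 and committed through a cmpxchg loop, or through the __atomic_compare_exchange libcall for the align 4 cases.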
1243 define double @test_atomicrmw_fmax_f64_global_system(ptr addrspace(1) %ptr, double %value) {
1244 ; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_system(
1245 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1246 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1247 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1248 ; COMMON: atomicrmw.start:
1249 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1250 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1251 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1252 ; COMMON-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1253 ; COMMON-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
1254 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1255 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1256 ; COMMON-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1257 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1258 ; COMMON: atomicrmw.end:
1259 ; COMMON-NEXT: ret double [[TMP6]]
1261 %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst
1262 ret double %res
1263 }
1265 define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
1266 ; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory(
1267 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1268 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1269 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1270 ; COMMON: atomicrmw.start:
1271 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1272 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1273 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1274 ; COMMON-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1275 ; COMMON-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
1276 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1277 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1278 ; COMMON-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1279 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1280 ; COMMON: atomicrmw.end:
1281 ; COMMON-NEXT: ret double [[TMP6]]
1283 %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0
1284 ret double %res
1285 }
1287 define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
1288 ; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(
1289 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1290 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1291 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1292 ; COMMON: atomicrmw.start:
1293 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1294 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1295 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1296 ; COMMON-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1297 ; COMMON-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
1298 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1299 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1300 ; COMMON-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1301 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1302 ; COMMON: atomicrmw.end:
1303 ; COMMON-NEXT: ret double [[TMP6]]
1305 %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.remote.memory !0
1306 ret double %res
1307 }
1309 define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
1310 ; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1311 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1312 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1313 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1314 ; COMMON: atomicrmw.start:
1315 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1316 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1317 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1318 ; COMMON-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1319 ; COMMON-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
1320 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1321 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1322 ; COMMON-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1323 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1324 ; COMMON: atomicrmw.end:
1325 ; COMMON-NEXT: ret double [[TMP6]]
1327 %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
1328 ret double %res
1329 }
1331 define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
1332 ; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode(
1333 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1334 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
1335 ; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
1336 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
1337 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1338 ; COMMON: atomicrmw.start:
1339 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1340 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1341 ; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1342 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
1343 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
1344 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
1345 ; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
1346 ; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
1347 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
1348 ; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
1349 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
1350 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
1351 ; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
1352 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
1353 ; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
1354 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1355 ; COMMON: atomicrmw.end:
1356 ; COMMON-NEXT: ret double [[TMP6]]
1358 %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
1359 ret double %res
1360 }
1362 define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
1363 ; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
1364 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1365 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
1366 ; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
1367 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
1368 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1369 ; COMMON: atomicrmw.start:
1370 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1371 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1372 ; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1373 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
1374 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
1375 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
1376 ; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
1377 ; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
1378 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
1379 ; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
1380 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
1381 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
1382 ; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
1383 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
1384 ; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
1385 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1386 ; COMMON: atomicrmw.end:
1387 ; COMMON-NEXT: ret double [[TMP6]]
1389 %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
1390 ret double %res
1391 }
1393 define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
1394 ; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
1395 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1396 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
1397 ; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
1398 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
1399 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1400 ; COMMON: atomicrmw.start:
1401 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1402 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1403 ; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1404 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
1405 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
1406 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
1407 ; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
1408 ; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
1409 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
1410 ; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
1411 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
1412 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
1413 ; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
1414 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
1415 ; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
1416 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1417 ; COMMON: atomicrmw.end:
1418 ; COMMON-NEXT: ret double [[TMP6]]
1420 %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
1421 ret double %res
1422 }
1424 define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
1425 ; COMMON-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1426 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1427 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
1428 ; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
1429 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
1430 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1431 ; COMMON: atomicrmw.start:
1432 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1433 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]])
1434 ; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1435 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
1436 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
1437 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
1438 ; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
1439 ; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
1440 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
1441 ; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
1442 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
1443 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
1444 ; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
1445 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
1446 ; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
1447 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1448 ; COMMON: atomicrmw.end:
1449 ; COMMON-NEXT: ret double [[TMP6]]
1451 %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
1452 ret double %res
1453 }
1455 ;---------------------------------------------------------------------
1456 ; atomicrmw fmin
1457 ;---------------------------------------------------------------------
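; fmin mirrors fmax, using llvm.minnum.f64 plus a cmpxchg loop, or the __atomic_compare_exchange libcall for the align 4 cases.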
1459 define double @test_atomicrmw_fmin_f64_global_system(ptr addrspace(1) %ptr, double %value) {
1460 ; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_system(
1461 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1462 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1463 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1464 ; COMMON: atomicrmw.start:
1465 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1466 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
1467 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1468 ; COMMON-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1469 ; COMMON-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
1470 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1471 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1472 ; COMMON-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1473 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1474 ; COMMON: atomicrmw.end:
1475 ; COMMON-NEXT: ret double [[TMP6]]
1477 %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst
1478 ret double %res
1479 }
1481 define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
1482 ; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory(
1483 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1484 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1485 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1486 ; COMMON: atomicrmw.start:
1487 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1488 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
1489 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1490 ; COMMON-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1491 ; COMMON-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
1492 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1493 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1494 ; COMMON-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1495 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1496 ; COMMON: atomicrmw.end:
1497 ; COMMON-NEXT: ret double [[TMP6]]
1499 %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0
1500 ret double %res
1501 }
1503 define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
1504 ; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(
1505 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1506 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1507 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1508 ; COMMON: atomicrmw.start:
1509 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1510 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
1511 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1512 ; COMMON-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1513 ; COMMON-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
1514 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1515 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1516 ; COMMON-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1517 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1518 ; COMMON: atomicrmw.end:
1519 ; COMMON-NEXT: ret double [[TMP6]]
1521 %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.remote.memory !0
1522 ret double %res
1523 }
1525 define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
1526 ; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
1527 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1528 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8
1529 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1530 ; COMMON: atomicrmw.start:
1531 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1532 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
1533 ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
1534 ; COMMON-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64
1535 ; COMMON-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8
1536 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1537 ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1538 ; COMMON-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double
1539 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1540 ; COMMON: atomicrmw.end:
1541 ; COMMON-NEXT: ret double [[TMP6]]
1543 %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
1544 ret double %res
1545 }
1547 define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) {
1548 ; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode(
1549 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1550 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
1551 ; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
1552 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
1553 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1554 ; COMMON: atomicrmw.start:
1555 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1556 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
1557 ; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1558 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
1559 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
1560 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
1561 ; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
1562 ; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
1563 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
1564 ; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
1565 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
1566 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
1567 ; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
1568 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
1569 ; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
1570 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1571 ; COMMON: atomicrmw.end:
1572 ; COMMON-NEXT: ret double [[TMP6]]
1574 %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
1575 ret double %res
1576 }
1578 define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
1579 ; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
1580 ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
1581 ; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
1582 ; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
1583 ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
1584 ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
1585 ; COMMON: atomicrmw.start:
1586 ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1587 ; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
1588 ; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1589 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
1590 ; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
1591 ; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
1592 ; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
1593 ; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
1594 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
1595 ; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
1596 ; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
1597 ; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
1598 ; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
1599 ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
1600 ; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
1601 ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1602 ; COMMON: atomicrmw.end:
1603 ; COMMON-NEXT: ret double [[TMP6]]
1605 %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
1606 ret double %res
1607 }
define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret double [[TMP6]]
;
  %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
  ret double %res
}

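; With both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory the
; under-aligned f64 fmin still expands to the same libcall-based CAS loop on
; every target.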
define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
; COMMON-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[TMP3:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP4:%.*]] = alloca double, align 8, addrspace(5)
; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 4
; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]]
; COMMON: atomicrmw.start:
; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; COMMON-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]])
; COMMON-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: store double [[LOADED]], ptr addrspace(5) [[TMP3]], align 8
; COMMON-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP4]], align 8
; COMMON-NEXT: [[TMP10:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[TMP4]], i32 5, i32 5)
; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP4]])
; COMMON-NEXT: [[TMP7:%.*]] = load double, ptr addrspace(5) [[TMP3]], align 8
; COMMON-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP3]])
; COMMON-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } poison, double [[TMP7]], 0
; COMMON-NEXT: [[TMP9:%.*]] = insertvalue { double, i1 } [[TMP8]], i1 [[TMP10]], 1
; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { double, i1 } [[TMP9]], 1
; COMMON-NEXT: [[TMP6]] = extractvalue { double, i1 } [[TMP9]], 0
; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; COMMON: atomicrmw.end:
; COMMON-NEXT: ret double [[TMP6]]
;
  %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
  ret double %res
}

attributes #0 = { "denormal-fp-mode"="preserve-sign,preserve-sign" }
attributes #1 = { "denormal-fp-mode"="dynamic,dynamic" }

!0 = !{}
;.
; GFX803: [[META0]] = !{}
;.
; GFX906: [[META0]] = !{}
;.
; GFX908: [[META0]] = !{}
;.
; GFX90A: [[META0]] = !{}
;.
; GFX940: [[META0]] = !{}
;.
; GFX10: [[META0]] = !{}
;.
; GFX11: [[META0]] = !{}
;.
; GFX12: [[META0]] = !{}
;.