1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand -mcpu=gfx900 %s | FileCheck -check-prefixes=CHECK,GFX900 %s
3 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand -mcpu=gfx90a %s | FileCheck -check-prefixes=CHECK,GFX90A %s
4 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand -mcpu=gfx940 %s | FileCheck -check-prefixes=CHECK,GFX940 %s
6 ;---------------------------------------------------------------------
8 ;---------------------------------------------------------------------
; atomicrmw fadd of <2 x half> on an addrspace(1) pointer, syncscope("agent"),
; seq_cst, align 2. The shared CHECK prefix means every RUN line expects the
; same output: a retry loop around a call to @__atomic_compare_exchange (size
; argument i64 4) that passes the expected/desired values through two
; addrspace(5) allocas.
10 define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align2(ptr addrspace(1) %ptr, <2 x half> %value) {
11 ; CHECK-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align2(
12 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
13 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
14 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
15 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
16 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
17 ; CHECK: atomicrmw.start:
18 ; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
19 ; CHECK-NEXT: [[NEW:%.*]] = fadd <2 x half> [[LOADED]], [[VALUE]]
20 ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
21 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
22 ; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
23 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
24 ; CHECK-NEXT: store <2 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
25 ; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
26 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
27 ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
28 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
29 ; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP6]], 0
30 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
31 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP8]], 1
32 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP8]], 0
33 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
34 ; CHECK: atomicrmw.end:
35 ; CHECK-NEXT: ret <2 x half> [[NEWLOADED]]
37 %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 2
; atomicrmw fadd of <2 x bfloat> on an addrspace(1) pointer, syncscope("agent"),
; seq_cst, align 2. All RUN lines share the CHECK prefix here and expect a
; retry loop around @__atomic_compare_exchange (size argument i64 4) using two
; addrspace(5) allocas for the expected and desired values.
41 define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align2(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
42 ; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align2(
43 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
44 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
45 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
46 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
47 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
48 ; CHECK: atomicrmw.start:
49 ; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
50 ; CHECK-NEXT: [[NEW:%.*]] = fadd <2 x bfloat> [[LOADED]], [[VALUE]]
51 ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
52 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
53 ; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
54 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
55 ; CHECK-NEXT: store <2 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
56 ; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
57 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
58 ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
59 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
60 ; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP6]], 0
61 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
62 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 1
63 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 0
64 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
65 ; CHECK: atomicrmw.end:
66 ; CHECK-NEXT: ret <2 x bfloat> [[NEWLOADED]]
68 %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 2
; atomicrmw fadd of <2 x half> on an addrspace(1) pointer, syncscope("agent"),
; seq_cst, align 4. Expectations differ per RUN line:
;   GFX900         - expanded to a cmpxchg loop on the value bitcast to i32.
;   GFX90A, GFX940 - the atomicrmw is expected to be left unchanged.
72 define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align4(ptr addrspace(1) %ptr, <2 x half> %value) {
73 ; GFX900-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align4(
74 ; GFX900-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
75 ; GFX900-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 4
76 ; GFX900-NEXT: br label [[ATOMICRMW_START:%.*]]
77 ; GFX900: atomicrmw.start:
78 ; GFX900-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
79 ; GFX900-NEXT: [[NEW:%.*]] = fadd <2 x half> [[LOADED]], [[VALUE]]
80 ; GFX900-NEXT: [[TMP2:%.*]] = bitcast <2 x half> [[NEW]] to i32
81 ; GFX900-NEXT: [[TMP3:%.*]] = bitcast <2 x half> [[LOADED]] to i32
82 ; GFX900-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
83 ; GFX900-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
84 ; GFX900-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
85 ; GFX900-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to <2 x half>
86 ; GFX900-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
87 ; GFX900: atomicrmw.end:
88 ; GFX900-NEXT: ret <2 x half> [[TMP5]]
90 ; GFX90A-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align4(
91 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
92 ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4
93 ; GFX90A-NEXT: ret <2 x half> [[RES]]
95 ; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align4(
96 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
97 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4
98 ; GFX940-NEXT: ret <2 x half> [[RES]]
100 %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 4
; atomicrmw fadd of <2 x bfloat> on an addrspace(1) pointer, syncscope("agent"),
; seq_cst, align 4. Expectations differ per RUN line:
;   GFX900, GFX90A - expanded to a cmpxchg loop on the value bitcast to i32.
;   GFX940         - the atomicrmw is expected to be left unchanged.
104 define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align4(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
105 ; GFX900-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align4(
106 ; GFX900-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
107 ; GFX900-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 4
108 ; GFX900-NEXT: br label [[ATOMICRMW_START:%.*]]
109 ; GFX900: atomicrmw.start:
110 ; GFX900-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
111 ; GFX900-NEXT: [[NEW:%.*]] = fadd <2 x bfloat> [[LOADED]], [[VALUE]]
112 ; GFX900-NEXT: [[TMP2:%.*]] = bitcast <2 x bfloat> [[NEW]] to i32
113 ; GFX900-NEXT: [[TMP3:%.*]] = bitcast <2 x bfloat> [[LOADED]] to i32
114 ; GFX900-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
115 ; GFX900-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
116 ; GFX900-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
117 ; GFX900-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to <2 x bfloat>
118 ; GFX900-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
119 ; GFX900: atomicrmw.end:
120 ; GFX900-NEXT: ret <2 x bfloat> [[TMP5]]
122 ; GFX90A-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align4(
123 ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
124 ; GFX90A-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 4
125 ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
126 ; GFX90A: atomicrmw.start:
127 ; GFX90A-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
128 ; GFX90A-NEXT: [[NEW:%.*]] = fadd <2 x bfloat> [[LOADED]], [[VALUE]]
129 ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast <2 x bfloat> [[NEW]] to i32
130 ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast <2 x bfloat> [[LOADED]] to i32
131 ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
132 ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
133 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
134 ; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to <2 x bfloat>
135 ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
136 ; GFX90A: atomicrmw.end:
137 ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]]
139 ; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align4(
140 ; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
141 ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4
142 ; GFX940-NEXT: ret <2 x bfloat> [[RES]]
144 %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 4
145 ret <2 x bfloat> %res
; atomicrmw fadd of <4 x half> on an addrspace(1) pointer, syncscope("agent"),
; seq_cst, align 2. All RUN lines share the CHECK prefix and expect a retry
; loop around @__atomic_compare_exchange (size argument i64 8) using two
; addrspace(5) allocas for the expected and desired values.
148 define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align2(ptr addrspace(1) %ptr, <4 x half> %value) {
149 ; CHECK-LABEL: define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align2(
150 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
151 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
152 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
153 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
154 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
155 ; CHECK: atomicrmw.start:
156 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
157 ; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x half> [[LOADED]], [[VALUE]]
158 ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
159 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
160 ; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
161 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
162 ; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
163 ; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
164 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
165 ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
166 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
167 ; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
168 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
169 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
170 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
171 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
172 ; CHECK: atomicrmw.end:
173 ; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
175 %res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 2
; atomicrmw fadd of <4 x bfloat> on an addrspace(1) pointer, syncscope("agent"),
; seq_cst, align 2. All RUN lines share the CHECK prefix and expect a retry
; loop around @__atomic_compare_exchange (size argument i64 8) using two
; addrspace(5) allocas for the expected and desired values.
179 define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align2(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
180 ; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align2(
181 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
182 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
183 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
184 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
185 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
186 ; CHECK: atomicrmw.start:
187 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
188 ; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x bfloat> [[LOADED]], [[VALUE]]
189 ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
190 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
191 ; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
192 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
193 ; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
194 ; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
195 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
196 ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
197 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
198 ; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
199 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
200 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
201 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
202 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
203 ; CHECK: atomicrmw.end:
204 ; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
206 %res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 2
207 ret <4 x bfloat> %res
; atomicrmw fadd of <4 x half> on an addrspace(1) pointer, syncscope("agent"),
; seq_cst, align 4 (less than the 8 bytes passed as the size argument below).
; All RUN lines share the CHECK prefix and expect a retry loop around
; @__atomic_compare_exchange (size argument i64 8) using two addrspace(5)
; allocas.
210 define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align4(ptr addrspace(1) %ptr, <4 x half> %value) {
211 ; CHECK-LABEL: define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align4(
212 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
213 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
214 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
215 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
216 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
217 ; CHECK: atomicrmw.start:
218 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
219 ; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x half> [[LOADED]], [[VALUE]]
220 ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
221 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
222 ; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
223 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
224 ; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
225 ; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
226 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
227 ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
228 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
229 ; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
230 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
231 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
232 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
233 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
234 ; CHECK: atomicrmw.end:
235 ; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
237 %res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 4
; atomicrmw fadd of <4 x bfloat> on an addrspace(1) pointer, syncscope("agent"),
; seq_cst, align 4 (less than the 8 bytes passed as the size argument below).
; All RUN lines share the CHECK prefix and expect a retry loop around
; @__atomic_compare_exchange (size argument i64 8) using two addrspace(5)
; allocas.
241 define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align4(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
242 ; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align4(
243 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
244 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
245 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
246 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
247 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
248 ; CHECK: atomicrmw.start:
249 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
250 ; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x bfloat> [[LOADED]], [[VALUE]]
251 ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
252 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
253 ; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
254 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
255 ; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
256 ; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
257 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
258 ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
259 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
260 ; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
261 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
262 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
263 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
264 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
265 ; CHECK: atomicrmw.end:
266 ; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
268 %res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 4
269 ret <4 x bfloat> %res
; atomicrmw fadd of <4 x half> on an addrspace(1) pointer, syncscope("agent"),
; seq_cst, align 8. All RUN lines share the CHECK prefix and expect an inline
; cmpxchg retry loop operating on the value bitcast to i64.
272 define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align8(ptr addrspace(1) %ptr, <4 x half> %value) {
273 ; CHECK-LABEL: define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align8(
274 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
275 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 8
276 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
277 ; CHECK: atomicrmw.start:
278 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
279 ; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x half> [[LOADED]], [[VALUE]]
280 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[NEW]] to i64
281 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[LOADED]] to i64
282 ; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
283 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
284 ; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
285 ; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to <4 x half>
286 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
287 ; CHECK: atomicrmw.end:
288 ; CHECK-NEXT: ret <4 x half> [[TMP5]]
290 %res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 8
; atomicrmw fadd of <4 x bfloat> on an addrspace(1) pointer, syncscope("agent"),
; seq_cst, align 8. All RUN lines share the CHECK prefix and expect an inline
; cmpxchg retry loop operating on the value bitcast to i64.
294 define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align8(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
295 ; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align8(
296 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
297 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 8
298 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
299 ; CHECK: atomicrmw.start:
300 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
301 ; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x bfloat> [[LOADED]], [[VALUE]]
302 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x bfloat> [[NEW]] to i64
303 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x bfloat> [[LOADED]] to i64
304 ; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
305 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
306 ; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
307 ; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to <4 x bfloat>
308 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
309 ; CHECK: atomicrmw.end:
310 ; CHECK-NEXT: ret <4 x bfloat> [[TMP5]]
312 %res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 8
313 ret <4 x bfloat> %res
; atomicrmw fadd of <2 x float> on an addrspace(1) pointer, syncscope("agent"),
; seq_cst, align 8. All RUN lines share the CHECK prefix and expect an inline
; cmpxchg retry loop operating on the value bitcast to i64.
316 define <2 x float> @test_atomicrmw_fadd_v2f32_global_agent_align8(ptr addrspace(1) %ptr, <2 x float> %value) {
317 ; CHECK-LABEL: define <2 x float> @test_atomicrmw_fadd_v2f32_global_agent_align8(
318 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] {
319 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[PTR]], align 8
320 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
321 ; CHECK: atomicrmw.start:
322 ; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x float> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
323 ; CHECK-NEXT: [[NEW:%.*]] = fadd <2 x float> [[LOADED]], [[VALUE]]
324 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[NEW]] to i64
325 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LOADED]] to i64
326 ; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
327 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
328 ; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
329 ; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to <2 x float>
330 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
331 ; CHECK: atomicrmw.end:
332 ; CHECK-NEXT: ret <2 x float> [[TMP5]]
334 %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x float> %value syncscope("agent") seq_cst, align 8
338 ;---------------------------------------------------------------------
340 ;---------------------------------------------------------------------
; atomicrmw fsub of <2 x half> on an addrspace(1) pointer, syncscope("agent"),
; seq_cst, align 2. All RUN lines share the CHECK prefix and expect a retry
; loop around @__atomic_compare_exchange (size argument i64 4) using two
; addrspace(5) allocas for the expected and desired values.
342 define <2 x half> @test_atomicrmw_fsub_v2f16_global_agent_align2(ptr addrspace(1) %ptr, <2 x half> %value) {
343 ; CHECK-LABEL: define <2 x half> @test_atomicrmw_fsub_v2f16_global_agent_align2(
344 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
345 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
346 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
347 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
348 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
349 ; CHECK: atomicrmw.start:
350 ; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
351 ; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x half> [[LOADED]], [[VALUE]]
352 ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
353 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
354 ; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
355 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
356 ; CHECK-NEXT: store <2 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
357 ; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
358 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
359 ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
360 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
361 ; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP6]], 0
362 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
363 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP8]], 1
364 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP8]], 0
365 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
366 ; CHECK: atomicrmw.end:
367 ; CHECK-NEXT: ret <2 x half> [[NEWLOADED]]
369 %res = atomicrmw fsub ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 2
; atomicrmw fsub of <2 x bfloat> on an addrspace(1) pointer, syncscope("agent"),
; seq_cst, align 2. All RUN lines share the CHECK prefix and expect a retry
; loop around @__atomic_compare_exchange (size argument i64 4) using two
; addrspace(5) allocas for the expected and desired values.
373 define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_global_agent_align2(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
374 ; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_global_agent_align2(
375 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
376 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
377 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
378 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
379 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
380 ; CHECK: atomicrmw.start:
381 ; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
382 ; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x bfloat> [[LOADED]], [[VALUE]]
383 ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
384 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
385 ; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
386 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
387 ; CHECK-NEXT: store <2 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
388 ; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
389 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
390 ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
391 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
392 ; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP6]], 0
393 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
394 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 1
395 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 0
396 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
397 ; CHECK: atomicrmw.end:
398 ; CHECK-NEXT: ret <2 x bfloat> [[NEWLOADED]]
400 %res = atomicrmw fsub ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 2
401 ret <2 x bfloat> %res
; atomicrmw fsub of <2 x half> on an addrspace(1) pointer, syncscope("agent"),
; seq_cst, align 4. Unlike the fadd counterpart, all RUN lines share the CHECK
; prefix here: each expects an inline cmpxchg retry loop on the value bitcast
; to i32.
404 define <2 x half> @test_atomicrmw_fsub_v2f16_global_agent_align4(ptr addrspace(1) %ptr, <2 x half> %value) {
405 ; CHECK-LABEL: define <2 x half> @test_atomicrmw_fsub_v2f16_global_agent_align4(
406 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
407 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 4
408 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
409 ; CHECK: atomicrmw.start:
410 ; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
411 ; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x half> [[LOADED]], [[VALUE]]
412 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x half> [[NEW]] to i32
413 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x half> [[LOADED]] to i32
414 ; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
415 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
416 ; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
417 ; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to <2 x half>
418 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
419 ; CHECK: atomicrmw.end:
420 ; CHECK-NEXT: ret <2 x half> [[TMP5]]
422 %res = atomicrmw fsub ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 4
; atomicrmw fsub of <2 x bfloat> on an addrspace(1) pointer, syncscope("agent"),
; seq_cst, align 4. All RUN lines share the CHECK prefix and expect an inline
; cmpxchg retry loop on the value bitcast to i32.
426 define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_global_agent_align4(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
427 ; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_global_agent_align4(
428 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
429 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 4
430 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
431 ; CHECK: atomicrmw.start:
432 ; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
433 ; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x bfloat> [[LOADED]], [[VALUE]]
434 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x bfloat> [[NEW]] to i32
435 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x bfloat> [[LOADED]] to i32
436 ; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
437 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
438 ; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
439 ; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to <2 x bfloat>
440 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
441 ; CHECK: atomicrmw.end:
442 ; CHECK-NEXT: ret <2 x bfloat> [[TMP5]]
444 %res = atomicrmw fsub ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 4
445 ret <2 x bfloat> %res
; atomicrmw fsub of <4 x half> on an addrspace(1) pointer, syncscope("agent"),
; seq_cst, align 2. All RUN lines share the CHECK prefix and expect a retry
; loop around @__atomic_compare_exchange (size argument i64 8) using two
; addrspace(5) allocas for the expected and desired values.
448 define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align2(ptr addrspace(1) %ptr, <4 x half> %value) {
449 ; CHECK-LABEL: define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align2(
450 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
451 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
452 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
453 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
454 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
455 ; CHECK: atomicrmw.start:
456 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
457 ; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x half> [[LOADED]], [[VALUE]]
458 ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
459 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
460 ; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
461 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
462 ; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
463 ; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
464 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
465 ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
466 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
467 ; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
468 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
469 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
470 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
471 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
472 ; CHECK: atomicrmw.end:
473 ; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
475 %res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 2
; Test: atomicrmw fsub of <4 x bfloat> on a global (addrspace 1) pointer,
; syncscope("agent") seq_cst, align 2.
; The autogenerated checks below expect a compare-exchange loop that calls
; @__atomic_compare_exchange with size 8, passing the expected and desired
; values through two addrspace(5) allocas.
479 define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align2(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
480 ; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align2(
481 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
482 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
483 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
484 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
485 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
486 ; CHECK: atomicrmw.start:
487 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
488 ; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x bfloat> [[LOADED]], [[VALUE]]
489 ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
490 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
491 ; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
492 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
493 ; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
494 ; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
495 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
496 ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
497 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
498 ; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
499 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
500 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
501 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
502 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
503 ; CHECK: atomicrmw.end:
504 ; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
506 %res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 2
507 ret <4 x bfloat> %res
; Test: atomicrmw fsub of <4 x half> on a global (addrspace 1) pointer,
; syncscope("agent") seq_cst, align 4.
; The autogenerated checks below expect a compare-exchange loop that calls
; @__atomic_compare_exchange with size 8, passing the expected and desired
; values through two addrspace(5) allocas.
510 define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align4(ptr addrspace(1) %ptr, <4 x half> %value) {
511 ; CHECK-LABEL: define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align4(
512 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
513 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
514 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
515 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
516 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
517 ; CHECK: atomicrmw.start:
518 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
519 ; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x half> [[LOADED]], [[VALUE]]
520 ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
521 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
522 ; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
523 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
524 ; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
525 ; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
526 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
527 ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
528 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
529 ; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
530 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
531 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
532 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
533 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
534 ; CHECK: atomicrmw.end:
535 ; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
537 %res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 4
; Test: atomicrmw fsub of <4 x bfloat> on a global (addrspace 1) pointer,
; syncscope("agent") seq_cst, align 4.
; The autogenerated checks below expect a compare-exchange loop that calls
; @__atomic_compare_exchange with size 8, passing the expected and desired
; values through two addrspace(5) allocas.
541 define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align4(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
542 ; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align4(
543 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
544 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
545 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
546 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
547 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
548 ; CHECK: atomicrmw.start:
549 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
550 ; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x bfloat> [[LOADED]], [[VALUE]]
551 ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
552 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
553 ; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
554 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
555 ; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
556 ; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
557 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
558 ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
559 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
560 ; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
561 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
562 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
563 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
564 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
565 ; CHECK: atomicrmw.end:
566 ; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
568 %res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 4
569 ret <4 x bfloat> %res
; Test: atomicrmw fsub of <4 x half> on a global (addrspace 1) pointer,
; syncscope("agent") seq_cst, align 8.
; The autogenerated checks below expect an inline cmpxchg loop that operates
; on the vector bitcast to i64, with no libcall.
572 define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align8(ptr addrspace(1) %ptr, <4 x half> %value) {
573 ; CHECK-LABEL: define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align8(
574 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
575 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 8
576 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
577 ; CHECK: atomicrmw.start:
578 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
579 ; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x half> [[LOADED]], [[VALUE]]
580 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[NEW]] to i64
581 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[LOADED]] to i64
582 ; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
583 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
584 ; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
585 ; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to <4 x half>
586 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
587 ; CHECK: atomicrmw.end:
588 ; CHECK-NEXT: ret <4 x half> [[TMP5]]
590 %res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 8
; Test: atomicrmw fsub of <4 x bfloat> on a global (addrspace 1) pointer,
; syncscope("agent") seq_cst, align 8.
; The autogenerated checks below expect an inline cmpxchg loop that operates
; on the vector bitcast to i64, with no libcall.
594 define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align8(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
595 ; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align8(
596 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
597 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 8
598 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
599 ; CHECK: atomicrmw.start:
600 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
601 ; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x bfloat> [[LOADED]], [[VALUE]]
602 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x bfloat> [[NEW]] to i64
603 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x bfloat> [[LOADED]] to i64
604 ; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
605 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
606 ; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
607 ; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to <4 x bfloat>
608 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
609 ; CHECK: atomicrmw.end:
610 ; CHECK-NEXT: ret <4 x bfloat> [[TMP5]]
612 %res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 8
613 ret <4 x bfloat> %res
; Test: atomicrmw fsub of <2 x float> on a global (addrspace 1) pointer,
; syncscope("agent") seq_cst, align 8.
; The autogenerated checks below expect an inline cmpxchg loop that operates
; on the vector bitcast to i64, with no libcall.
616 define <2 x float> @test_atomicrmw_fsub_v2f32_global_agent_align8(ptr addrspace(1) %ptr, <2 x float> %value) {
617 ; CHECK-LABEL: define <2 x float> @test_atomicrmw_fsub_v2f32_global_agent_align8(
618 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] {
619 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[PTR]], align 8
620 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
621 ; CHECK: atomicrmw.start:
622 ; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x float> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
623 ; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x float> [[LOADED]], [[VALUE]]
624 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[NEW]] to i64
625 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LOADED]] to i64
626 ; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
627 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
628 ; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
629 ; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to <2 x float>
630 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
631 ; CHECK: atomicrmw.end:
632 ; CHECK-NEXT: ret <2 x float> [[TMP5]]
634 %res = atomicrmw fsub ptr addrspace(1) %ptr, <2 x float> %value syncscope("agent") seq_cst, align 8
638 ;---------------------------------------------------------------------
640 ;---------------------------------------------------------------------
; Test: atomicrmw fmin of <2 x half> on a global (addrspace 1) pointer,
; syncscope("agent") seq_cst, align 2.
; The autogenerated checks below expect the new value to come from
; @llvm.minnum.v2f16 inside a compare-exchange loop that calls
; @__atomic_compare_exchange with size 4 through two addrspace(5) allocas.
642 define <2 x half> @test_atomicrmw_fmin_v2f16_global_agent_align2(ptr addrspace(1) %ptr, <2 x half> %value) {
643 ; CHECK-LABEL: define <2 x half> @test_atomicrmw_fmin_v2f16_global_agent_align2(
644 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
645 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
646 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
647 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
648 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
649 ; CHECK: atomicrmw.start:
650 ; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
651 ; CHECK-NEXT: [[TMP4:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VALUE]])
652 ; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
653 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
654 ; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
655 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
656 ; CHECK-NEXT: store <2 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
657 ; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
658 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
659 ; CHECK-NEXT: [[TMP7:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
660 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
661 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP7]], 0
662 ; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
663 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP9]], 1
664 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP9]], 0
665 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
666 ; CHECK: atomicrmw.end:
667 ; CHECK-NEXT: ret <2 x half> [[NEWLOADED]]
669 %res = atomicrmw fmin ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 2
; Test: atomicrmw fmin of <2 x bfloat> on a global (addrspace 1) pointer,
; syncscope("agent") seq_cst, align 2.
; The autogenerated checks below expect the new value to come from
; @llvm.minnum.v2bf16 inside a compare-exchange loop that calls
; @__atomic_compare_exchange with size 4 through two addrspace(5) allocas.
673 define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_global_agent_align2(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
674 ; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_global_agent_align2(
675 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
676 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
677 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
678 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
679 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
680 ; CHECK: atomicrmw.start:
681 ; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
682 ; CHECK-NEXT: [[TMP4:%.*]] = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> [[LOADED]], <2 x bfloat> [[VALUE]])
683 ; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
684 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
685 ; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
686 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
687 ; CHECK-NEXT: store <2 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
688 ; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
689 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
690 ; CHECK-NEXT: [[TMP7:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
691 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
692 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP7]], 0
693 ; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
694 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 1
695 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 0
696 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
697 ; CHECK: atomicrmw.end:
698 ; CHECK-NEXT: ret <2 x bfloat> [[NEWLOADED]]
700 %res = atomicrmw fmin ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 2
701 ret <2 x bfloat> %res
; Test: atomicrmw fmin of <2 x half> on a global (addrspace 1) pointer,
; syncscope("agent") seq_cst, align 4.
; The autogenerated checks below expect the new value to come from
; @llvm.minnum.v2f16 inside an inline cmpxchg loop that operates on the
; vector bitcast to i32, with no libcall.
704 define <2 x half> @test_atomicrmw_fmin_v2f16_global_agent_align4(ptr addrspace(1) %ptr, <2 x half> %value) {
705 ; CHECK-LABEL: define <2 x half> @test_atomicrmw_fmin_v2f16_global_agent_align4(
706 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
707 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 4
708 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
709 ; CHECK: atomicrmw.start:
710 ; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
711 ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VALUE]])
712 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x half> [[TMP2]] to i32
713 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x half> [[LOADED]] to i32
714 ; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
715 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
716 ; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
717 ; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to <2 x half>
718 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
719 ; CHECK: atomicrmw.end:
720 ; CHECK-NEXT: ret <2 x half> [[TMP6]]
722 %res = atomicrmw fmin ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 4
; Test: atomicrmw fmin of <2 x bfloat> on a global (addrspace 1) pointer,
; syncscope("agent") seq_cst, align 4.
; The autogenerated checks below expect the new value to come from
; @llvm.minnum.v2bf16 inside an inline cmpxchg loop that operates on the
; vector bitcast to i32, with no libcall.
726 define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_global_agent_align4(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
727 ; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_global_agent_align4(
728 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
729 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 4
730 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
731 ; CHECK: atomicrmw.start:
732 ; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
733 ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> [[LOADED]], <2 x bfloat> [[VALUE]])
734 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x bfloat> [[TMP2]] to i32
735 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x bfloat> [[LOADED]] to i32
736 ; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
737 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
738 ; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
739 ; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to <2 x bfloat>
740 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
741 ; CHECK: atomicrmw.end:
742 ; CHECK-NEXT: ret <2 x bfloat> [[TMP6]]
744 %res = atomicrmw fmin ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 4
745 ret <2 x bfloat> %res
; Test: atomicrmw fmin of <4 x half> on a global (addrspace 1) pointer,
; syncscope("agent") seq_cst, align 2.
; The autogenerated checks below expect the new value to come from
; @llvm.minnum.v4f16 inside a compare-exchange loop that calls
; @__atomic_compare_exchange with size 8 through two addrspace(5) allocas.
748 define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align2(ptr addrspace(1) %ptr, <4 x half> %value) {
749 ; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align2(
750 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
751 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
752 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
753 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
754 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
755 ; CHECK: atomicrmw.start:
756 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
757 ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.minnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
758 ; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
759 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
760 ; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
761 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
762 ; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
763 ; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
764 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
765 ; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
766 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
767 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
768 ; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
769 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
770 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
771 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
772 ; CHECK: atomicrmw.end:
773 ; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
775 %res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 2
; Test: atomicrmw fmin of <4 x bfloat> on a global (addrspace 1) pointer,
; syncscope("agent") seq_cst, align 2.
; The autogenerated checks below expect the new value to come from
; @llvm.minnum.v4bf16 inside a compare-exchange loop that calls
; @__atomic_compare_exchange with size 8 through two addrspace(5) allocas.
779 define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align2(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
780 ; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align2(
781 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
782 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
783 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
784 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
785 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
786 ; CHECK: atomicrmw.start:
787 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
788 ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
789 ; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
790 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
791 ; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
792 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
793 ; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
794 ; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
795 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
796 ; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
797 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
798 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
799 ; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
800 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
801 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
802 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
803 ; CHECK: atomicrmw.end:
804 ; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
806 %res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 2
807 ret <4 x bfloat> %res
; Test: atomicrmw fmin of <4 x half> on a global (addrspace 1) pointer,
; syncscope("agent") seq_cst, align 4.
; The autogenerated checks below expect the new value to come from
; @llvm.minnum.v4f16 inside a compare-exchange loop that calls
; @__atomic_compare_exchange with size 8 through two addrspace(5) allocas.
810 define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align4(ptr addrspace(1) %ptr, <4 x half> %value) {
811 ; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align4(
812 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
813 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
814 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
815 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
816 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
817 ; CHECK: atomicrmw.start:
818 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
819 ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.minnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
820 ; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
821 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
822 ; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
823 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
824 ; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
825 ; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
826 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
827 ; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
828 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
829 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
830 ; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
831 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
832 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
833 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
834 ; CHECK: atomicrmw.end:
835 ; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
837 %res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 4
; Test: atomicrmw fmin of <4 x bfloat> on a global (addrspace 1) pointer,
; syncscope("agent") seq_cst, align 4.
; The autogenerated checks below expect the new value to come from
; @llvm.minnum.v4bf16 inside a compare-exchange loop that calls
; @__atomic_compare_exchange with size 8 through two addrspace(5) allocas.
841 define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align4(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
842 ; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align4(
843 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
844 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
845 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
846 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
847 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
848 ; CHECK: atomicrmw.start:
849 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
850 ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
851 ; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
852 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
853 ; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
854 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
855 ; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
856 ; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
857 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
858 ; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
859 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
860 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
861 ; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
862 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
863 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
864 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
865 ; CHECK: atomicrmw.end:
866 ; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
868 %res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 4
869 ret <4 x bfloat> %res
; Test: atomicrmw fmin of <4 x half> on a global (addrspace 1) pointer,
; syncscope("agent") seq_cst, align 8.
; The autogenerated checks below expect the new value to come from
; @llvm.minnum.v4f16 inside an inline cmpxchg loop that operates on the
; vector bitcast to i64, with no libcall.
872 define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align8(ptr addrspace(1) %ptr, <4 x half> %value) {
873 ; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align8(
874 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
875 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 8
876 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
877 ; CHECK: atomicrmw.start:
878 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
879 ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x half> @llvm.minnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
880 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP2]] to i64
881 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[LOADED]] to i64
882 ; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
883 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
884 ; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
885 ; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to <4 x half>
886 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
887 ; CHECK: atomicrmw.end:
888 ; CHECK-NEXT: ret <4 x half> [[TMP6]]
890 %res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 8
; Test: atomicrmw fmin of <4 x bfloat> on a global (addrspace 1) pointer,
; syncscope("agent") seq_cst, align 8.
; The autogenerated checks below expect the new value to come from
; @llvm.minnum.v4bf16 inside an inline cmpxchg loop that operates on the
; vector bitcast to i64, with no libcall.
894 define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align8(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
895 ; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align8(
896 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
897 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 8
898 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
899 ; CHECK: atomicrmw.start:
900 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
901 ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
902 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x bfloat> [[TMP2]] to i64
903 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x bfloat> [[LOADED]] to i64
904 ; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
905 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
906 ; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
907 ; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to <4 x bfloat>
908 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
909 ; CHECK: atomicrmw.end:
910 ; CHECK-NEXT: ret <4 x bfloat> [[TMP6]]
912 %res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 8
913 ret <4 x bfloat> %res
; Input: `atomicrmw fmin` on <2 x float> through a global (addrspace(1))
; pointer, syncscope("agent") seq_cst, align 8.
; Expected expansion: a plain load seeds the loop phi; each iteration computes
; llvm.minnum.v2f32 of the loaded value and %value, bitcasts both the new and
; the loaded vector to i64, and issues an i64 cmpxchg with the same syncscope,
; ordering and alignment. The loop repeats until the cmpxchg reports success,
; and the returned value is the cmpxchg result bitcast back to <2 x float>.
; Only the common FileCheck prefix is used, so every RUN line expects this.
916 define <2 x float> @test_atomicrmw_fmin_v2f32_global_agent_align8(ptr addrspace(1) %ptr, <2 x float> %value) {
917 ; CHECK-LABEL: define <2 x float> @test_atomicrmw_fmin_v2f32_global_agent_align8(
918 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] {
919 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[PTR]], align 8
920 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
921 ; CHECK: atomicrmw.start:
922 ; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x float> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
923 ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.minnum.v2f32(<2 x float> [[LOADED]], <2 x float> [[VALUE]])
924 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[TMP2]] to i64
925 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[LOADED]] to i64
926 ; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
927 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
928 ; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
929 ; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to <2 x float>
930 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
931 ; CHECK: atomicrmw.end:
932 ; CHECK-NEXT: ret <2 x float> [[TMP6]]
934 %res = atomicrmw fmin ptr addrspace(1) %ptr, <2 x float> %value syncscope("agent") seq_cst, align 8
938 ;---------------------------------------------------------------------
; atomicrmw fmax
940 ;---------------------------------------------------------------------
; Input: `atomicrmw fmax` on <2 x half> through a global (addrspace(1))
; pointer, syncscope("agent") seq_cst, with only align 2 on the 4-byte value.
; Expected expansion: instead of an inline cmpxchg, the loop goes through the
; @__atomic_compare_exchange libcall with size 4. Two addrspace(5) allocas hold
; the expected (previously loaded) and desired (llvm.maxnum.v2f16 result)
; values, and the pointer is addrspacecast to an address-space-0 `ptr` for the
; call. The value reloaded from the expected-value alloca after the call feeds
; the loop phi and is what the function returns; the loop exits when the call
; returns true. Both trailing `i32 5` arguments are presumably the libcall
; encoding of seq_cst -- confirm against the libcall ABI.
; Only the common FileCheck prefix is used, so every RUN line expects this.
942 define <2 x half> @test_atomicrmw_fmax_v2f16_global_agent_align2(ptr addrspace(1) %ptr, <2 x half> %value) {
943 ; CHECK-LABEL: define <2 x half> @test_atomicrmw_fmax_v2f16_global_agent_align2(
944 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
945 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
946 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
947 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
948 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
949 ; CHECK: atomicrmw.start:
950 ; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
951 ; CHECK-NEXT: [[TMP4:%.*]] = call <2 x half> @llvm.maxnum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VALUE]])
952 ; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
953 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
954 ; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
955 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
956 ; CHECK-NEXT: store <2 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
957 ; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
958 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
959 ; CHECK-NEXT: [[TMP7:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
960 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
961 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP7]], 0
962 ; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
963 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP9]], 1
964 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP9]], 0
965 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
966 ; CHECK: atomicrmw.end:
967 ; CHECK-NEXT: ret <2 x half> [[NEWLOADED]]
969 %res = atomicrmw fmax ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 2
; Input: `atomicrmw fmax` on <2 x bfloat> through a global (addrspace(1))
; pointer, syncscope("agent") seq_cst, with only align 2 on the 4-byte value.
; Expected expansion: instead of an inline cmpxchg, the loop goes through the
; @__atomic_compare_exchange libcall with size 4. Two addrspace(5) allocas hold
; the expected (previously loaded) and desired (llvm.maxnum.v2bf16 result)
; values, and the pointer is addrspacecast to an address-space-0 `ptr` for the
; call. The value reloaded from the expected-value alloca after the call feeds
; the loop phi and is what the function returns; the loop exits when the call
; returns true. Both trailing `i32 5` arguments are presumably the libcall
; encoding of seq_cst -- confirm against the libcall ABI.
; Only the common FileCheck prefix is used, so every RUN line expects this.
973 define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_global_agent_align2(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
974 ; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_global_agent_align2(
975 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
976 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
977 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
978 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
979 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
980 ; CHECK: atomicrmw.start:
981 ; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
982 ; CHECK-NEXT: [[TMP4:%.*]] = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> [[LOADED]], <2 x bfloat> [[VALUE]])
983 ; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
984 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
985 ; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
986 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
987 ; CHECK-NEXT: store <2 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
988 ; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
989 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
990 ; CHECK-NEXT: [[TMP7:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
991 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
992 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP7]], 0
993 ; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
994 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 1
995 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 0
996 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
997 ; CHECK: atomicrmw.end:
998 ; CHECK-NEXT: ret <2 x bfloat> [[NEWLOADED]]
1000 %res = atomicrmw fmax ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 2
1001 ret <2 x bfloat> %res
; Input: `atomicrmw fmax` on <2 x half> through a global (addrspace(1))
; pointer, syncscope("agent") seq_cst, align 4.
; Expected expansion: a plain load seeds the loop phi; each iteration computes
; llvm.maxnum.v2f16 of the loaded value and %value, bitcasts both the new and
; the loaded vector to i32, and issues an i32 cmpxchg with the same syncscope,
; ordering and alignment. The loop repeats until the cmpxchg reports success,
; and the returned value is the cmpxchg result bitcast back to <2 x half>.
; Only the common FileCheck prefix is used, so every RUN line expects this.
1004 define <2 x half> @test_atomicrmw_fmax_v2f16_global_agent_align4(ptr addrspace(1) %ptr, <2 x half> %value) {
1005 ; CHECK-LABEL: define <2 x half> @test_atomicrmw_fmax_v2f16_global_agent_align4(
1006 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
1007 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 4
1008 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
1009 ; CHECK: atomicrmw.start:
1010 ; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1011 ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x half> @llvm.maxnum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VALUE]])
1012 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x half> [[TMP2]] to i32
1013 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x half> [[LOADED]] to i32
1014 ; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
1015 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
1016 ; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
1017 ; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to <2 x half>
1018 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1019 ; CHECK: atomicrmw.end:
1020 ; CHECK-NEXT: ret <2 x half> [[TMP6]]
1022 %res = atomicrmw fmax ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 4
; Input: `atomicrmw fmax` on <2 x bfloat> through a global (addrspace(1))
; pointer, syncscope("agent") seq_cst, align 4.
; Expected expansion: a plain load seeds the loop phi; each iteration computes
; llvm.maxnum.v2bf16 of the loaded value and %value, bitcasts both the new and
; the loaded vector to i32, and issues an i32 cmpxchg with the same syncscope,
; ordering and alignment. The loop repeats until the cmpxchg reports success,
; and the returned value is the cmpxchg result bitcast back to <2 x bfloat>.
; Only the common FileCheck prefix is used, so every RUN line expects this.
1026 define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_global_agent_align4(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
1027 ; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_global_agent_align4(
1028 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
1029 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 4
1030 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
1031 ; CHECK: atomicrmw.start:
1032 ; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1033 ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> [[LOADED]], <2 x bfloat> [[VALUE]])
1034 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x bfloat> [[TMP2]] to i32
1035 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x bfloat> [[LOADED]] to i32
1036 ; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
1037 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
1038 ; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
1039 ; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to <2 x bfloat>
1040 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1041 ; CHECK: atomicrmw.end:
1042 ; CHECK-NEXT: ret <2 x bfloat> [[TMP6]]
1044 %res = atomicrmw fmax ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 4
1045 ret <2 x bfloat> %res
; Input: `atomicrmw fmax` on <4 x half> through a global (addrspace(1))
; pointer, syncscope("agent") seq_cst, with only align 2 on the 8-byte value.
; Expected expansion: instead of an inline cmpxchg, the loop goes through the
; @__atomic_compare_exchange libcall with size 8. Two addrspace(5) allocas hold
; the expected (previously loaded) and desired (llvm.maxnum.v4f16 result)
; values, and the pointer is addrspacecast to an address-space-0 `ptr` for the
; call. The value reloaded from the expected-value alloca after the call feeds
; the loop phi and is what the function returns; the loop exits when the call
; returns true. Both trailing `i32 5` arguments are presumably the libcall
; encoding of seq_cst -- confirm against the libcall ABI.
; Only the common FileCheck prefix is used, so every RUN line expects this.
1048 define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align2(ptr addrspace(1) %ptr, <4 x half> %value) {
1049 ; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align2(
1050 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
1051 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
1052 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
1053 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
1054 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
1055 ; CHECK: atomicrmw.start:
1056 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1057 ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.maxnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
1058 ; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1059 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
1060 ; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
1061 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
1062 ; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
1063 ; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
1064 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
1065 ; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
1066 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
1067 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
1068 ; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
1069 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
1070 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
1071 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1072 ; CHECK: atomicrmw.end:
1073 ; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
1075 %res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 2
; Input: `atomicrmw fmax` on <4 x bfloat> through a global (addrspace(1))
; pointer, syncscope("agent") seq_cst, with only align 2 on the 8-byte value.
; Expected expansion: instead of an inline cmpxchg, the loop goes through the
; @__atomic_compare_exchange libcall with size 8. Two addrspace(5) allocas hold
; the expected (previously loaded) and desired (llvm.maxnum.v4bf16 result)
; values, and the pointer is addrspacecast to an address-space-0 `ptr` for the
; call. The value reloaded from the expected-value alloca after the call feeds
; the loop phi and is what the function returns; the loop exits when the call
; returns true. Both trailing `i32 5` arguments are presumably the libcall
; encoding of seq_cst -- confirm against the libcall ABI.
; Only the common FileCheck prefix is used, so every RUN line expects this.
1079 define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align2(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
1080 ; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align2(
1081 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
1082 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
1083 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
1084 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
1085 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
1086 ; CHECK: atomicrmw.start:
1087 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1088 ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
1089 ; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1090 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
1091 ; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
1092 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
1093 ; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
1094 ; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
1095 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
1096 ; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
1097 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
1098 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
1099 ; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
1100 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
1101 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
1102 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1103 ; CHECK: atomicrmw.end:
1104 ; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
1106 %res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 2
1107 ret <4 x bfloat> %res
; Input: `atomicrmw fmax` on <4 x half> through a global (addrspace(1))
; pointer, syncscope("agent") seq_cst, with only align 4 on the 8-byte value.
; Expected expansion: instead of an inline cmpxchg, the loop goes through the
; @__atomic_compare_exchange libcall with size 8. Two addrspace(5) allocas hold
; the expected (previously loaded) and desired (llvm.maxnum.v4f16 result)
; values, and the pointer is addrspacecast to an address-space-0 `ptr` for the
; call. The value reloaded from the expected-value alloca after the call feeds
; the loop phi and is what the function returns; the loop exits when the call
; returns true. Both trailing `i32 5` arguments are presumably the libcall
; encoding of seq_cst -- confirm against the libcall ABI.
; Only the common FileCheck prefix is used, so every RUN line expects this.
1110 define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align4(ptr addrspace(1) %ptr, <4 x half> %value) {
1111 ; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align4(
1112 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
1113 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
1114 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
1115 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
1116 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
1117 ; CHECK: atomicrmw.start:
1118 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1119 ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.maxnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
1120 ; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1121 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
1122 ; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
1123 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
1124 ; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
1125 ; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
1126 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
1127 ; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
1128 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
1129 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
1130 ; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
1131 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
1132 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
1133 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1134 ; CHECK: atomicrmw.end:
1135 ; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
1137 %res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 4
; Input: `atomicrmw fmax` on <4 x bfloat> through a global (addrspace(1))
; pointer, syncscope("agent") seq_cst, with only align 4 on the 8-byte value.
; Expected expansion: instead of an inline cmpxchg, the loop goes through the
; @__atomic_compare_exchange libcall with size 8. Two addrspace(5) allocas hold
; the expected (previously loaded) and desired (llvm.maxnum.v4bf16 result)
; values, and the pointer is addrspacecast to an address-space-0 `ptr` for the
; call. The value reloaded from the expected-value alloca after the call feeds
; the loop phi and is what the function returns; the loop exits when the call
; returns true. Both trailing `i32 5` arguments are presumably the libcall
; encoding of seq_cst -- confirm against the libcall ABI.
; Only the common FileCheck prefix is used, so every RUN line expects this.
1141 define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align4(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
1142 ; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align4(
1143 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
1144 ; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
1145 ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
1146 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
1147 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
1148 ; CHECK: atomicrmw.start:
1149 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1150 ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
1151 ; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
1152 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
1153 ; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
1154 ; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
1155 ; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
1156 ; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
1157 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
1158 ; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
1159 ; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
1160 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
1161 ; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
1162 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
1163 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
1164 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1165 ; CHECK: atomicrmw.end:
1166 ; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
1168 %res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 4
1169 ret <4 x bfloat> %res
; Input: `atomicrmw fmax` on <4 x half> through a global (addrspace(1))
; pointer, syncscope("agent") seq_cst, align 8.
; Expected expansion: a plain load seeds the loop phi; each iteration computes
; llvm.maxnum.v4f16 of the loaded value and %value, bitcasts both the new and
; the loaded vector to i64, and issues an i64 cmpxchg with the same syncscope,
; ordering and alignment. The loop repeats until the cmpxchg reports success,
; and the returned value is the cmpxchg result bitcast back to <4 x half>.
; Only the common FileCheck prefix is used, so every RUN line expects this.
1172 define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align8(ptr addrspace(1) %ptr, <4 x half> %value) {
1173 ; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align8(
1174 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
1175 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 8
1176 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
1177 ; CHECK: atomicrmw.start:
1178 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1179 ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x half> @llvm.maxnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
1180 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP2]] to i64
1181 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[LOADED]] to i64
1182 ; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
1183 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1184 ; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1185 ; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to <4 x half>
1186 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1187 ; CHECK: atomicrmw.end:
1188 ; CHECK-NEXT: ret <4 x half> [[TMP6]]
1190 %res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 8
; Input: `atomicrmw fmax` on <4 x bfloat> through a global (addrspace(1))
; pointer, syncscope("agent") seq_cst, align 8.
; Expected expansion: a plain load seeds the loop phi; each iteration computes
; llvm.maxnum.v4bf16 of the loaded value and %value, bitcasts both the new and
; the loaded vector to i64, and issues an i64 cmpxchg with the same syncscope,
; ordering and alignment. The loop repeats until the cmpxchg reports success,
; and the returned value is the cmpxchg result bitcast back to <4 x bfloat>.
; Only the common FileCheck prefix is used, so every RUN line expects this.
1194 define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align8(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
1195 ; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align8(
1196 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
1197 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 8
1198 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
1199 ; CHECK: atomicrmw.start:
1200 ; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1201 ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
1202 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x bfloat> [[TMP2]] to i64
1203 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x bfloat> [[LOADED]] to i64
1204 ; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
1205 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1206 ; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1207 ; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to <4 x bfloat>
1208 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1209 ; CHECK: atomicrmw.end:
1210 ; CHECK-NEXT: ret <4 x bfloat> [[TMP6]]
1212 %res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 8
1213 ret <4 x bfloat> %res
; Input: `atomicrmw fmax` on <2 x float> through a global (addrspace(1))
; pointer, syncscope("agent") seq_cst, align 8.
; Expected expansion: a plain load seeds the loop phi; each iteration computes
; llvm.maxnum.v2f32 of the loaded value and %value, bitcasts both the new and
; the loaded vector to i64, and issues an i64 cmpxchg with the same syncscope,
; ordering and alignment. The loop repeats until the cmpxchg reports success,
; and the returned value is the cmpxchg result bitcast back to <2 x float>.
; Only the common FileCheck prefix is used, so every RUN line expects this.
1216 define <2 x float> @test_atomicrmw_fmax_v2f32_global_agent_align8(ptr addrspace(1) %ptr, <2 x float> %value) {
1217 ; CHECK-LABEL: define <2 x float> @test_atomicrmw_fmax_v2f32_global_agent_align8(
1218 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] {
1219 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[PTR]], align 8
1220 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
1221 ; CHECK: atomicrmw.start:
1222 ; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x float> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
1223 ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.maxnum.v2f32(<2 x float> [[LOADED]], <2 x float> [[VALUE]])
1224 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[TMP2]] to i64
1225 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[LOADED]] to i64
1226 ; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
1227 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
1228 ; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
1229 ; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to <2 x float>
1230 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1231 ; CHECK: atomicrmw.end:
1232 ; CHECK-NEXT: ret <2 x float> [[TMP6]]
1234 %res = atomicrmw fmax ptr addrspace(1) %ptr, <2 x float> %value syncscope("agent") seq_cst, align 8
1235 ret <2 x float> %res