; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -mcpu=gfx900 -amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
; RUN: opt -S -mcpu=gfx900 -passes=amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
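
; This file exercises the lowering of memory operations on buffer fat
; pointers (addrspace(7)) into llvm.amdgcn.raw.ptr.buffer.* intrinsics:
; plain, nontemporal, invariant, volatile, and atomic loads and stores,
; plus atomicrmw and cmpxchg. Atomic orderings become explicit fences
; bracketing the intrinsic calls.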

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
target triple = "amdgcn--"
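
; Note on the aux (cachepolicy) operand, the final i32 of each intrinsic
; call below: per the buffer intrinsic's cachepolicy encoding, bit 0 is glc,
; bit 1 is slc (used here for !nontemporal), and bit 31 is an internal flag
; marking volatile accesses, so e.g. -2147483646 is 0x80000002, volatile | slc.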

define void @loads(ptr addrspace(8) %buf) {
; CHECK-LABEL: define void @loads
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[SCALAR:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    [[VEC2:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) align 8 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    [[VEC4:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    [[NONTEMPORAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 2), !nontemporal [[META0:![0-9]+]]
; CHECK-NEXT:    [[INVARIANT:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !invariant.load [[META1:![0-9]+]]
; CHECK-NEXT:    [[NONTEMPORAL_INVARIANT:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !invariant.load [[META1]], !nontemporal [[META0]]
; CHECK-NEXT:    [[VOLATILE:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT:    [[VOLATILE_NONTEMPORAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483646), !nontemporal [[META0]]
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[ATOMIC:%.*]] = call float @llvm.amdgcn.raw.ptr.atomic.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483647)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    [[ATOMIC_MONOTONIC:%.*]] = call float @llvm.amdgcn.raw.ptr.atomic.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 1)
; CHECK-NEXT:    [[ATOMIC_ACQUIRE:%.*]] = call float @llvm.amdgcn.raw.ptr.atomic.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 1)
; CHECK-NEXT:    fence acquire
; CHECK-NEXT:    ret void
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr float, ptr addrspace(7) %base, i32 4

  %scalar = load float, ptr addrspace(7) %p, align 4
  %vec2 = load <2 x float>, ptr addrspace(7) %p, align 8
  %vec4 = load <4 x float>, ptr addrspace(7) %p, align 16

  %nontemporal = load float, ptr addrspace(7) %p, !nontemporal !0
  %invariant = load float, ptr addrspace(7) %p, !invariant.load !1
  %nontemporal.invariant = load float, ptr addrspace(7) %p, !nontemporal !0, !invariant.load !1

  %volatile = load volatile float, ptr addrspace(7) %p
  %volatile.nontemporal = load volatile float, ptr addrspace(7) %p, !nontemporal !0

  %atomic = load atomic volatile float, ptr addrspace(7) %p syncscope("wavefront") seq_cst, align 4
  %atomic.monotonic = load atomic float, ptr addrspace(7) %p syncscope("wavefront") monotonic, align 4
  %atomic.acquire = load atomic float, ptr addrspace(7) %p acquire, align 4
  ret void
}
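
; Stores go through the same lowering as the loads above: the same aux bits
; for nontemporal and volatile accesses, and the same fence insertion for
; the atomic orderings.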
define void @stores(ptr addrspace(8) %buf, float %f, <4 x float> %f4) {
; CHECK-LABEL: define void @stores
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], float [[F:%.*]], <4 x float> [[F4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> [[F4]], ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 2), !nontemporal [[META0]]
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483646), !nontemporal [[META0]]
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483647)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 1)
; CHECK-NEXT:    fence release
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 1)
; CHECK-NEXT:    ret void
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr float, ptr addrspace(7) %base, i32 4

  store float %f, ptr addrspace(7) %p, align 4
  store <4 x float> %f4, ptr addrspace(7) %p, align 16

  store float %f, ptr addrspace(7) %p, !nontemporal !0

  store volatile float %f, ptr addrspace(7) %p
  store volatile float %f, ptr addrspace(7) %p, !nontemporal !0

  store atomic volatile float %f, ptr addrspace(7) %p syncscope("wavefront") seq_cst, align 4
  store atomic float %f, ptr addrspace(7) %p syncscope("wavefront") monotonic, align 4
  store atomic float %f, ptr addrspace(7) %p release, align 4
  ret void
}

define void @atomicrmw(ptr addrspace(8) %buf, float %f, i32 %i) {
; CHECK-LABEL: define void @atomicrmw
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], float [[F:%.*]], i32 [[I:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[XCHG:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[ADD:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[SUB:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[AND:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.and.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.or.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[XOR:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.xor.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[MIN:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smin.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[MAX:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smax.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umin.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[UMAX:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umax.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[FADD:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[FMAX:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[FMIN:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    ret void
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr float, ptr addrspace(7) %base, i32 4

  ; Fence insertion is tested by loads and stores
  %xchg = atomicrmw xchg ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %add = atomicrmw add ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %sub = atomicrmw sub ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %and = atomicrmw and ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %or = atomicrmw or ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %xor = atomicrmw xor ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %min = atomicrmw min ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %max = atomicrmw max ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %umin = atomicrmw umin ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %umax = atomicrmw umax ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4

  %fadd = atomicrmw fadd ptr addrspace(7) %p, float %f syncscope("wavefront") seq_cst, align 4
  %fmax = atomicrmw fmax ptr addrspace(7) %p, float %f syncscope("wavefront") seq_cst, align 4
  %fmin = atomicrmw fmin ptr addrspace(7) %p, float %f syncscope("wavefront") seq_cst, align 4

  ; Check a no-return atomic
  atomicrmw add ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  ret void
}
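
; The buffer cmpswap intrinsic returns only the old value, so the {i32, i1}
; result of cmpxchg has to be rebuilt, with the success bit recomputed by an
; icmp against the expected value.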
define {i32, i1} @cmpxchg(ptr addrspace(8) %buf, i32 %wanted, i32 %new) {
; CHECK-LABEL: define { i32, i1 } @cmpxchg
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[WANTED:%.*]], i32 [[NEW:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[RET:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 [[NEW]], i32 [[WANTED]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } poison, i32 [[RET]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[RET]], [[WANTED]]
; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { i32, i1 } [[TMP1]], i1 [[TMP2]], 1
; CHECK-NEXT:    ret { i32, i1 } [[TMP3]]
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr i32, ptr addrspace(7) %base, i32 4

  %ret = cmpxchg volatile ptr addrspace(7) %p, i32 %wanted, i32 %new syncscope("wavefront") acq_rel monotonic, align 4
  ret {i32, i1} %ret
}
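
; For a weak cmpxchg, the success bit is not recomputed and is left as
; poison, as the checks below show.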
define {i32, i1} @cmpxchg_weak(ptr addrspace(8) %buf, i32 %wanted, i32 %new) {
; CHECK-LABEL: define { i32, i1 } @cmpxchg_weak
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[WANTED:%.*]], i32 [[NEW:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[RET:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 [[NEW]], i32 [[WANTED]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } poison, i32 [[RET]], 0
; CHECK-NEXT:    ret { i32, i1 } [[TMP1]]
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr i32, ptr addrspace(7) %base, i32 4

  %ret = cmpxchg weak ptr addrspace(7) %p, i32 %wanted, i32 %new syncscope("wavefront") acq_rel monotonic, align 4
  ret {i32, i1} %ret
}

!0 = !{i32 1}
!1 = !{}