; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -S | FileCheck %s
; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -asan-instrument-writes=0 -S | FileCheck %s -check-prefix=DISABLED

; Support ASan instrumentation for llvm.vp.{load,store,gather,scatter} and
; llvm.experimental.vp.strided.{load,store}, with both constant and variable masks.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

;;;;;;;;;;;;;;;; STORE
declare void @llvm.vp.store.v4f32.p0(<4 x float>, ptr, <4 x i1>, i32) argmemonly nounwind

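;; Store with a variable mask: each lane that is enabled by both the mask and EVL is checked with __asan_store4.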
define void @store.v4f32.variable(ptr align 4 %p, <4 x float> %arg, <4 x i1> %mask, i32 %evl) sanitize_address {
; CHECK-LABEL: @store.v4f32.variable(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
; CHECK:       2:
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
; CHECK:       .split:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[MASK:%.*]], i64 [[IV]]
; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP8]])
; CHECK-NEXT:    br label [[TMP9]]
; CHECK:       9:
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
; CHECK:       .split.split:
; CHECK-NEXT:    br label [[TMP10]]
; CHECK:       10:
; CHECK-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P]], <4 x i1> [[MASK]], i32 [[EVL]])
; CHECK-NEXT:    ret void
;
; DISABLED-LABEL: @store.v4f32.variable(
; DISABLED-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P:%.*]], <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
; DISABLED-NEXT:    ret void
;
  tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> %mask, i32 %evl)
  ret void
}

;; Store using two vp.stores; both should be instrumented.
define void @store.v4f32.1010.split(ptr align 4 %p, <4 x float> %arg, i32 %evl) sanitize_address {
; CHECK-LABEL: @store.v4f32.1010.split(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
; CHECK:       2:
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
; CHECK:       .split:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> <i1 false, i1 false, i1 true, i1 true>, i64 [[IV]]
; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP8]])
; CHECK-NEXT:    br label [[TMP9]]
; CHECK:       9:
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
; CHECK:       .split.split:
; CHECK-NEXT:    br label [[TMP10]]
; CHECK:       10:
; CHECK-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 true>, i32 [[EVL]])
; CHECK-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[EVL]], 0
; CHECK-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP20:%.*]]
; CHECK:       12:
; CHECK-NEXT:    [[TMP13:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP13]], i64 4)
; CHECK-NEXT:    br label [[DOTSPLIT1:%.*]]
; CHECK:       .split1:
; CHECK-NEXT:    [[IV2:%.*]] = phi i64 [ 0, [[TMP12]] ], [ [[IV2_NEXT:%.*]], [[TMP19:%.*]] ]
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i64 [[IV2]]
; CHECK-NEXT:    br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP19]]
; CHECK:       16:
; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 [[IV2]]
; CHECK-NEXT:    [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP18]])
; CHECK-NEXT:    br label [[TMP19]]
; CHECK:       19:
; CHECK-NEXT:    [[IV2_NEXT]] = add nuw nsw i64 [[IV2]], 1
; CHECK-NEXT:    [[IV2_CHECK:%.*]] = icmp eq i64 [[IV2_NEXT]], [[TMP14]]
; CHECK-NEXT:    br i1 [[IV2_CHECK]], label [[DOTSPLIT1_SPLIT:%.*]], label [[DOTSPLIT1]]
; CHECK:       .split1.split:
; CHECK-NEXT:    br label [[TMP20]]
; CHECK:       20:
; CHECK-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL]])
; CHECK-NEXT:    ret void
;
; DISABLED-LABEL: @store.v4f32.1010.split(
; DISABLED-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P:%.*]], <4 x i1> <i1 false, i1 false, i1 true, i1 true>, i32 [[EVL:%.*]])
; DISABLED-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL]])
; DISABLED-NEXT:    ret void
;
  tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> <i1 false, i1 false, i1 true, i1 true>, i32 %evl)
  tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 %evl)
  ret void
}

;; Store using a vp.store after a full store. The second store shouldn't be instrumented.
define void @store.v4f32.0010.after.full.store(ptr align 4 %p, <4 x float> %arg, i32 %evl) sanitize_address {
; CHECK-LABEL: @store.v4f32.0010.after.full.store(
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CHECK-NEXT:    call void @__asan_store16(i64 [[TMP1]])
; CHECK-NEXT:    store <4 x float> [[ARG:%.*]], ptr [[P]], align 16
; CHECK-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL:%.*]])
; CHECK-NEXT:    ret void
;
; DISABLED-LABEL: @store.v4f32.0010.after.full.store(
; DISABLED-NEXT:    store <4 x float> [[ARG:%.*]], ptr [[P:%.*]], align 16
; DISABLED-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL:%.*]])
; DISABLED-NEXT:    ret void
;
  store <4 x float> %arg, ptr %p
  tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 %evl)
  ret void
}

;;;;;;;;;;;;;;;; LOAD
declare <4 x float> @llvm.vp.load.v4f32.p0(ptr, <4 x i1>, i32) argmemonly nounwind
declare <8 x i32> @llvm.vp.load.v8i32.p0(ptr, <8 x i1>, i32) argmemonly nounwind

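;; Load with a variable mask: each lane that is enabled by both the mask and EVL is checked with __asan_load4.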
define <4 x float> @load.v4f32.variable(ptr align 4 %p, <4 x float> %arg, <4 x i1> %mask, i32 %evl) sanitize_address {
; CHECK-LABEL: @load.v4f32.variable(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
; CHECK:       2:
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
; CHECK:       .split:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[MASK:%.*]], i64 [[IV]]
; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP8]])
; CHECK-NEXT:    br label [[TMP9]]
; CHECK:       9:
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
; CHECK:       .split.split:
; CHECK-NEXT:    br label [[TMP10]]
; CHECK:       10:
; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> [[MASK]], i32 [[EVL]])
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
; DISABLED-LABEL: @load.v4f32.variable(
; DISABLED-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P:%.*]], <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
; DISABLED-NEXT:    ret <4 x float> [[RES]]
;
  %res = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> %mask, i32 %evl)
  ret <4 x float> %res
}

;; Load using two vp.loads; both should be instrumented.
define <4 x float> @load.v4f32.1001.split(ptr align 4 %p, i32 %evl) sanitize_address {
; CHECK-LABEL: @load.v4f32.1001.split(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
; CHECK:       2:
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
; CHECK:       .split:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i64 [[IV]]
; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP8]])
; CHECK-NEXT:    br label [[TMP9]]
; CHECK:       9:
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
; CHECK:       .split.split:
; CHECK-NEXT:    br label [[TMP10]]
; CHECK:       10:
; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 [[EVL]])
; CHECK-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[EVL]], 0
; CHECK-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP20:%.*]]
; CHECK:       12:
; CHECK-NEXT:    [[TMP13:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP13]], i64 4)
; CHECK-NEXT:    br label [[DOTSPLIT1:%.*]]
; CHECK:       .split1:
; CHECK-NEXT:    [[IV2:%.*]] = phi i64 [ 0, [[TMP12]] ], [ [[IV2_NEXT:%.*]], [[TMP19:%.*]] ]
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i64 [[IV2]]
; CHECK-NEXT:    br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP19]]
; CHECK:       16:
; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 [[IV2]]
; CHECK-NEXT:    [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP18]])
; CHECK-NEXT:    br label [[TMP19]]
; CHECK:       19:
; CHECK-NEXT:    [[IV2_NEXT]] = add nuw nsw i64 [[IV2]], 1
; CHECK-NEXT:    [[IV2_CHECK:%.*]] = icmp eq i64 [[IV2_NEXT]], [[TMP14]]
; CHECK-NEXT:    br i1 [[IV2_CHECK]], label [[DOTSPLIT1_SPLIT:%.*]], label [[DOTSPLIT1]]
; CHECK:       .split1.split:
; CHECK-NEXT:    br label [[TMP20]]
; CHECK:       20:
; CHECK-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL]])
; CHECK-NEXT:    ret <4 x float> [[RES2]]
;
; DISABLED-LABEL: @load.v4f32.1001.split(
; DISABLED-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P:%.*]], <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 [[EVL:%.*]])
; DISABLED-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL]])
; DISABLED-NEXT:    ret <4 x float> [[RES2]]
;
  %res = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 %evl)
  %res2 = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 %evl)
  ret <4 x float> %res2
}

;; Load using a vp.load after a full load. The second load shouldn't be instrumented.
define <4 x float> @load.v4f32.1001.after.full.load(ptr align 4 %p, i32 %evl) sanitize_address {
; CHECK-LABEL: @load.v4f32.1001.after.full.load(
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CHECK-NEXT:    call void @__asan_load16(i64 [[TMP1]])
; CHECK-NEXT:    [[RES:%.*]] = load <4 x float>, ptr [[P]], align 16
; CHECK-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL:%.*]])
; CHECK-NEXT:    ret <4 x float> [[RES2]]
;
; DISABLED-LABEL: @load.v4f32.1001.after.full.load(
; DISABLED-NEXT:    [[RES:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
; DISABLED-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL:%.*]])
; DISABLED-NEXT:    ret <4 x float> [[RES2]]
;
  %res = load <4 x float>, ptr %p
  %res2 = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 %evl)
  ret <4 x float> %res2
}

;; Scalable vector tests
;; ---------------------------
declare <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr, <vscale x 4 x i1>, i32)
declare void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float>, ptr, <vscale x 4 x i1>, i32)

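;; Scalable-vector load: the per-lane check loop bounds its trip count by umin(EVL, vscale x 4).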
define <vscale x 4 x float> @scalable.load.nxv4f32(ptr align 4 %p, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
; CHECK-LABEL: @scalable.load.nxv4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP12:%.*]]
; CHECK:       2:
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
; CHECK:       .split:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP10]])
; CHECK-NEXT:    br label [[TMP11]]
; CHECK:       11:
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
; CHECK:       .split.split:
; CHECK-NEXT:    br label [[TMP12]]
; CHECK:       12:
; CHECK-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr [[P]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
; CHECK-NEXT:    ret <vscale x 4 x float> [[RES]]
;
; DISABLED-LABEL: @scalable.load.nxv4f32(
; DISABLED-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr [[P:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
; DISABLED-NEXT:    ret <vscale x 4 x float> [[RES]]
;
  %res = tail call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr %p, <vscale x 4 x i1> %mask, i32 %evl)
  ret <vscale x 4 x float> %res
}

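;; Scalable-vector store: same check loop as the scalable load, using __asan_store4.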
define void @scalable.store.nxv4f32(ptr align 4 %p, <vscale x 4 x float> %arg, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
; CHECK-LABEL: @scalable.store.nxv4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP12:%.*]]
; CHECK:       2:
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
; CHECK:       .split:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP10]])
; CHECK-NEXT:    br label [[TMP11]]
; CHECK:       11:
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
; CHECK:       .split.split:
; CHECK-NEXT:    br label [[TMP12]]
; CHECK:       12:
; CHECK-NEXT:    tail call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[ARG:%.*]], ptr [[P]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
; CHECK-NEXT:    ret void
;
; DISABLED-LABEL: @scalable.store.nxv4f32(
; DISABLED-NEXT:    tail call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[ARG:%.*]], ptr [[P:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
; DISABLED-NEXT:    ret void
;
  tail call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> %arg, ptr %p, <vscale x 4 x i1> %mask, i32 %evl)
  ret void
}

; Test vp strided loads and stores.
declare <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr, i32, <vscale x 4 x i1>, i32)
declare void @llvm.experimental.vp.strided.store.nxv4f32.p0.i32(<vscale x 4 x float>, ptr, i32, <vscale x 4 x i1>, i32)

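;; Strided load: lane addresses are computed as p + iv * stride bytes; since the stride is not known
;; to match the element alignment, the generic __asan_loadN check is used.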
define <vscale x 4 x float> @scalable.strided.load.nxv4f32(ptr align 4 %p, i32 %stride, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
; CHECK-LABEL: @scalable.strided.load.nxv4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP14:%.*]]
; CHECK:       2:
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
; CHECK:       .split:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP13:%.*]] ]
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
; CHECK-NEXT:    br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP13]]
; CHECK:       9:
; CHECK-NEXT:    [[TMP10:%.*]] = mul i64 [[IV]], [[TMP7]]
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[TMP11]] to i64
; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP12]], i64 4)
; CHECK-NEXT:    br label [[TMP13]]
; CHECK:       13:
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
; CHECK:       .split.split:
; CHECK-NEXT:    br label [[TMP14]]
; CHECK:       14:
; CHECK-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr [[P]], i32 [[STRIDE]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
; CHECK-NEXT:    ret <vscale x 4 x float> [[RES]]
;
; DISABLED-LABEL: @scalable.strided.load.nxv4f32(
; DISABLED-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr [[P:%.*]], i32 [[STRIDE:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
; DISABLED-NEXT:    ret <vscale x 4 x float> [[RES]]
;
  %res = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr %p, i32 %stride, <vscale x 4 x i1> %mask, i32 %evl)
  ret <vscale x 4 x float> %res
}

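;; Strided store: same addressing as the strided load, checked with __asan_storeN.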
define void @scalable.strided.store.nxv4f32(<vscale x 4 x float> %arg, ptr align 4 %p, i32 %stride, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
; CHECK-LABEL: @scalable.strided.store.nxv4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP14:%.*]]
; CHECK:       2:
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
; CHECK:       .split:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP13:%.*]] ]
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
; CHECK-NEXT:    br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP13]]
; CHECK:       9:
; CHECK-NEXT:    [[TMP10:%.*]] = mul i64 [[IV]], [[TMP7]]
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[TMP11]] to i64
; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP12]], i64 4)
; CHECK-NEXT:    br label [[TMP13]]
; CHECK:       13:
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
; CHECK:       .split.split:
; CHECK-NEXT:    br label [[TMP14]]
; CHECK:       14:
; CHECK-NEXT:    tail call void @llvm.experimental.vp.strided.store.nxv4f32.p0.i32(<vscale x 4 x float> [[ARG:%.*]], ptr [[P]], i32 [[STRIDE]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
; CHECK-NEXT:    ret void
;
; DISABLED-LABEL: @scalable.strided.store.nxv4f32(
; DISABLED-NEXT:    tail call void @llvm.experimental.vp.strided.store.nxv4f32.p0.i32(<vscale x 4 x float> [[ARG:%.*]], ptr [[P:%.*]], i32 [[STRIDE:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
; DISABLED-NEXT:    ret void
;
  tail call void @llvm.experimental.vp.strided.store.nxv4f32.p0.i32(<vscale x 4 x float> %arg, ptr %p, i32 %stride, <vscale x 4 x i1> %mask, i32 %evl)
  ret void
}

; Test where the stride is a multiple of the pointer alignment; the sized __asan_load4 check can be used.
define <vscale x 4 x float> @scalable.strided.load.nxv4f32.align(ptr align 4 %p, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
; CHECK-LABEL: @scalable.strided.load.nxv4f32.align(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
; CHECK:       2:
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
; CHECK:       .split:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP12:%.*]] ]
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP12]]
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 [[IV]], 4
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP9]]
; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP11]])
; CHECK-NEXT:    br label [[TMP12]]
; CHECK:       12:
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
; CHECK:       .split.split:
; CHECK-NEXT:    br label [[TMP13]]
; CHECK:       13:
; CHECK-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr [[P]], i32 4, <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
; CHECK-NEXT:    ret <vscale x 4 x float> [[RES]]
;
; DISABLED-LABEL: @scalable.strided.load.nxv4f32.align(
; DISABLED-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr [[P:%.*]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
; DISABLED-NEXT:    ret <vscale x 4 x float> [[RES]]
;
  %res = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr %p, i32 4, <vscale x 4 x i1> %mask, i32 %evl)
  ret <vscale x 4 x float> %res
}

; Test vp gather and scatter.
declare <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr>, <vscale x 4 x i1>, i32)
declare void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

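;; Gather: each active lane's pointer is extracted from the pointer vector and checked with __asan_load4.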
define <vscale x 4 x float> @scalable.gather.nxv4f32(<vscale x 4 x ptr> %vp, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
; CHECK-LABEL: @scalable.gather.nxv4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP12:%.*]]
; CHECK:       2:
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
; CHECK:       .split:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 4 x ptr> [[VP:%.*]], i64 [[IV]]
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP10]])
; CHECK-NEXT:    br label [[TMP11]]
; CHECK:       11:
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
; CHECK:       .split.split:
; CHECK-NEXT:    br label [[TMP12]]
; CHECK:       12:
; CHECK-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 [[VP]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
; CHECK-NEXT:    ret <vscale x 4 x float> [[RES]]
;
; DISABLED-LABEL: @scalable.gather.nxv4f32(
; DISABLED-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 [[VP:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
; DISABLED-NEXT:    ret <vscale x 4 x float> [[RES]]
;
  %res = tail call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 %vp, <vscale x 4 x i1> %mask, i32 %evl)
  ret <vscale x 4 x float> %res
}

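;; Scatter: like the gather, but each extracted lane pointer is checked with __asan_store4.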
define void @scalable.scatter.nxv4f32(<vscale x 4 x float> %arg, <vscale x 4 x ptr> %vp, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
; CHECK-LABEL: @scalable.scatter.nxv4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP12:%.*]]
; CHECK:       2:
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
; CHECK:       .split:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 4 x ptr> [[VP:%.*]], i64 [[IV]]
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP10]])
; CHECK-NEXT:    br label [[TMP11]]
; CHECK:       11:
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
; CHECK:       .split.split:
; CHECK-NEXT:    br label [[TMP12]]
; CHECK:       12:
; CHECK-NEXT:    tail call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[ARG:%.*]], <vscale x 4 x ptr> align 4 [[VP]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
; CHECK-NEXT:    ret void
;
; DISABLED-LABEL: @scalable.scatter.nxv4f32(
; DISABLED-NEXT:    tail call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[ARG:%.*]], <vscale x 4 x ptr> align 4 [[VP:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
; DISABLED-NEXT:    ret void
;
  tail call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %arg, <vscale x 4 x ptr> align 4 %vp, <vscale x 4 x i1> %mask, i32 %evl)
  ret void
}