1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -S \
4 ; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -asan-instrument-writes=0 -S \
5 ; RUN: | FileCheck %s -check-prefix=DISABLED
7 ; Test ASan instrumentation of the llvm.vp.{load,store} intrinsics (variable and constant masks).
9 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
11 ;;;;;;;;;;;;;;;; STORE
;; Fixed-width VP masked store: (value, pointer, per-lane mask, explicit vector length).
12 declare void @llvm.vp.store.v4f32.p0(<4 x float>, ptr, <4 x i1>, i32) argmemonly nounwind
14 define void @store.v4f32.variable(ptr align 4 %p, <4 x float> %arg, <4 x i1> %mask, i32 %evl) sanitize_address {
15 ; CHECK-LABEL: @store.v4f32.variable(
16 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
17 ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
19 ; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
20 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
21 ; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
23 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
24 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[MASK:%.*]], i64 [[IV]]
25 ; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
27 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
28 ; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
29 ; CHECK-NEXT: call void @__asan_store4(i64 [[TMP8]])
30 ; CHECK-NEXT: br label [[TMP9]]
32 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
33 ; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
34 ; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
35 ; CHECK: .split.split:
36 ; CHECK-NEXT: br label [[TMP10]]
38 ; CHECK-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P]], <4 x i1> [[MASK]], i32 [[EVL]])
39 ; CHECK-NEXT: ret void
41 ; DISABLED-LABEL: @store.v4f32.variable(
42 ; DISABLED-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P:%.*]], <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
43 ; DISABLED-NEXT: ret void
45 tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> %mask, i32 %evl)
49 ;; Store using two vp.stores, which should instrument them both.
50 define void @store.v4f32.1010.split(ptr align 4 %p, <4 x float> %arg, i32 %evl) sanitize_address {
51 ; CHECK-LABEL: @store.v4f32.1010.split(
52 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
53 ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
55 ; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
56 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
57 ; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
59 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
60 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> <i1 false, i1 false, i1 true, i1 true>, i64 [[IV]]
61 ; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
63 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
64 ; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
65 ; CHECK-NEXT: call void @__asan_store4(i64 [[TMP8]])
66 ; CHECK-NEXT: br label [[TMP9]]
68 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
69 ; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
70 ; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
71 ; CHECK: .split.split:
72 ; CHECK-NEXT: br label [[TMP10]]
74 ; CHECK-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 true>, i32 [[EVL]])
75 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i32 [[EVL]], 0
76 ; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP20:%.*]]
78 ; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[EVL]] to i64
79 ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP13]], i64 4)
80 ; CHECK-NEXT: br label [[DOTSPLIT1:%.*]]
82 ; CHECK-NEXT: [[IV2:%.*]] = phi i64 [ 0, [[TMP12]] ], [ [[IV2_NEXT:%.*]], [[TMP19:%.*]] ]
83 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i64 [[IV2]]
84 ; CHECK-NEXT: br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP19]]
86 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 [[IV2]]
87 ; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
88 ; CHECK-NEXT: call void @__asan_store4(i64 [[TMP18]])
89 ; CHECK-NEXT: br label [[TMP19]]
91 ; CHECK-NEXT: [[IV2_NEXT]] = add nuw nsw i64 [[IV2]], 1
92 ; CHECK-NEXT: [[IV2_CHECK:%.*]] = icmp eq i64 [[IV2_NEXT]], [[TMP14]]
93 ; CHECK-NEXT: br i1 [[IV2_CHECK]], label [[DOTSPLIT1_SPLIT:%.*]], label [[DOTSPLIT1]]
94 ; CHECK: .split1.split:
95 ; CHECK-NEXT: br label [[TMP20]]
97 ; CHECK-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL]])
98 ; CHECK-NEXT: ret void
100 ; DISABLED-LABEL: @store.v4f32.1010.split(
101 ; DISABLED-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P:%.*]], <4 x i1> <i1 false, i1 false, i1 true, i1 true>, i32 [[EVL:%.*]])
102 ; DISABLED-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL]])
103 ; DISABLED-NEXT: ret void
105 tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> <i1 false, i1 false, i1 true, i1 true>, i32 %evl)
106 tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 %evl)
110 ;; Store using a vp.store after a full store. Shouldn't instrument the second one.
111 define void @store.v4f32.0010.after.full.store(ptr align 4 %p, <4 x float> %arg, i32 %evl) sanitize_address {
112 ; CHECK-LABEL: @store.v4f32.0010.after.full.store(
113 ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
114 ; CHECK-NEXT: call void @__asan_store16(i64 [[TMP1]])
115 ; CHECK-NEXT: store <4 x float> [[ARG:%.*]], ptr [[P]], align 16
116 ; CHECK-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL:%.*]])
117 ; CHECK-NEXT: ret void
119 ; DISABLED-LABEL: @store.v4f32.0010.after.full.store(
120 ; DISABLED-NEXT: store <4 x float> [[ARG:%.*]], ptr [[P:%.*]], align 16
121 ; DISABLED-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL:%.*]])
122 ; DISABLED-NEXT: ret void
124 store <4 x float> %arg, ptr %p
125 tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 %evl)
129 ;;;;;;;;;;;;;;;; LOAD
;; Fixed-width VP masked loads: (pointer, per-lane mask, explicit vector length).
;; NOTE(review): @llvm.vp.load.v8i32.p0 has no visible use in this chunk — confirm it is exercised elsewhere in the file.
130 declare <4 x float> @llvm.vp.load.v4f32.p0(ptr, <4 x i1>, i32) argmemonly nounwind
131 declare <8 x i32> @llvm.vp.load.v8i32.p0(ptr, <8 x i1>, i32) argmemonly nounwind
133 define <4 x float> @load.v4f32.variable(ptr align 4 %p, <4 x float> %arg, <4 x i1> %mask, i32 %evl) sanitize_address {
134 ; CHECK-LABEL: @load.v4f32.variable(
135 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
136 ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
138 ; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
139 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
140 ; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
142 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
143 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[MASK:%.*]], i64 [[IV]]
144 ; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
146 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
147 ; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
148 ; CHECK-NEXT: call void @__asan_load4(i64 [[TMP8]])
149 ; CHECK-NEXT: br label [[TMP9]]
151 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
152 ; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
153 ; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
154 ; CHECK: .split.split:
155 ; CHECK-NEXT: br label [[TMP10]]
157 ; CHECK-NEXT: [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> [[MASK]], i32 [[EVL]])
158 ; CHECK-NEXT: ret <4 x float> [[RES]]
160 ; DISABLED-LABEL: @load.v4f32.variable(
161 ; DISABLED-NEXT: [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P:%.*]], <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
162 ; DISABLED-NEXT: ret <4 x float> [[RES]]
164 %res = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> %mask, i32 %evl)
168 ;; Load using two vp.loads, which should instrument them both.
169 define <4 x float> @load.v4f32.1001.split(ptr align 4 %p, i32 %evl) sanitize_address {
170 ; CHECK-LABEL: @load.v4f32.1001.split(
171 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
172 ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
174 ; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
175 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
176 ; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
178 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
179 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i64 [[IV]]
180 ; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
182 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
183 ; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
184 ; CHECK-NEXT: call void @__asan_load4(i64 [[TMP8]])
185 ; CHECK-NEXT: br label [[TMP9]]
187 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
188 ; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
189 ; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
190 ; CHECK: .split.split:
191 ; CHECK-NEXT: br label [[TMP10]]
193 ; CHECK-NEXT: [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 [[EVL]])
194 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i32 [[EVL]], 0
195 ; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP20:%.*]]
197 ; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[EVL]] to i64
198 ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP13]], i64 4)
199 ; CHECK-NEXT: br label [[DOTSPLIT1:%.*]]
201 ; CHECK-NEXT: [[IV2:%.*]] = phi i64 [ 0, [[TMP12]] ], [ [[IV2_NEXT:%.*]], [[TMP19:%.*]] ]
202 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i64 [[IV2]]
203 ; CHECK-NEXT: br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP19]]
205 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 [[IV2]]
206 ; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
207 ; CHECK-NEXT: call void @__asan_load4(i64 [[TMP18]])
208 ; CHECK-NEXT: br label [[TMP19]]
210 ; CHECK-NEXT: [[IV2_NEXT]] = add nuw nsw i64 [[IV2]], 1
211 ; CHECK-NEXT: [[IV2_CHECK:%.*]] = icmp eq i64 [[IV2_NEXT]], [[TMP14]]
212 ; CHECK-NEXT: br i1 [[IV2_CHECK]], label [[DOTSPLIT1_SPLIT:%.*]], label [[DOTSPLIT1]]
213 ; CHECK: .split1.split:
214 ; CHECK-NEXT: br label [[TMP20]]
216 ; CHECK-NEXT: [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL]])
217 ; CHECK-NEXT: ret <4 x float> [[RES2]]
219 ; DISABLED-LABEL: @load.v4f32.1001.split(
220 ; DISABLED-NEXT: [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P:%.*]], <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 [[EVL:%.*]])
221 ; DISABLED-NEXT: [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL]])
222 ; DISABLED-NEXT: ret <4 x float> [[RES2]]
224 %res = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 %evl)
225 %res2 = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 %evl)
226 ret <4 x float> %res2
229 ;; Load using a vp.load after a full load. Shouldn't instrument the second one.
230 define <4 x float> @load.v4f32.1001.after.full.load(ptr align 4 %p, i32 %evl) sanitize_address {
231 ; CHECK-LABEL: @load.v4f32.1001.after.full.load(
232 ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
233 ; CHECK-NEXT: call void @__asan_load16(i64 [[TMP1]])
234 ; CHECK-NEXT: [[RES:%.*]] = load <4 x float>, ptr [[P]], align 16
235 ; CHECK-NEXT: [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL:%.*]])
236 ; CHECK-NEXT: ret <4 x float> [[RES2]]
238 ; DISABLED-LABEL: @load.v4f32.1001.after.full.load(
239 ; DISABLED-NEXT: [[RES:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
240 ; DISABLED-NEXT: [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL:%.*]])
241 ; DISABLED-NEXT: ret <4 x float> [[RES2]]
243 %res = load <4 x float>, ptr %p
244 %res2 = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 %evl)
245 ret <4 x float> %res2
248 ;; Scalable vector tests
249 ;; ---------------------------
;; Scalable-vector VP masked load/store; the instrumentation computes the trip
;; count at runtime via @llvm.vscale.i64 (see the CHECK lines in the tests below).
250 declare <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr, <vscale x 4 x i1>, i32)
251 declare void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float>, ptr, <vscale x 4 x i1>, i32)
253 define <vscale x 4 x float> @scalable.load.nxv4f32(ptr align 4 %p, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
254 ; CHECK-LABEL: @scalable.load.nxv4f32(
255 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
256 ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP12:%.*]]
258 ; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
259 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
260 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
261 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
262 ; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
264 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
265 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
266 ; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
268 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
269 ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
270 ; CHECK-NEXT: call void @__asan_load4(i64 [[TMP10]])
271 ; CHECK-NEXT: br label [[TMP11]]
273 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
274 ; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
275 ; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
276 ; CHECK: .split.split:
277 ; CHECK-NEXT: br label [[TMP12]]
279 ; CHECK-NEXT: [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr [[P]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
280 ; CHECK-NEXT: ret <vscale x 4 x float> [[RES]]
282 ; DISABLED-LABEL: @scalable.load.nxv4f32(
283 ; DISABLED-NEXT: [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr [[P:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
284 ; DISABLED-NEXT: ret <vscale x 4 x float> [[RES]]
286 %res = tail call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr %p, <vscale x 4 x i1> %mask, i32 %evl)
287 ret <vscale x 4 x float> %res
290 define void @scalable.store.nxv4f32(ptr align 4 %p, <vscale x 4 x float> %arg, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
291 ; CHECK-LABEL: @scalable.store.nxv4f32(
292 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
293 ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP12:%.*]]
295 ; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
296 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
297 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
298 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
299 ; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
301 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
302 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
303 ; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
305 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
306 ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
307 ; CHECK-NEXT: call void @__asan_store4(i64 [[TMP10]])
308 ; CHECK-NEXT: br label [[TMP11]]
310 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
311 ; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
312 ; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
313 ; CHECK: .split.split:
314 ; CHECK-NEXT: br label [[TMP12]]
316 ; CHECK-NEXT: tail call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[ARG:%.*]], ptr [[P]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
317 ; CHECK-NEXT: ret void
319 ; DISABLED-LABEL: @scalable.store.nxv4f32(
320 ; DISABLED-NEXT: tail call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[ARG:%.*]], ptr [[P:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
321 ; DISABLED-NEXT: ret void
323 tail call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> %arg, ptr %p, <vscale x 4 x i1> %mask, i32 %evl)
327 ; Test vp strided loads and stores.
;; Experimental VP strided load/store: the i32 after the pointer is the byte
;; stride between consecutive elements; per-lane addresses are p + iv*stride
;; (see the mul/getelementptr i8 sequence in the CHECK lines below).
328 declare <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.i32(ptr, i32, <vscale x 4 x i1>, i32)
329 declare void @llvm.experimental.vp.strided.store.nxv4f32.i32(<vscale x 4 x float>, ptr, i32, <vscale x 4 x i1>, i32)
331 define <vscale x 4 x float> @scalable.strided.load.nxv4f32(ptr align 4 %p, i32 %stride, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
332 ; CHECK-LABEL: @scalable.strided.load.nxv4f32(
333 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
334 ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP14:%.*]]
336 ; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
337 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
338 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
339 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
340 ; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[STRIDE:%.*]] to i64
341 ; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
343 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP13:%.*]] ]
344 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
345 ; CHECK-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP13]]
347 ; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[IV]], [[TMP7]]
348 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP10]]
349 ; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[TMP11]] to i64
350 ; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP12]], i64 4)
351 ; CHECK-NEXT: br label [[TMP13]]
353 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
354 ; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
355 ; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
356 ; CHECK: .split.split:
357 ; CHECK-NEXT: br label [[TMP14]]
359 ; CHECK-NEXT: [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr [[P]], i32 [[STRIDE]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
360 ; CHECK-NEXT: ret <vscale x 4 x float> [[RES]]
362 ; DISABLED-LABEL: @scalable.strided.load.nxv4f32(
363 ; DISABLED-NEXT: [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr [[P:%.*]], i32 [[STRIDE:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
364 ; DISABLED-NEXT: ret <vscale x 4 x float> [[RES]]
366 %res = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.i32(ptr %p, i32 %stride, <vscale x 4 x i1> %mask, i32 %evl)
367 ret <vscale x 4 x float> %res
370 define void @scalable.strided.store.nxv4f32(<vscale x 4 x float> %arg, ptr align 4 %p, i32 %stride, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
371 ; CHECK-LABEL: @scalable.strided.store.nxv4f32(
372 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
373 ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP14:%.*]]
375 ; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
376 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
377 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
378 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
379 ; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[STRIDE:%.*]] to i64
380 ; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
382 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP13:%.*]] ]
383 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
384 ; CHECK-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP13]]
386 ; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[IV]], [[TMP7]]
387 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP10]]
388 ; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[TMP11]] to i64
389 ; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP12]], i64 4)
390 ; CHECK-NEXT: br label [[TMP13]]
392 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
393 ; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
394 ; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
395 ; CHECK: .split.split:
396 ; CHECK-NEXT: br label [[TMP14]]
398 ; CHECK-NEXT: tail call void @llvm.experimental.vp.strided.store.nxv4f32.p0.i32(<vscale x 4 x float> [[ARG:%.*]], ptr [[P]], i32 [[STRIDE]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
399 ; CHECK-NEXT: ret void
401 ; DISABLED-LABEL: @scalable.strided.store.nxv4f32(
402 ; DISABLED-NEXT: tail call void @llvm.experimental.vp.strided.store.nxv4f32.p0.i32(<vscale x 4 x float> [[ARG:%.*]], ptr [[P:%.*]], i32 [[STRIDE:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
403 ; DISABLED-NEXT: ret void
405 tail call void @llvm.experimental.vp.strided.store.nxv4f32.i32(<vscale x 4 x float> %arg, ptr %p, i32 %stride, <vscale x 4 x i1> %mask, i32 %evl)
409 ; Test the stride is a multiple of the pointer alignment.
410 define <vscale x 4 x float> @scalable.strided.load.nxv4f32.align(ptr align 4 %p, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
411 ; CHECK-LABEL: @scalable.strided.load.nxv4f32.align(
412 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
413 ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
415 ; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
416 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
417 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
418 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
419 ; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
421 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP12:%.*]] ]
422 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
423 ; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP12]]
425 ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[IV]], 4
426 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP9]]
427 ; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
428 ; CHECK-NEXT: call void @__asan_load4(i64 [[TMP11]])
429 ; CHECK-NEXT: br label [[TMP12]]
431 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
432 ; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
433 ; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
434 ; CHECK: .split.split:
435 ; CHECK-NEXT: br label [[TMP13]]
437 ; CHECK-NEXT: [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr [[P]], i32 4, <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
438 ; CHECK-NEXT: ret <vscale x 4 x float> [[RES]]
440 ; DISABLED-LABEL: @scalable.strided.load.nxv4f32.align(
441 ; DISABLED-NEXT: [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr [[P:%.*]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
442 ; DISABLED-NEXT: ret <vscale x 4 x float> [[RES]]
444 %res = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.i32(ptr %p, i32 4, <vscale x 4 x i1> %mask, i32 %evl)
445 ret <vscale x 4 x float> %res
448 ; Test vp gather and scatter.
;; VP gather/scatter over a vector of pointers; ASan checks each selected lane's
;; pointer individually (extractelement of the ptr vector in the CHECK lines).
;; NOTE(review): declared with a ".v4p0" suffix while the CHECK output names
;; "@llvm.vp.gather.nxv4f32.nxv4p0" — presumably intrinsic auto-remangling
;; normalizes the mangled name; confirm against the LangRef mangling rules.
449 declare <vscale x 4 x float> @llvm.vp.gather.nxv4f32.v4p0(<vscale x 4 x ptr>, <vscale x 4 x i1>, i32)
450 declare void @llvm.vp.scatter.nxv4f32.v4p0(<vscale x 4 x float>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)
452 define <vscale x 4 x float> @scalable.gather.nxv4f32(<vscale x 4 x ptr> %vp, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
453 ; CHECK-LABEL: @scalable.gather.nxv4f32(
454 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
455 ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP12:%.*]]
457 ; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
458 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
459 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
460 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
461 ; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
463 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
464 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
465 ; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
467 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 4 x ptr> [[VP:%.*]], i64 [[IV]]
468 ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
469 ; CHECK-NEXT: call void @__asan_load4(i64 [[TMP10]])
470 ; CHECK-NEXT: br label [[TMP11]]
472 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
473 ; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
474 ; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
475 ; CHECK: .split.split:
476 ; CHECK-NEXT: br label [[TMP12]]
478 ; CHECK-NEXT: [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 [[VP]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
479 ; CHECK-NEXT: ret <vscale x 4 x float> [[RES]]
481 ; DISABLED-LABEL: @scalable.gather.nxv4f32(
482 ; DISABLED-NEXT: [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 [[VP:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
483 ; DISABLED-NEXT: ret <vscale x 4 x float> [[RES]]
485 %res = tail call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.v4p0(<vscale x 4 x ptr> align 4 %vp, <vscale x 4 x i1> %mask, i32 %evl)
486 ret <vscale x 4 x float> %res
489 define void @scalable.scatter.nxv4f32(<vscale x 4 x float> %arg, <vscale x 4 x ptr> %vp, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
490 ; CHECK-LABEL: @scalable.scatter.nxv4f32(
491 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
492 ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP12:%.*]]
494 ; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
495 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
496 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
497 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
498 ; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
500 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
501 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
502 ; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
504 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 4 x ptr> [[VP:%.*]], i64 [[IV]]
505 ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
506 ; CHECK-NEXT: call void @__asan_store4(i64 [[TMP10]])
507 ; CHECK-NEXT: br label [[TMP11]]
509 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
510 ; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
511 ; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
512 ; CHECK: .split.split:
513 ; CHECK-NEXT: br label [[TMP12]]
515 ; CHECK-NEXT: tail call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[ARG:%.*]], <vscale x 4 x ptr> align 4 [[VP]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
516 ; CHECK-NEXT: ret void
518 ; DISABLED-LABEL: @scalable.scatter.nxv4f32(
519 ; DISABLED-NEXT: tail call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[ARG:%.*]], <vscale x 4 x ptr> align 4 [[VP:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
520 ; DISABLED-NEXT: ret void
522 tail call void @llvm.vp.scatter.nxv4f32.v4p0(<vscale x 4 x float> %arg, <vscale x 4 x ptr> align 4 %vp, <vscale x 4 x i1> %mask, i32 %evl)