1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -instcombine -S < %s | FileCheck %s
; Declarations of the <2 x double> / <4 x double> masked load, store, gather
; and scatter intrinsics called by the functions in this file.
4 declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
5 declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptrs, i32, <2 x i1> %mask)
6 declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %passthru)
7 declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %ptrs, i32, <4 x i1> %mask, <4 x double> %passthru)
8 declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask)
; Masked load with an all-false (zeroinitializer) mask: the expected output
; returns the passthru operand directly and contains no load.
10 define <2 x double> @load_zeromask(<2 x double>* %ptr, <2 x double> %passthru) {
11 ; CHECK-LABEL: @load_zeromask(
12 ; CHECK-NEXT: ret <2 x double> [[PASSTHRU:%.*]]
14 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru)
; Masked load with an all-true constant mask: the expected output is an
; ordinary vector load carrying the intrinsic's alignment operand (2).
18 define <2 x double> @load_onemask(<2 x double>* %ptr, <2 x double> %passthru) {
19 ; CHECK-LABEL: @load_onemask(
20 ; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 2
21 ; CHECK-NEXT: ret <2 x double> [[UNMASKEDLOAD]]
23 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 1>, <2 x double> %passthru)
; Masked load whose mask lanes are true and undef: the expected output is
; still an ordinary (unmasked) vector load with align 2.
27 define <2 x double> @load_undefmask(<2 x double>* %ptr, <2 x double> %passthru) {
28 ; CHECK-LABEL: @load_undefmask(
29 ; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 2
30 ; CHECK-NEXT: ret <2 x double> [[UNMASKEDLOAD]]
32 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 undef>, <2 x double> %passthru)
; External global used only to build a constant-expression mask lane below.
; In @load_cemask one mask lane is `ptrtoint (i8* @G to i1)`; the expected
; output keeps the masked load call as written.
36 @G = external global i8
38 define <2 x double> @load_cemask(<2 x double>* %ptr, <2 x double> %passthru) {
39 ; CHECK-LABEL: @load_cemask(
40 ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 2, <2 x i1> <i1 true, i1 ptrtoint (i8* @G to i1)>, <2 x double> [[PASSTHRU:%.*]])
41 ; CHECK-NEXT: ret <2 x double> [[RES]]
43 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 ptrtoint (i8* @G to i1)>, <2 x double> %passthru)
; Masked load with constant mask <true, false>: lane 0 comes from memory, so
; only lane 1 of the passthru is observable. The expected output drops the
; lane-0 insertelement and keeps a single insert into lane 1.
47 define <2 x double> @load_lane0(<2 x double>* %ptr, double %pt) {
48 ; CHECK-LABEL: @load_lane0(
49 ; CHECK-NEXT: [[PTV2:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 1
50 ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 2, <2 x i1> <i1 true, i1 false>, <2 x double> [[PTV2]])
51 ; CHECK-NEXT: ret <2 x double> [[RES]]
53 %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
54 %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
55 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 true, i1 false>, <2 x double> %ptv2)
; Despite the name, this exercises a masked gather. The constant mask disables
; lane 1, and the expected output replaces the lane-1 GEP index with poison
; while keeping the gather call and the extract of lane 2.
; The %pt argument is not referenced by the visible body.
59 define double @load_all(double* %base, double %pt) {
60 ; CHECK-LABEL: @load_all(
61 ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <4 x i64> <i64 0, i64 poison, i64 2, i64 3>
62 ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> [[PTRS]], i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x double> undef)
63 ; CHECK-NEXT: [[ELT:%.*]] = extractelement <4 x double> [[RES]], i64 2
64 ; CHECK-NEXT: ret double [[ELT]]
66 %ptrs = getelementptr double, double* %base, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
67 %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %ptrs, i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x double> undef)
68 %elt = extractelement <4 x double> %res, i64 2
; Masked load with a non-constant mask and a pointer that carries no
; dereferenceable attribute: the expected output keeps the masked load call.
; The only change is that the two-insert passthru is rewritten as an
; insertelement followed by a splat shufflevector.
72 define <2 x double> @load_generic(<2 x double>* %ptr, double %pt, <2 x i1> %mask) {
73 ; CHECK-LABEL: @load_generic(
74 ; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
75 ; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer
76 ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
77 ; CHECK-NEXT: ret <2 x double> [[RES]]
79 %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
80 %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
81 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
; Masked load with a non-constant mask where the pointer is marked
; dereferenceable(16) align 4 and the function is nofree nosync: the expected
; output is a plain `load ... align 4` followed by a select between the loaded
; value and the passthru, keyed on the mask.
85 define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4 %ptr, double %pt, <2 x i1> %mask) nofree nosync {
86 ; CHECK-LABEL: @load_speculative(
87 ; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
88 ; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer
89 ; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4
90 ; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]]
91 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
93 %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
94 %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
95 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
; Same as @load_speculative except that the pointer parameter has no `align`
; attribute. The expected output is still a plain load plus select; the load
; uses align 4, which matches the intrinsic's alignment operand.
99 define <2 x double> @load_speculative_less_aligned(<2 x double>* dereferenceable(16) %ptr, double %pt, <2 x i1> %mask) nofree nosync {
100 ; CHECK-LABEL: @load_speculative_less_aligned(
101 ; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
102 ; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer
103 ; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4
104 ; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]]
105 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
107 %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
108 %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
109 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
110 ret <2 x double> %res
113 ; Can't speculate since only half of the required size is known to be dereferenceable
; Negative speculation case: the pointer is only dereferenceable(8) while the
; loaded type is <2 x double>. The expected output keeps the masked load call;
; the only differences are a `nonnull` attribute on the pointer argument and
; the passthru rewritten as insertelement + splat shufflevector.
115 define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) nofree nosync {
116 ; CHECK-LABEL: @load_spec_neg_size(
117 ; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
118 ; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer
119 ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
120 ; CHECK-NEXT: ret <2 x double> [[RES]]
122 %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
123 %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
124 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
125 ret <2 x double> %res
128 ; Can only speculate one lane (but it's the only one active)
; The pointer is dereferenceable(8) and lane 1 of the mask is forced to false
; via insertelement, leaving lane 0 as the only possibly-active lane. The
; expected output nevertheless keeps the masked load call (with `nonnull`
; added on the pointer argument) rather than a plain load.
129 define <2 x double> @load_spec_lan0(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) nofree nosync {
130 ; CHECK-LABEL: @load_spec_lan0(
131 ; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
132 ; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer
133 ; CHECK-NEXT: [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1
134 ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PTV2]])
135 ; CHECK-NEXT: ret <2 x double> [[RES]]
137 %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
138 %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
139 %mask2 = insertelement <2 x i1> %mask, i1 false, i64 1
140 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask2, <2 x double> %ptv2)
141 ret <2 x double> %res
; Masked store with an all-false mask: the expected output contains only
; `ret void`, i.e. the store is removed.
144 define void @store_zeromask(<2 x double>* %ptr, <2 x double> %val) {
145 ; CHECK-LABEL: @store_zeromask(
146 ; CHECK-NEXT: ret void
148 call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 4, <2 x i1> zeroinitializer)
; Masked store with an all-true constant mask: the expected output is an
; ordinary vector store carrying the intrinsic's alignment operand (4).
152 define void @store_onemask(<2 x double>* %ptr, <2 x double> %val) {
153 ; CHECK-LABEL: @store_onemask(
154 ; CHECK-NEXT: store <2 x double> [[VAL:%.*]], <2 x double>* [[PTR:%.*]], align 4
155 ; CHECK-NEXT: ret void
157 call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 4, <2 x i1> <i1 1, i1 1>)
; Masked store with constant mask <true, false>: only lane 0 of the stored
; value is written, so the expected output drops the lane-1 insertelement and
; passes the single-insert vector to the masked store.
161 define void @store_demandedelts(<2 x double>* %ptr, double %val) {
162 ; CHECK-LABEL: @store_demandedelts(
163 ; CHECK-NEXT: [[VALVEC1:%.*]] = insertelement <2 x double> undef, double [[VAL:%.*]], i64 0
164 ; CHECK-NEXT: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> [[VALVEC1]], <2 x double>* [[PTR:%.*]], i32 4, <2 x i1> <i1 true, i1 false>)
165 ; CHECK-NEXT: ret void
167 %valvec1 = insertelement <2 x double> undef, double %val, i32 0
168 %valvec2 = insertelement <2 x double> %valvec1, double %val, i32 1
169 call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %valvec2, <2 x double>* %ptr, i32 4, <2 x i1> <i1 true, i1 false>)
; Baseline gather: pointers, mask and passthru are all function arguments.
; The expected output is the gather call unchanged.
173 define <2 x double> @gather_generic(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %passthru) {
174 ; CHECK-LABEL: @gather_generic(
175 ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PASSTHRU:%.*]])
176 ; CHECK-NEXT: ret <2 x double> [[RES]]
178 %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %passthru)
179 ret <2 x double> %res
; Masked gather with an all-false mask: the expected output returns the
; passthru operand directly and contains no gather.
183 define <2 x double> @gather_zeromask(<2 x double*> %ptrs, <2 x double> %passthru) {
184 ; CHECK-LABEL: @gather_zeromask(
185 ; CHECK-NEXT: ret <2 x double> [[PASSTHRU:%.*]]
187 %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> zeroinitializer, <2 x double> %passthru)
188 ret <2 x double> %res
; Masked gather with an all-true constant mask: the expected output keeps the
; gather call but replaces the (never selected) passthru operand with poison.
192 define <2 x double> @gather_onemask(<2 x double*> %ptrs, <2 x double> %passthru) {
193 ; CHECK-LABEL: @gather_onemask(
194 ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS:%.*]], i32 4, <2 x i1> <i1 true, i1 true>, <2 x double> poison)
195 ; CHECK-NEXT: ret <2 x double> [[RES]]
197 %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> <i1 true, i1 true>, <2 x double> %passthru)
198 ret <2 x double> %res
; Masked gather whose constant mask enables only lane 2. In the expected
; output every GEP index except lane 2 becomes poison, and lane 2 of the
; passthru splat's shuffle mask becomes undef (that lane is always loaded).
201 define <4 x double> @gather_lane2(double* %base, double %pt) {
202 ; CHECK-LABEL: @gather_lane2(
203 ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <4 x i64> <i64 poison, i64 poison, i64 2, i64 poison>
204 ; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <4 x double> undef, double [[PT:%.*]], i64 0
205 ; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <4 x double> [[PT_V1]], <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 0>
206 ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> [[PTRS]], i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> [[PT_V2]])
207 ; CHECK-NEXT: ret <4 x double> [[RES]]
209 %ptrs = getelementptr double, double *%base, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
210 %pt_v1 = insertelement <4 x double> undef, double %pt, i64 0
211 %pt_v2 = shufflevector <4 x double> %pt_v1, <4 x double> undef, <4 x i32> zeroinitializer
212 %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %ptrs, i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %pt_v2)
213 ret <4 x double> %res
; Masked gather whose mask is a function argument with lane 1 forced to false
; via insertelement (so the mask is not a constant). The expected output keeps
; the GEP indices <0, 1> and the gather call; only the passthru is rewritten
; as insertelement + splat shufflevector.
216 define <2 x double> @gather_lane0_maybe(double* %base, double %pt, <2 x i1> %mask) {
217 ; CHECK-LABEL: @gather_lane0_maybe(
218 ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <2 x i64> <i64 0, i64 1>
219 ; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
220 ; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <2 x double> [[PT_V1]], <2 x double> poison, <2 x i32> zeroinitializer
221 ; CHECK-NEXT: [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1
222 ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PT_V2]])
223 ; CHECK-NEXT: ret <2 x double> [[RES]]
225 %ptrs = getelementptr double, double *%base, <2 x i64> <i64 0, i64 1>
226 %pt_v1 = insertelement <2 x double> undef, double %pt, i64 0
227 %pt_v2 = insertelement <2 x double> %pt_v1, double %pt, i64 1
228 %mask2 = insertelement <2 x i1> %mask, i1 false, i64 1
229 %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask2, <2 x double> %pt_v2)
230 ret <2 x double> %res
; Masked gather with lane 1 of a non-constant mask forced to false; the
; expected output keeps the gather call, with the passthru rewritten as
; insertelement + splat shufflevector.
; NOTE(review): as written, the signature and body are identical to
; @gather_lane0_maybe; the "_spec" suffix suggests a speculation-related
; attribute may have been intended on %base -- confirm.
233 define <2 x double> @gather_lane0_maybe_spec(double* %base, double %pt, <2 x i1> %mask) {
234 ; CHECK-LABEL: @gather_lane0_maybe_spec(
235 ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <2 x i64> <i64 0, i64 1>
236 ; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
237 ; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <2 x double> [[PT_V1]], <2 x double> poison, <2 x i32> zeroinitializer
238 ; CHECK-NEXT: [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1
239 ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PT_V2]])
240 ; CHECK-NEXT: ret <2 x double> [[RES]]
242 %ptrs = getelementptr double, double *%base, <2 x i64> <i64 0, i64 1>
243 %pt_v1 = insertelement <2 x double> undef, double %pt, i64 0
244 %pt_v2 = insertelement <2 x double> %pt_v1, double %pt, i64 1
245 %mask2 = insertelement <2 x i1> %mask, i1 false, i64 1
246 %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask2, <2 x double> %pt_v2)
247 ret <2 x double> %res
; Masked scatter with an all-false mask: the expected output contains only
; `ret void`, i.e. the scatter is removed.
251 define void @scatter_zeromask(<2 x double*> %ptrs, <2 x double> %val) {
252 ; CHECK-LABEL: @scatter_zeromask(
253 ; CHECK-NEXT: ret void
255 call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32 8, <2 x i1> zeroinitializer)
; Masked scatter with constant mask <true, false>: only lane 0 is written.
; In the expected output the lane-1 GEP index becomes poison and the lane-1
; insertelement on the stored value is dropped.
259 define void @scatter_demandedelts(double* %ptr, double %val) {
260 ; CHECK-LABEL: @scatter_demandedelts(
261 ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[PTR:%.*]], <2 x i64> <i64 0, i64 poison>
262 ; CHECK-NEXT: [[VALVEC1:%.*]] = insertelement <2 x double> undef, double [[VAL:%.*]], i64 0
263 ; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> [[VALVEC1]], <2 x double*> [[PTRS]], i32 8, <2 x i1> <i1 true, i1 false>)
264 ; CHECK-NEXT: ret void
266 %ptrs = getelementptr double, double* %ptr, <2 x i64> <i64 0, i64 1>
267 %valvec1 = insertelement <2 x double> undef, double %val, i32 0
268 %valvec2 = insertelement <2 x double> %valvec1, double %val, i32 1
269 call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %valvec2, <2 x double*> %ptrs, i32 8, <2 x i1> <i1 true, i1 false>)
274 ; Test scatters that can be simplified to scalar stores.
276 ;; Value and pointer are both splats, so a scalar store suffices even though the mask is not all active
; Fixed-width scatter where both the pointer vector and the value vector are
; splats and the constant mask is <0, 0, 1, 1>. The expected output is a
; single scalar `store i16` of %val to %dst with align 2.
277 define void @scatter_v4i16_uniform_vals_uniform_ptrs_no_all_active_mask(i16* %dst, i16 %val) {
278 ; CHECK-LABEL: @scatter_v4i16_uniform_vals_uniform_ptrs_no_all_active_mask(
280 ; CHECK-NEXT: store i16 [[VAL:%.*]], i16* [[DST:%.*]], align 2
281 ; CHECK-NEXT: ret void
284 %broadcast.splatinsert = insertelement <4 x i16*> poison, i16* %dst, i32 0
285 %broadcast.splat = shufflevector <4 x i16*> %broadcast.splatinsert, <4 x i16*> poison, <4 x i32> zeroinitializer
286 %broadcast.value = insertelement <4 x i16> poison, i16 %val, i32 0
287 %broadcast.splatvalue = shufflevector <4 x i16> %broadcast.value, <4 x i16> poison, <4 x i32> zeroinitializer
288 call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %broadcast.splatvalue, <4 x i16*> %broadcast.splat, i32 2, <4 x i1> <i1 0, i1 0, i1 1, i1 1>)
; Scalable-vector scatter where pointer and value are both splats and the
; mask is a constant-expression splat of `i1 true`. The expected output is a
; single scalar `store i16` of %val to %dst with align 2.
292 define void @scatter_nxv4i16_uniform_vals_uniform_ptrs_all_active_mask(i16* %dst, i16 %val) {
293 ; CHECK-LABEL: @scatter_nxv4i16_uniform_vals_uniform_ptrs_all_active_mask(
295 ; CHECK-NEXT: store i16 [[VAL:%.*]], i16* [[DST:%.*]], align 2
296 ; CHECK-NEXT: ret void
299 %broadcast.splatinsert = insertelement <vscale x 4 x i16*> poison, i16* %dst, i32 0
300 %broadcast.splat = shufflevector <vscale x 4 x i16*> %broadcast.splatinsert, <vscale x 4 x i16*> poison, <vscale x 4 x i32> zeroinitializer
301 %broadcast.value = insertelement <vscale x 4 x i16> poison, i16 %val, i32 0
302 %broadcast.splatvalue = shufflevector <vscale x 4 x i16> %broadcast.value, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
303 call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %broadcast.splatvalue, <vscale x 4 x i16*> %broadcast.splat, i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> zeroinitializer , i1 true, i32 0), <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> zeroinitializer))
307 ;; The pointer is splat and mask is all active, but value is not a splat
; Fixed-width scatter to a splat pointer with an all-true mask, where the
; stored value is a loaded (non-splat) vector. The expected output extracts
; the last element (index 3) of the loaded vector and stores it as a scalar.
308 define void @scatter_v4i16_no_uniform_vals_uniform_ptrs_all_active_mask(i16* %dst, <4 x i16>* %src) {
309 ; CHECK-LABEL: @scatter_v4i16_no_uniform_vals_uniform_ptrs_all_active_mask(
311 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[SRC:%.*]], align 2
312 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3
313 ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DST:%.*]], align 2
314 ; CHECK-NEXT: ret void
317 %broadcast.splatinsert = insertelement <4 x i16*> poison, i16* %dst, i32 0
318 %broadcast.splat = shufflevector <4 x i16*> %broadcast.splatinsert, <4 x i16*> poison, <4 x i32> zeroinitializer
319 %wide.load = load <4 x i16>, <4 x i16>* %src, align 2
320 call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %wide.load, <4 x i16*> %broadcast.splat, i32 2, <4 x i1> <i1 1, i1 1, i1 1, i1 1>)
; Scalable-vector variant of the splat-pointer / all-true-mask / non-splat
; value scatter. The expected output computes the last lane index at run time
; as (vscale << 2) - 1, extracts that element from the loaded vector, and
; stores it as a scalar.
324 define void @scatter_nxv4i16_no_uniform_vals_uniform_ptrs_all_active_mask(i16* %dst, <vscale x 4 x i16>* %src) {
325 ; CHECK-LABEL: @scatter_nxv4i16_no_uniform_vals_uniform_ptrs_all_active_mask(
327 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, <vscale x 4 x i16>* [[SRC:%.*]], align 2
328 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
329 ; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 2
330 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -1
331 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <vscale x 4 x i16> [[WIDE_LOAD]], i32 [[TMP2]]
332 ; CHECK-NEXT: store i16 [[TMP3]], i16* [[DST:%.*]], align 2
333 ; CHECK-NEXT: ret void
336 %broadcast.splatinsert = insertelement <vscale x 4 x i16*> poison, i16* %dst, i32 0
337 %broadcast.splat = shufflevector <vscale x 4 x i16*> %broadcast.splatinsert, <vscale x 4 x i16*> poison, <vscale x 4 x i32> zeroinitializer
338 %wide.load = load <vscale x 4 x i16>, <vscale x 4 x i16>* %src, align 2
339 call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %wide.load, <vscale x 4 x i16*> %broadcast.splat, i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
343 ; Negative scatter tests
345 ;; Pointer is splat, but mask is not all active and value is not a splat
; Negative case: splat pointer, non-splat (loaded) value, and constant mask
; <0, 0, 1, 1>. The expected output keeps the scatter call; the only change is
; that lanes 0 and 1 of the pointer splat's shuffle mask (the masked-off
; lanes) become undef.
346 define void @negative_scatter_v4i16_no_uniform_vals_uniform_ptrs_all_inactive_mask(i16* %dst, <4 x i16>* %src) {
347 ; CHECK-LABEL: @negative_scatter_v4i16_no_uniform_vals_uniform_ptrs_all_inactive_mask(
348 ; CHECK-NEXT: [[INSERT_ELT:%.*]] = insertelement <4 x i16*> poison, i16* [[DST:%.*]], i64 0
349 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16*> [[INSERT_ELT]], <4 x i16*> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 0>
350 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[SRC:%.*]], align 2
351 ; CHECK-NEXT: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> [[WIDE_LOAD]], <4 x i16*> [[BROADCAST_SPLAT]], i32 2, <4 x i1> <i1 false, i1 false, i1 true, i1 true>)
352 ; CHECK-NEXT: ret void
354 %insert.elt = insertelement <4 x i16*> poison, i16* %dst, i32 0
355 %broadcast.splat = shufflevector <4 x i16*> %insert.elt, <4 x i16*> poison, <4 x i32> zeroinitializer
356 %wide.load = load <4 x i16>, <4 x i16>* %src, align 2
357 call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %wide.load, <4 x i16*> %broadcast.splat, i32 2, <4 x i1> <i1 0, i1 0, i1 1, i1 1>)
361 ;; The pointer is NOT a splat
; Negative case: the pointer vector is a shufflevector of the incoming
; <4 x i16*> argument %inPtr (not an insertelement of a scalar pointer followed
; by a splat shuffle), with an all-true mask and a loaded value. The expected
; output keeps the shufflevector, load and scatter call unchanged.
362 define void @negative_scatter_v4i16_no_uniform_vals_no_uniform_ptrs_all_active_mask(<4 x i16*> %inPtr, <4 x i16>* %src) {
363 ; CHECK-LABEL: @negative_scatter_v4i16_no_uniform_vals_no_uniform_ptrs_all_active_mask(
364 ; CHECK-NEXT: [[BROADCAST:%.*]] = shufflevector <4 x i16*> [[INPTR:%.*]], <4 x i16*> poison, <4 x i32> zeroinitializer
365 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[SRC:%.*]], align 2
366 ; CHECK-NEXT: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> [[WIDE_LOAD]], <4 x i16*> [[BROADCAST]], i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
367 ; CHECK-NEXT: ret void
369 %broadcast= shufflevector <4 x i16*> %inPtr, <4 x i16*> poison, <4 x i32> zeroinitializer
370 %wide.load = load <4 x i16>, <4 x i16>* %src, align 2
371 call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %wide.load, <4 x i16*> %broadcast, i32 2, <4 x i1> <i1 1, i1 1, i1 1, i1 1> )
; Declarations of the i16 masked scatter intrinsics (fixed-width <4 x i16> and
; scalable <vscale x 4 x i16>) called by the scalar-store scatter tests in
; this file. The i32 alignment operand is marked immarg here.
377 declare void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16>, <4 x i16*>, i32 immarg, <4 x i1>)
378 declare void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, i32 immarg, <vscale x 4 x i1>)