1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -S -passes=vector-combine -data-layout=e < %s | FileCheck %s
3 ; RUN: opt -S -passes=vector-combine -data-layout=E < %s | FileCheck %s
; Baseline case: constant in-bounds index (3) into <16 x i8>. The CHECK lines
; expect load+insertelement+store to be scalarized to a GEP plus an i8 store.
; NOTE(review): the `ret void` / closing `}` lines (and `entry:` labels) are
; absent throughout this excerpt — the embedded original line numbers skip
; values, so lines appear to have been stripped during extraction; restore
; them before running this test.
5 define void @insert_store(<16 x i8>* %q, i8 zeroext %s) {
6 ; CHECK-LABEL: @insert_store(
8 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 3
9 ; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP0]], align 1
10 ; CHECK-NEXT: ret void
13 %0 = load <16 x i8>, <16 x i8>* %q
14 %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
15 store <16 x i8> %vecins, <16 x i8>* %q, align 16
; Vector store only has align 1, but element 3 of <8 x i16> sits at byte
; offset 6, so the scalarized i16 store is expected to carry align 2.
19 define void @insert_store_i16_align1(<8 x i16>* %q, i16 zeroext %s) {
20 ; CHECK-LABEL: @insert_store_i16_align1(
22 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[Q:%.*]], i32 0, i32 3
23 ; CHECK-NEXT: store i16 [[S:%.*]], i16* [[TMP0]], align 2
24 ; CHECK-NEXT: ret void
27 %0 = load <8 x i16>, <8 x i16>* %q
28 %vecins = insertelement <8 x i16> %0, i16 %s, i32 3
29 store <8 x i16> %vecins, <8 x i16>* %q, align 1
33 ; To verify case when index is out of bounds
; Index 9 is past the end of <8 x i16>, so the CHECK lines expect the
; original load/insertelement/store sequence to be left untouched.
34 define void @insert_store_outofbounds(<8 x i16>* %q, i16 zeroext %s) {
35 ; CHECK-LABEL: @insert_store_outofbounds(
37 ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[Q:%.*]], align 16
38 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 9
39 ; CHECK-NEXT: store <8 x i16> [[VECINS]], <8 x i16>* [[Q]], align 16
40 ; CHECK-NEXT: ret void
43 %0 = load <8 x i16>, <8 x i16>* %q
44 %vecins = insertelement <8 x i16> %0, i16 %s, i32 9
45 store <8 x i16> %vecins, <8 x i16>* %q
; Scalable vector (<vscale x 8 x i16>): the CHECK lines expect no transform —
; the sequence is preserved unchanged.
49 define void @insert_store_vscale(<vscale x 8 x i16>* %q, i16 zeroext %s) {
50 ; CHECK-LABEL: @insert_store_vscale(
52 ; CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 8 x i16>, <vscale x 8 x i16>* [[Q:%.*]], align 16
53 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <vscale x 8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
54 ; CHECK-NEXT: store <vscale x 8 x i16> [[VECINS]], <vscale x 8 x i16>* [[Q]], align 16
55 ; CHECK-NEXT: ret void
58 %0 = load <vscale x 8 x i16>, <vscale x 8 x i16>* %q
59 %vecins = insertelement <vscale x 8 x i16> %0, i16 %s, i32 3
60 store <vscale x 8 x i16> %vecins, <vscale x 8 x i16>* %q
; i4 elements are not byte-sized, so a single element cannot be stored
; directly to memory; the CHECK lines expect no transform.
64 define void @insert_store_v9i4(<9 x i4>* %q, i4 zeroext %s) {
65 ; CHECK-LABEL: @insert_store_v9i4(
67 ; CHECK-NEXT: [[TMP0:%.*]] = load <9 x i4>, <9 x i4>* [[Q:%.*]], align 8
68 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <9 x i4> [[TMP0]], i4 [[S:%.*]], i32 3
69 ; CHECK-NEXT: store <9 x i4> [[VECINS]], <9 x i4>* [[Q]], align 1
70 ; CHECK-NEXT: ret void
73 %0 = load <9 x i4>, <9 x i4>* %q
74 %vecins = insertelement <9 x i4> %0, i4 %s, i32 3
75 store <9 x i4> %vecins, <9 x i4>* %q, align 1
; i27 elements are not byte-sized either; the CHECK lines expect no transform.
79 define void @insert_store_v4i27(<4 x i27>* %q, i27 zeroext %s) {
80 ; CHECK-LABEL: @insert_store_v4i27(
82 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i27>, <4 x i27>* [[Q:%.*]], align 16
83 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x i27> [[TMP0]], i27 [[S:%.*]], i32 3
84 ; CHECK-NEXT: store <4 x i27> [[VECINS]], <4 x i27>* [[Q]], align 1
85 ; CHECK-NEXT: ret void
88 %0 = load <4 x i27>, <4 x i27>* %q
89 %vecins = insertelement <4 x i27> %0, i27 %s, i32 3
90 store <4 x i27> %vecins, <4 x i27>* %q, align 1
; Load and store live in different basic blocks (note the `br label %cont` in
; the CHECK output); no transform is expected.
; NOTE(review): the `cont:` label lines and the branch in the IR body appear
; to be stripped from this excerpt (line numbers jump 105 -> 108).
94 define void @insert_store_blk_differ(<8 x i16>* %q, i16 zeroext %s) {
95 ; CHECK-LABEL: @insert_store_blk_differ(
97 ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[Q:%.*]], align 16
98 ; CHECK-NEXT: br label [[CONT:%.*]]
100 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
101 ; CHECK-NEXT: store <8 x i16> [[VECINS]], <8 x i16>* [[Q]], align 16
102 ; CHECK-NEXT: ret void
105 %0 = load <8 x i16>, <8 x i16>* %q
108 %vecins = insertelement <8 x i16> %0, i16 %s, i32 3
109 store <8 x i16> %vecins, <8 x i16>* %q
; Variable index with no bound information at all — cannot be proven
; in-bounds, so the CHECK lines expect no transform.
113 define void @insert_store_nonconst(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
114 ; CHECK-LABEL: @insert_store_nonconst(
116 ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
117 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX:%.*]]
118 ; CHECK-NEXT: store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
119 ; CHECK-NEXT: ret void
122 %0 = load <16 x i8>, <16 x i8>* %q
123 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
124 store <16 x i8> %vecins, <16 x i8>* %q
128 ; To verify align here is narrowed to scalar store size
; llvm.assume(%idx < 4) proves the index valid; the vector's align 128 cannot
; be claimed for an element at a variable offset, so the scalar i32 store
; drops to align 4 (the element's natural alignment).
129 define void @insert_store_nonconst_large_alignment(<4 x i32>* %q, i32 zeroext %s, i32 %idx) {
130 ; CHECK-LABEL: @insert_store_nonconst_large_alignment(
132 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
133 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
134 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[Q:%.*]], i32 0, i32 [[IDX]]
135 ; CHECK-NEXT: store i32 [[S:%.*]], i32* [[TMP0]], align 4
136 ; CHECK-NEXT: ret void
139 %cmp = icmp ult i32 %idx, 4
140 call void @llvm.assume(i1 %cmp)
141 %i = load <4 x i32>, <4 x i32>* %q, align 128
142 %vecins = insertelement <4 x i32> %i, i32 %s, i32 %idx
143 store <4 x i32> %vecins, <4 x i32>* %q, align 128
; Vector access align is 8 and elements are i64: the scalar store is expected
; to keep align 8.
147 define void @insert_store_nonconst_align_maximum_8(<8 x i64>* %q, i64 %s, i32 %idx) {
148 ; CHECK-LABEL: @insert_store_nonconst_align_maximum_8(
149 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
150 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
151 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
152 ; CHECK-NEXT: store i64 [[S:%.*]], i64* [[TMP1]], align 8
153 ; CHECK-NEXT: ret void
155 %cmp = icmp ult i32 %idx, 2
156 call void @llvm.assume(i1 %cmp)
157 %i = load <8 x i64>, <8 x i64>* %q, align 8
158 %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx
159 store <8 x i64> %vecins, <8 x i64>* %q, align 8
; Same as the align-8 case but the vector access is only align 4, so the
; scalar store also gets align 4.
163 define void @insert_store_nonconst_align_maximum_4(<8 x i64>* %q, i64 %s, i32 %idx) {
164 ; CHECK-LABEL: @insert_store_nonconst_align_maximum_4(
165 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
166 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
167 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
168 ; CHECK-NEXT: store i64 [[S:%.*]], i64* [[TMP1]], align 4
169 ; CHECK-NEXT: ret void
171 %cmp = icmp ult i32 %idx, 2
172 call void @llvm.assume(i1 %cmp)
173 %i = load <8 x i64>, <8 x i64>* %q, align 4
174 %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx
175 store <8 x i64> %vecins, <8 x i64>* %q, align 4
; The load's alignment (4) is larger than the store's (2); the CHECK lines
; expect the scalar store to use the larger known alignment, 4.
179 define void @insert_store_nonconst_align_larger(<8 x i64>* %q, i64 %s, i32 %idx) {
180 ; CHECK-LABEL: @insert_store_nonconst_align_larger(
181 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
182 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
183 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
184 ; CHECK-NEXT: store i64 [[S:%.*]], i64* [[TMP1]], align 4
185 ; CHECK-NEXT: ret void
187 %cmp = icmp ult i32 %idx, 2
188 call void @llvm.assume(i1 %cmp)
189 %i = load <8 x i64>, <8 x i64>* %q, align 4
190 %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx
191 store <8 x i64> %vecins, <8 x i64>* %q, align 2
; llvm.assume(%idx < 4) before the load proves the variable index in-bounds
; for <16 x i8>, so scalarization is expected.
195 define void @insert_store_nonconst_index_known_valid_by_assume(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
196 ; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_assume(
198 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
199 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
200 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX]]
201 ; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP0]], align 1
202 ; CHECK-NEXT: ret void
205 %cmp = icmp ult i32 %idx, 4
206 call void @llvm.assume(i1 %cmp)
207 %0 = load <16 x i8>, <16 x i8>* %q
208 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
209 store <16 x i8> %vecins, <16 x i8>* %q
213 declare void @maythrow() readnone
; The assume only executes after the load and the (potentially throwing) call,
; so the index is not known valid at the point that matters; no transform.
215 define void @insert_store_nonconst_index_not_known_valid_by_assume_after_load(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
216 ; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_assume_after_load(
218 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
219 ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
220 ; CHECK-NEXT: call void @maythrow()
221 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
222 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
223 ; CHECK-NEXT: store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
224 ; CHECK-NEXT: ret void
227 %cmp = icmp ult i32 %idx, 4
228 %0 = load <16 x i8>, <16 x i8>* %q
229 call void @maythrow()
230 call void @llvm.assume(i1 %cmp)
231 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
232 store <16 x i8> %vecins, <16 x i8>* %q
; The assume only bounds %idx below 17, which still allows the out-of-bounds
; value 16 for a 16-element vector; no transform expected.
236 define void @insert_store_nonconst_index_not_known_valid_by_assume(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
237 ; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_assume(
239 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 17
240 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
241 ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
242 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
243 ; CHECK-NEXT: store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
244 ; CHECK-NEXT: ret void
247 %cmp = icmp ult i32 %idx, 17
248 call void @llvm.assume(i1 %cmp)
249 %0 = load <16 x i8>, <16 x i8>* %q
250 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
251 store <16 x i8> %vecins, <16 x i8>* %q
255 declare void @llvm.assume(i1)
; `and %idx, 7` bounds the index to 0..7 and %idx is noundef, so the clamped
; value cannot be poison/undef; transform fires with no freeze needed.
257 define void @insert_store_nonconst_index_known_noundef_and_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
258 ; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_and_valid_by_and(
260 ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
261 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
262 ; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP0]], align 1
263 ; CHECK-NEXT: ret void
266 %0 = load <16 x i8>, <16 x i8>* %q
267 %idx.clamped = and i32 %idx, 7
268 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
269 store <16 x i8> %vecins, <16 x i8>* %q
; The base index is frozen before the clamping `and`, so the clamped index is
; poison-free and provably in 0..7; transform fires.
273 define void @insert_store_nonconst_index_base_frozen_and_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
274 ; CHECK-LABEL: @insert_store_nonconst_index_base_frozen_and_valid_by_and(
276 ; CHECK-NEXT: [[IDX_FROZEN:%.*]] = freeze i32 [[IDX:%.*]]
277 ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[IDX_FROZEN]], 7
278 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
279 ; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP0]], align 1
280 ; CHECK-NEXT: ret void
283 %0 = load <16 x i8>, <16 x i8>* %q
284 %idx.frozen = freeze i32 %idx
285 %idx.clamped = and i32 %idx.frozen, 7
286 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
287 store <16 x i8> %vecins, <16 x i8>* %q
; Here the freeze is applied to the already-clamped index; per the CHECK
; lines no transform occurs — presumably the combine does not look through
; the freeze to recover the `and` bound (verify against the pass).
291 define void @insert_store_nonconst_index_frozen_and_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
292 ; CHECK-LABEL: @insert_store_nonconst_index_frozen_and_valid_by_and(
294 ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
295 ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
296 ; CHECK-NEXT: [[IDX_CLAMPED_FROZEN:%.*]] = freeze i32 [[IDX_CLAMPED]]
297 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED_FROZEN]]
298 ; CHECK-NEXT: store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
299 ; CHECK-NEXT: ret void
302 %0 = load <16 x i8>, <16 x i8>* %q
303 %idx.clamped = and i32 %idx, 7
304 %idx.clamped.frozen = freeze i32 %idx.clamped
305 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped.frozen
306 store <16 x i8> %vecins, <16 x i8>* %q
; %idx is not noundef, so it may be poison; the transform still fires but the
; CHECK lines show a freeze being inserted before the clamping `and`.
310 define void @insert_store_nonconst_index_known_valid_by_and_but_may_be_poison(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
311 ; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_and_but_may_be_poison(
313 ; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[IDX:%.*]]
314 ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[TMP0]], 7
315 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
316 ; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP1]], align 1
317 ; CHECK-NEXT: ret void
320 %0 = load <16 x i8>, <16 x i8>* %q
321 %idx.clamped = and i32 %idx, 7
322 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
323 store <16 x i8> %vecins, <16 x i8>* %q
; `and %idx, 16` yields 0 or 16, and 16 is out of bounds for <16 x i8>, so
; the index is not proven valid; no transform expected.
327 define void @insert_store_nonconst_index_not_known_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
328 ; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_and(
330 ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
331 ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 16
332 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
333 ; CHECK-NEXT: store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
334 ; CHECK-NEXT: ret void
337 %0 = load <16 x i8>, <16 x i8>* %q
338 %idx.clamped = and i32 %idx, 16
339 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
340 store <16 x i8> %vecins, <16 x i8>* %q
; Even with a noundef index, `and %idx, 16` can still produce the
; out-of-bounds value 16; no transform expected.
344 define void @insert_store_nonconst_index_known_noundef_not_known_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
345 ; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_not_known_valid_by_and(
347 ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
348 ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 16
349 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
350 ; CHECK-NEXT: store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
351 ; CHECK-NEXT: ret void
354 %0 = load <16 x i8>, <16 x i8>* %q
355 %idx.clamped = and i32 %idx, 16
356 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
357 store <16 x i8> %vecins, <16 x i8>* %q
; `urem %idx, 16` bounds the index to 0..15 and %idx is noundef, so the
; transform fires with no freeze, mirroring the `and`-based case above.
360 define void @insert_store_nonconst_index_known_noundef_and_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
361 ; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_and_valid_by_urem(
363 ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
364 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
365 ; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP0]], align 1
366 ; CHECK-NEXT: ret void
369 %0 = load <16 x i8>, <16 x i8>* %q
370 %idx.clamped = urem i32 %idx, 16
371 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
372 store <16 x i8> %vecins, <16 x i8>* %q
; Base index frozen before the `urem` clamp: the clamped value is poison-free
; and in 0..15, so the transform fires.
376 define void @insert_store_nonconst_index_base_frozen_and_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
377 ; CHECK-LABEL: @insert_store_nonconst_index_base_frozen_and_valid_by_urem(
379 ; CHECK-NEXT: [[IDX_FROZEN:%.*]] = freeze i32 [[IDX:%.*]]
380 ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[IDX_FROZEN]], 16
381 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
382 ; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP0]], align 1
383 ; CHECK-NEXT: ret void
386 %0 = load <16 x i8>, <16 x i8>* %q
387 %idx.frozen = freeze i32 %idx
388 %idx.clamped = urem i32 %idx.frozen, 16
389 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
390 store <16 x i8> %vecins, <16 x i8>* %q
; Freeze applied after the `urem` clamp; per the CHECK lines no transform
; occurs — presumably the bound is not recovered through the freeze
; (verify against the pass), matching the `and` counterpart above.
394 define void @insert_store_nonconst_index_frozen_and_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
395 ; CHECK-LABEL: @insert_store_nonconst_index_frozen_and_valid_by_urem(
397 ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
398 ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
399 ; CHECK-NEXT: [[IDX_CLAMPED_FROZEN:%.*]] = freeze i32 [[IDX_CLAMPED]]
400 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED_FROZEN]]
401 ; CHECK-NEXT: store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
402 ; CHECK-NEXT: ret void
405 %0 = load <16 x i8>, <16 x i8>* %q
406 %idx.clamped = urem i32 %idx, 16
407 %idx.clamped.frozen = freeze i32 %idx.clamped
408 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped.frozen
409 store <16 x i8> %vecins, <16 x i8>* %q
; %idx may be poison (not noundef); the transform fires but inserts a freeze
; ahead of the `urem` clamp, per the CHECK lines.
413 define void @insert_store_nonconst_index_known_valid_by_urem_but_may_be_poison(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
414 ; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_urem_but_may_be_poison(
416 ; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[IDX:%.*]]
417 ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[TMP0]], 16
418 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
419 ; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP1]], align 1
420 ; CHECK-NEXT: ret void
423 %0 = load <16 x i8>, <16 x i8>* %q
424 %idx.clamped = urem i32 %idx, 16
425 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
426 store <16 x i8> %vecins, <16 x i8>* %q
; `urem %idx, 17` can still produce 16, which is out of bounds for
; <16 x i8>; no transform expected.
430 define void @insert_store_nonconst_index_not_known_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
431 ; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_urem(
433 ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
434 ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 17
435 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
436 ; CHECK-NEXT: store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
437 ; CHECK-NEXT: ret void
440 %0 = load <16 x i8>, <16 x i8>* %q
441 %idx.clamped = urem i32 %idx, 17
442 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
443 store <16 x i8> %vecins, <16 x i8>* %q
; noundef does not help here either: `urem %idx, 17` still allows the
; out-of-bounds value 16; no transform expected.
447 define void @insert_store_nonconst_index_known_noundef_not_known_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
448 ; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_not_known_valid_by_urem(
450 ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
451 ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 17
452 ; CHECK-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
453 ; CHECK-NEXT: store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
454 ; CHECK-NEXT: ret void
457 %0 = load <16 x i8>, <16 x i8>* %q
458 %idx.clamped = urem i32 %idx, 17
459 %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
460 store <16 x i8> %vecins, <16 x i8>* %q
; The store address goes through a bitcast / zero-index GEP / bitcast chain
; back to the same base pointer; the combine is expected to see through the
; casts and still scalarize the store.
464 define void @insert_store_ptr_strip(<16 x i8>* %q, i8 zeroext %s) {
465 ; CHECK-LABEL: @insert_store_ptr_strip(
467 ; CHECK-NEXT: [[ADDR0:%.*]] = bitcast <16 x i8>* [[Q:%.*]] to <2 x i64>*
468 ; CHECK-NEXT: [[ADDR1:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[ADDR0]], i64 0
469 ; CHECK-NEXT: [[ADDR2:%.*]] = bitcast <2 x i64>* [[ADDR1]] to <16 x i8>*
470 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[ADDR2]], i32 0, i32 3
471 ; CHECK-NEXT: store i8 [[S:%.*]], i8* [[TMP0]], align 1
472 ; CHECK-NEXT: ret void
475 %0 = load <16 x i8>, <16 x i8>* %q
476 %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
477 %addr0 = bitcast <16 x i8>* %q to <2 x i64>*
478 %addr1 = getelementptr <2 x i64>, <2 x i64>* %addr0, i64 0
479 %addr2 = bitcast <2 x i64>* %addr1 to <16 x i8>*
480 store <16 x i8> %vecins, <16 x i8>* %addr2
; Volatile accesses must not be touched: the first pair has a volatile store
; and the second pair a volatile load, so per the CHECK lines neither
; sequence is scalarized.
484 define void @volatile_update(<16 x i8>* %q, <16 x i8>* %p, i8 zeroext %s) {
485 ; CHECK-LABEL: @volatile_update(
487 ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
488 ; CHECK-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 3
489 ; CHECK-NEXT: store volatile <16 x i8> [[VECINS0]], <16 x i8>* [[Q]], align 16
490 ; CHECK-NEXT: [[TMP1:%.*]] = load volatile <16 x i8>, <16 x i8>* [[P:%.*]], align 16
491 ; CHECK-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[S]], i32 1
492 ; CHECK-NEXT: store <16 x i8> [[VECINS1]], <16 x i8>* [[P]], align 16
493 ; CHECK-NEXT: ret void
496 %0 = load <16 x i8>, <16 x i8>* %q
497 %vecins0 = insertelement <16 x i8> %0, i8 %s, i32 3
498 store volatile <16 x i8> %vecins0, <16 x i8>* %q
500 %1 = load volatile <16 x i8>, <16 x i8>* %p
501 %vecins1 = insertelement <16 x i8> %1, i8 %s, i32 1
502 store <16 x i8> %vecins1, <16 x i8>* %p
; Load is from %p but the store goes to %q: not a read-modify-write of a
; single location, so no transform expected.
506 define void @insert_store_addr_differ(<16 x i8>* %p, <16 x i8>* %q, i8 %s) {
507 ; CHECK-LABEL: @insert_store_addr_differ(
509 ; CHECK-NEXT: [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
510 ; CHECK-NEXT: [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
511 ; CHECK-NEXT: store <16 x i8> [[INS]], <16 x i8>* [[Q:%.*]], align 16
512 ; CHECK-NEXT: ret void
515 %ld = load <16 x i8>, <16 x i8>* %p
516 %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
517 store <16 x i8> %ins, <16 x i8>* %q
521 ; We can't transform if any instr could modify memory in between.
; Three sub-cases: (1) an intervening store to may-alias %q blocks the
; transform on %p; (2) an intervening store to noalias %r does NOT block the
; transform on %q (CHECK shows the GEP + scalar store at index 7); (3) an
; intervening store to %p itself blocks the transform on %ptr0.
522 define void @insert_store_mem_modify(<16 x i8>* %p, <16 x i8>* %q, <16 x i8>* noalias %r, i8 %s, i32 %m) {
523 ; CHECK-LABEL: @insert_store_mem_modify(
525 ; CHECK-NEXT: [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
526 ; CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* [[Q:%.*]], align 16
527 ; CHECK-NEXT: [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
528 ; CHECK-NEXT: store <16 x i8> [[INS]], <16 x i8>* [[P]], align 16
529 ; CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* [[R:%.*]], align 16
530 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q]], i32 0, i32 7
531 ; CHECK-NEXT: store i8 [[S]], i8* [[TMP0]], align 1
532 ; CHECK-NEXT: [[PTR0:%.*]] = bitcast <16 x i8>* [[P]] to <4 x i32>*
533 ; CHECK-NEXT: [[LD3:%.*]] = load <4 x i32>, <4 x i32>* [[PTR0]], align 16
534 ; CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* [[P]], align 16
535 ; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x i32> [[LD3]], i32 [[M:%.*]], i32 0
536 ; CHECK-NEXT: store <4 x i32> [[INS3]], <4 x i32>* [[PTR0]], align 16
537 ; CHECK-NEXT: ret void
541 %ld = load <16 x i8>, <16 x i8>* %p
542 store <16 x i8> zeroinitializer, <16 x i8>* %q
543 %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
544 store <16 x i8> %ins, <16 x i8>* %p
547 %ld2 = load <16 x i8>, <16 x i8>* %q
548 store <16 x i8> zeroinitializer, <16 x i8>* %r
549 %ins2 = insertelement <16 x i8> %ld2, i8 %s, i32 7
550 store <16 x i8> %ins2, <16 x i8>* %q
553 %ptr0 = bitcast <16 x i8>* %p to <4 x i32>*
554 %ld3 = load <4 x i32>, <4 x i32>* %ptr0
555 store <16 x i8> zeroinitializer, <16 x i8>* %p
556 %ins3 = insertelement <4 x i32> %ld3, i32 %m, i32 0
557 store <4 x i32> %ins3, <4 x i32>* %ptr0
562 ; Check cases when calls may modify memory
; An intervening call to @maywrite (no memory attributes) blocks the first
; transform; after the @foo barrier, a call to the readonly @nowrite does not
; block the second, which the CHECK lines show scalarized to index 7.
563 define void @insert_store_with_call(<16 x i8>* %p, <16 x i8>* %q, i8 %s) {
564 ; CHECK-LABEL: @insert_store_with_call(
566 ; CHECK-NEXT: [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
567 ; CHECK-NEXT: call void @maywrite(<16 x i8>* [[P]])
568 ; CHECK-NEXT: [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
569 ; CHECK-NEXT: store <16 x i8> [[INS]], <16 x i8>* [[P]], align 16
570 ; CHECK-NEXT: call void @foo()
571 ; CHECK-NEXT: call void @nowrite(<16 x i8>* [[P]])
572 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P]], i32 0, i32 7
573 ; CHECK-NEXT: store i8 [[S]], i8* [[TMP0]], align 1
574 ; CHECK-NEXT: ret void
577 %ld = load <16 x i8>, <16 x i8>* %p
578 call void @maywrite(<16 x i8>* %p)
579 %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
580 store <16 x i8> %ins, <16 x i8>* %p
581 call void @foo() ; Barrier
582 %ld2 = load <16 x i8>, <16 x i8>* %p
583 call void @nowrite(<16 x i8>* %p)
584 %ins2 = insertelement <16 x i8> %ld2, i8 %s, i32 7
585 store <16 x i8> %ins2, <16 x i8>* %p
590 declare void @maywrite(<16 x i8>*)
591 declare void @nowrite(<16 x i8>*) readonly
593 ; To test if number of instructions in-between exceeds the limit (default 30),
594 ; the combine will quit.
; NOTE(review): two instruction definitions are missing from this excerpt —
; `%i` (used by %i5; the CHECK output shows it as `or i32 %arg, 1`) and `%i9`
; (used by %i12/%i16; CHECK shows `and i8 %i8, 31`). The embedded original
; line numbers skip the corresponding lines; restore them before running.
595 define i32 @insert_store_maximum_scan_instrs(i32 %arg, i16* %arg1, <16 x i8>* %arg2, i8 zeroext %arg3) {
596 ; CHECK-LABEL: @insert_store_maximum_scan_instrs(
598 ; CHECK-NEXT: [[I:%.*]] = or i32 [[ARG:%.*]], 1
599 ; CHECK-NEXT: [[I4:%.*]] = load <16 x i8>, <16 x i8>* [[ARG2:%.*]], align 16
600 ; CHECK-NEXT: [[I5:%.*]] = tail call i32 @bar(i32 [[I]], i1 true)
601 ; CHECK-NEXT: [[I6:%.*]] = shl i32 [[ARG]], [[I5]]
602 ; CHECK-NEXT: [[I7:%.*]] = lshr i32 [[I6]], 26
603 ; CHECK-NEXT: [[I8:%.*]] = trunc i32 [[I7]] to i8
604 ; CHECK-NEXT: [[I9:%.*]] = and i8 [[I8]], 31
605 ; CHECK-NEXT: [[I10:%.*]] = lshr i32 [[I6]], 11
606 ; CHECK-NEXT: [[I11:%.*]] = and i32 [[I10]], 32767
607 ; CHECK-NEXT: [[I12:%.*]] = zext i8 [[I9]] to i64
608 ; CHECK-NEXT: [[I13:%.*]] = getelementptr inbounds i16, i16* [[ARG1:%.*]], i64 [[I12]]
609 ; CHECK-NEXT: [[I14:%.*]] = load i16, i16* [[I13]], align 2
610 ; CHECK-NEXT: [[I15:%.*]] = zext i16 [[I14]] to i32
611 ; CHECK-NEXT: [[I16:%.*]] = add nuw nsw i8 [[I9]], 1
612 ; CHECK-NEXT: [[I17:%.*]] = zext i8 [[I16]] to i64
613 ; CHECK-NEXT: [[I18:%.*]] = getelementptr inbounds i16, i16* [[ARG1]], i64 [[I17]]
614 ; CHECK-NEXT: [[I19:%.*]] = load i16, i16* [[I18]], align 2
615 ; CHECK-NEXT: [[I20:%.*]] = zext i16 [[I19]] to i32
616 ; CHECK-NEXT: [[I21:%.*]] = sub nsw i32 [[I20]], [[I15]]
617 ; CHECK-NEXT: [[I22:%.*]] = mul nsw i32 [[I11]], [[I21]]
618 ; CHECK-NEXT: [[I23:%.*]] = ashr i32 [[I22]], 15
619 ; CHECK-NEXT: [[I24:%.*]] = shl nuw nsw i32 [[I5]], 15
620 ; CHECK-NEXT: [[I25:%.*]] = xor i32 [[I24]], 1015808
621 ; CHECK-NEXT: [[I26:%.*]] = add nuw nsw i32 [[I25]], [[I15]]
622 ; CHECK-NEXT: [[I27:%.*]] = add nsw i32 [[I26]], [[I23]]
623 ; CHECK-NEXT: [[I28:%.*]] = sitofp i32 [[ARG]] to double
624 ; CHECK-NEXT: [[I29:%.*]] = tail call double @llvm.log2.f64(double [[I28]])
625 ; CHECK-NEXT: [[I30:%.*]] = fptosi double [[I29]] to i32
626 ; CHECK-NEXT: [[I31:%.*]] = shl nsw i32 [[I30]], 15
627 ; CHECK-NEXT: [[I32:%.*]] = or i32 [[I31]], 4
628 ; CHECK-NEXT: [[I33:%.*]] = icmp eq i32 [[I27]], [[I32]]
629 ; CHECK-NEXT: [[I34:%.*]] = select i1 [[I33]], i32 [[ARG]], i32 [[I31]]
630 ; CHECK-NEXT: [[I35:%.*]] = lshr i32 [[I34]], 1
631 ; CHECK-NEXT: [[I36:%.*]] = insertelement <16 x i8> [[I4]], i8 [[ARG3:%.*]], i32 3
632 ; CHECK-NEXT: store <16 x i8> [[I36]], <16 x i8>* [[ARG2]], align 16
633 ; CHECK-NEXT: ret i32 [[I35]]
637 %i4 = load <16 x i8>, <16 x i8>* %arg2, align 16
638 %i5 = tail call i32 @bar(i32 %i, i1 true)
639 %i6 = shl i32 %arg, %i5
640 %i7 = lshr i32 %i6, 26
641 %i8 = trunc i32 %i7 to i8
643 %i10 = lshr i32 %i6, 11
644 %i11 = and i32 %i10, 32767
645 %i12 = zext i8 %i9 to i64
646 %i13 = getelementptr inbounds i16, i16* %arg1, i64 %i12
647 %i14 = load i16, i16* %i13, align 2
648 %i15 = zext i16 %i14 to i32
649 %i16 = add nuw nsw i8 %i9, 1
650 %i17 = zext i8 %i16 to i64
651 %i18 = getelementptr inbounds i16, i16* %arg1, i64 %i17
652 %i19 = load i16, i16* %i18, align 2
653 %i20 = zext i16 %i19 to i32
654 %i21 = sub nsw i32 %i20, %i15
655 %i22 = mul nsw i32 %i11, %i21
656 %i23 = ashr i32 %i22, 15
657 %i24 = shl nuw nsw i32 %i5, 15
658 %i25 = xor i32 %i24, 1015808
659 %i26 = add nuw nsw i32 %i25, %i15
660 %i27 = add nsw i32 %i26, %i23
661 %i28 = sitofp i32 %arg to double
662 %i29 = tail call double @llvm.log2.f64(double %i28)
663 %i30 = fptosi double %i29 to i32
664 %i31 = shl nsw i32 %i30, 15
665 %i32 = or i32 %i31, 4
666 %i33 = icmp eq i32 %i27, %i32
667 %i34 = select i1 %i33, i32 %arg, i32 %i31
668 %i35 = lshr i32 %i34, 1
669 %i36 = insertelement <16 x i8> %i4, i8 %arg3, i32 3
670 store <16 x i8> %i36, <16 x i8>* %arg2, align 16
674 declare i32 @bar(i32, i1) readonly
675 declare double @llvm.log2.f64(double)