1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
4 ; Try to eliminate binops and shuffles when the shuffle is a select in disguise:
5 ; PR37806 - https://bugs.llvm.org/show_bug.cgi?id=37806
7 define <4 x i32> @add(<4 x i32> %v) {
9 ; CHECK-NEXT: [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 13, i32 0>
10 ; CHECK-NEXT: ret <4 x i32> [[S]]
12 %b = add <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
13 %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
17 ; Propagate flags when possible.
19 define <4 x i32> @add_nuw_nsw(<4 x i32> %v) {
20 ; CHECK-LABEL: @add_nuw_nsw(
21 ; CHECK-NEXT: [[S:%.*]] = add nuw nsw <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 13, i32 0>
22 ; CHECK-NEXT: ret <4 x i32> [[S]]
24 %b = add nuw nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
25 %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
29 define <4 x i32> @add_undef_mask_elt(<4 x i32> %v) {
30 ; CHECK-LABEL: @add_undef_mask_elt(
31 ; CHECK-NEXT: [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 undef, i32 0>
32 ; CHECK-NEXT: ret <4 x i32> [[S]]
34 %b = add <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
35 %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
39 ; Poison flags must be dropped or undef must be replaced with safe constant.
41 define <4 x i32> @add_nuw_nsw_undef_mask_elt(<4 x i32> %v) {
42 ; CHECK-LABEL: @add_nuw_nsw_undef_mask_elt(
43 ; CHECK-NEXT: [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 undef, i32 13, i32 0>
44 ; CHECK-NEXT: ret <4 x i32> [[S]]
46 %b = add nuw nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
47 %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
51 ; Constant operand 0 (LHS) could work for some non-commutative binops?
53 define <4 x i32> @sub(<4 x i32> %v) {
55 ; CHECK-NEXT: [[B:%.*]] = sub <4 x i32> <i32 poison, i32 poison, i32 poison, i32 14>, [[V:%.*]]
56 ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
57 ; CHECK-NEXT: ret <4 x i32> [[S]]
59 %b = sub <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
60 %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
64 ; If any element of the shuffle mask operand is undef, that element of the result is undef.
65 ; The shuffle is eliminated in this transform, but we can replace a constant element with undef.
66 ; Preserve flags when possible. It's not safe to propagate poison-generating flags with undef constants.
68 define <4 x i32> @mul(<4 x i32> %v) {
70 ; CHECK-NEXT: [[S:%.*]] = mul <4 x i32> [[V:%.*]], <i32 undef, i32 12, i32 1, i32 14>
71 ; CHECK-NEXT: ret <4 x i32> [[S]]
73 %b = mul nsw nuw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
74 %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
78 define <4 x i32> @shl(<4 x i32> %v) {
80 ; CHECK-NEXT: [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
81 ; CHECK-NEXT: ret <4 x i32> [[S]]
83 %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
84 %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
88 define <4 x i32> @shl_nsw(<4 x i32> %v) {
89 ; CHECK-LABEL: @shl_nsw(
90 ; CHECK-NEXT: [[S:%.*]] = shl nsw <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
91 ; CHECK-NEXT: ret <4 x i32> [[S]]
93 %b = shl nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
94 %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
98 define <4 x i32> @shl_undef_mask_elt(<4 x i32> %v) {
99 ; CHECK-LABEL: @shl_undef_mask_elt(
100 ; CHECK-NEXT: [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
101 ; CHECK-NEXT: ret <4 x i32> [[S]]
103 %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
104 %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
108 define <4 x i32> @shl_nuw_undef_mask_elt(<4 x i32> %v) {
109 ; CHECK-LABEL: @shl_nuw_undef_mask_elt(
110 ; CHECK-NEXT: [[S:%.*]] = shl nuw <4 x i32> [[V:%.*]], <i32 0, i32 0, i32 13, i32 0>
111 ; CHECK-NEXT: ret <4 x i32> [[S]]
113 %b = shl nuw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
114 %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
118 define <4 x i32> @lshr_constant_op0(<4 x i32> %v) {
119 ; CHECK-LABEL: @lshr_constant_op0(
120 ; CHECK-NEXT: [[S:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 0, i32 14>
121 ; CHECK-NEXT: ret <4 x i32> [[S]]
123 %b = lshr <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
124 %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
128 define <4 x i32> @lshr_exact_constant_op0(<4 x i32> %v) {
129 ; CHECK-LABEL: @lshr_exact_constant_op0(
130 ; CHECK-NEXT: [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 0, i32 14>
131 ; CHECK-NEXT: ret <4 x i32> [[S]]
133 %b = lshr exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
134 %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
; lshr counterpart of @shl_undef_mask_elt: lanes 1-2 come from the shifted value, lane 3 from %v (shift by 0), and the undef mask lane 0 gets a safe shift amount of 0.
138 define <4 x i32> @lshr_undef_mask_elt(<4 x i32> %v) {
139 ; CHECK-LABEL: @lshr_undef_mask_elt(
140 ; CHECK-NEXT: [[S:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
141 ; CHECK-NEXT: ret <4 x i32> [[S]]
143 %b = lshr <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
144 %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
148 define <4 x i32> @lshr_exact_undef_mask_elt(<4 x i32> %v) {
149 ; CHECK-LABEL: @lshr_exact_undef_mask_elt(
150 ; CHECK-NEXT: [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 0, i32 0, i32 13, i32 0>
151 ; CHECK-NEXT: ret <4 x i32> [[S]]
153 %b = lshr exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
154 %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
158 define <4 x i32> @lshr_constant_op1(<4 x i32> %v) {
159 ; CHECK-LABEL: @lshr_constant_op1(
160 ; CHECK-NEXT: [[B:%.*]] = lshr exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
161 ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
162 ; CHECK-NEXT: ret <4 x i32> [[S]]
164 %b = lshr exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
165 %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
171 define <3 x i32> @ashr(<3 x i32> %v) {
172 ; CHECK-LABEL: @ashr(
173 ; CHECK-NEXT: [[S:%.*]] = ashr <3 x i32> [[V:%.*]], <i32 0, i32 12, i32 13>
174 ; CHECK-NEXT: ret <3 x i32> [[S]]
176 %b = ashr <3 x i32> %v, <i32 11, i32 12, i32 13>
177 %s = shufflevector <3 x i32> %b, <3 x i32> %v, <3 x i32> <i32 3, i32 1, i32 2>
181 define <3 x i42> @and(<3 x i42> %v) {
183 ; CHECK-NEXT: [[S:%.*]] = and <3 x i42> [[V:%.*]], <i42 -1, i42 12, i42 undef>
184 ; CHECK-NEXT: ret <3 x i42> [[S]]
186 %b = and <3 x i42> %v, <i42 11, i42 12, i42 13>
187 %s = shufflevector <3 x i42> %v, <3 x i42> %b, <3 x i32> <i32 0, i32 4, i32 undef>
191 ; It doesn't matter if the intermediate op has extra uses.
193 declare void @use_v4i32(<4 x i32>)
195 define <4 x i32> @or(<4 x i32> %v) {
197 ; CHECK-NEXT: [[B:%.*]] = or <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14>
198 ; CHECK-NEXT: [[S:%.*]] = or <4 x i32> [[V]], <i32 0, i32 0, i32 13, i32 14>
199 ; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[B]])
200 ; CHECK-NEXT: ret <4 x i32> [[S]]
202 %b = or <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
203 %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
204 call void @use_v4i32(<4 x i32> %b)
208 define <4 x i32> @xor(<4 x i32> %v) {
210 ; CHECK-NEXT: [[S:%.*]] = xor <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 0, i32 0>
211 ; CHECK-NEXT: ret <4 x i32> [[S]]
213 %b = xor <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
214 %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
218 define <4 x i32> @udiv(<4 x i32> %v) {
219 ; CHECK-LABEL: @udiv(
220 ; CHECK-NEXT: [[B:%.*]] = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
221 ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
222 ; CHECK-NEXT: ret <4 x i32> [[S]]
224 %b = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
225 %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
229 define <4 x i32> @udiv_exact(<4 x i32> %v) {
230 ; CHECK-LABEL: @udiv_exact(
231 ; CHECK-NEXT: [[B:%.*]] = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
232 ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
233 ; CHECK-NEXT: ret <4 x i32> [[S]]
235 %b = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
236 %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
240 define <4 x i32> @udiv_undef_mask_elt(<4 x i32> %v) {
241 ; CHECK-LABEL: @udiv_undef_mask_elt(
242 ; CHECK-NEXT: [[B:%.*]] = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
243 ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
244 ; CHECK-NEXT: ret <4 x i32> [[S]]
246 %b = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
247 %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
251 define <4 x i32> @udiv_exact_undef_mask_elt(<4 x i32> %v) {
252 ; CHECK-LABEL: @udiv_exact_undef_mask_elt(
253 ; CHECK-NEXT: [[B:%.*]] = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
254 ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
255 ; CHECK-NEXT: ret <4 x i32> [[S]]
257 %b = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
258 %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
262 define <4 x i32> @sdiv(<4 x i32> %v) {
263 ; CHECK-LABEL: @sdiv(
264 ; CHECK-NEXT: [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 11, i32 1, i32 13, i32 1>
265 ; CHECK-NEXT: ret <4 x i32> [[S]]
267 %b = sdiv <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
268 %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
272 define <4 x i32> @sdiv_exact(<4 x i32> %v) {
273 ; CHECK-LABEL: @sdiv_exact(
274 ; CHECK-NEXT: [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 11, i32 1, i32 13, i32 1>
275 ; CHECK-NEXT: ret <4 x i32> [[S]]
277 %b = sdiv exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
278 %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
282 ; Div/rem need special handling if the shuffle has undef elements.
284 define <4 x i32> @sdiv_undef_mask_elt(<4 x i32> %v) {
285 ; CHECK-LABEL: @sdiv_undef_mask_elt(
286 ; CHECK-NEXT: [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 13, i32 1>
287 ; CHECK-NEXT: ret <4 x i32> [[S]]
289 %b = sdiv <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
290 %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
294 define <4 x i32> @sdiv_exact_undef_mask_elt(<4 x i32> %v) {
295 ; CHECK-LABEL: @sdiv_exact_undef_mask_elt(
296 ; CHECK-NEXT: [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 13, i32 1>
297 ; CHECK-NEXT: ret <4 x i32> [[S]]
299 %b = sdiv exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
300 %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
304 define <4 x i32> @urem(<4 x i32> %v) {
305 ; CHECK-LABEL: @urem(
306 ; CHECK-NEXT: [[B:%.*]] = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
307 ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
308 ; CHECK-NEXT: ret <4 x i32> [[S]]
310 %b = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
311 %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
315 define <4 x i32> @urem_undef_mask_elt(<4 x i32> %v) {
316 ; CHECK-LABEL: @urem_undef_mask_elt(
317 ; CHECK-NEXT: [[B:%.*]] = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
318 ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
319 ; CHECK-NEXT: ret <4 x i32> [[S]]
321 %b = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
322 %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
326 define <4 x i32> @srem(<4 x i32> %v) {
327 ; CHECK-LABEL: @srem(
328 ; CHECK-NEXT: [[B:%.*]] = srem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
329 ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
330 ; CHECK-NEXT: ret <4 x i32> [[S]]
332 %b = srem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
333 %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
339 define <4 x float> @fadd(<4 x float> %v) {
340 ; CHECK-LABEL: @fadd(
341 ; CHECK-NEXT: [[S:%.*]] = fadd <4 x float> [[V:%.*]], <float 4.100000e+01, float 4.200000e+01, float -0.000000e+00, float -0.000000e+00>
342 ; CHECK-NEXT: ret <4 x float> [[S]]
344 %b = fadd <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
345 %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
349 define <4 x double> @fsub(<4 x double> %v) {
350 ; CHECK-LABEL: @fsub(
351 ; CHECK-NEXT: [[B:%.*]] = fsub <4 x double> <double poison, double poison, double 4.300000e+01, double 4.400000e+01>, [[V:%.*]]
352 ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
353 ; CHECK-NEXT: ret <4 x double> [[S]]
355 %b = fsub <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
356 %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
362 define <4 x float> @fmul(<4 x float> %v) {
363 ; CHECK-LABEL: @fmul(
364 ; CHECK-NEXT: [[S:%.*]] = fmul nnan ninf <4 x float> [[V:%.*]], <float 4.100000e+01, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
365 ; CHECK-NEXT: ret <4 x float> [[S]]
367 %b = fmul nnan ninf <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
368 %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
372 define <4 x double> @fdiv_constant_op0(<4 x double> %v) {
373 ; CHECK-LABEL: @fdiv_constant_op0(
374 ; CHECK-NEXT: [[B:%.*]] = fdiv fast <4 x double> <double poison, double poison, double 4.300000e+01, double 4.400000e+01>, [[V:%.*]]
375 ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
376 ; CHECK-NEXT: ret <4 x double> [[S]]
378 %b = fdiv fast <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
379 %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
383 define <4 x double> @fdiv_constant_op1(<4 x double> %v) {
384 ; CHECK-LABEL: @fdiv_constant_op1(
385 ; CHECK-NEXT: [[S:%.*]] = fdiv reassoc <4 x double> [[V:%.*]], <double undef, double 1.000000e+00, double 4.300000e+01, double 4.400000e+01>
386 ; CHECK-NEXT: ret <4 x double> [[S]]
388 %b = fdiv reassoc <4 x double> %v, <double 41.0, double 42.0, double 43.0, double 44.0>
389 %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
393 define <4 x double> @frem(<4 x double> %v) {
394 ; CHECK-LABEL: @frem(
395 ; CHECK-NEXT: [[B:%.*]] = frem <4 x double> <double 4.100000e+01, double 4.200000e+01, double poison, double poison>, [[V:%.*]]
396 ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
397 ; CHECK-NEXT: ret <4 x double> [[S]]
399 %b = frem <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
400 %s = shufflevector <4 x double> %b, <4 x double> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
404 ; Tests where both operands of the shuffle are binops with the same opcode.
406 define <4 x i32> @add_add(<4 x i32> %v0) {
407 ; CHECK-LABEL: @add_add(
408 ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 3, i32 8>
409 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
411 %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
412 %t2 = add <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
413 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
417 define <4 x i32> @add_add_nsw(<4 x i32> %v0) {
418 ; CHECK-LABEL: @add_add_nsw(
419 ; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 3, i32 8>
420 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
422 %t1 = add nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
423 %t2 = add nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
424 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
428 define <4 x i32> @add_add_undef_mask_elt(<4 x i32> %v0) {
429 ; CHECK-LABEL: @add_add_undef_mask_elt(
430 ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 undef, i32 8>
431 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
433 %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
434 %t2 = add <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
435 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
439 ; Poison flags must be dropped or undef must be replaced with safe constant.
441 define <4 x i32> @add_add_nsw_undef_mask_elt(<4 x i32> %v0) {
442 ; CHECK-LABEL: @add_add_nsw_undef_mask_elt(
443 ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 undef, i32 8>
444 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
446 %t1 = add nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
447 %t2 = add nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
448 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
452 ; Constant operand 0 (LHS) also works.
454 define <4 x i32> @sub_sub(<4 x i32> %v0) {
455 ; CHECK-LABEL: @sub_sub(
456 ; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0:%.*]]
457 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
459 %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
460 %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
461 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
465 define <4 x i32> @sub_sub_nuw(<4 x i32> %v0) {
466 ; CHECK-LABEL: @sub_sub_nuw(
467 ; CHECK-NEXT: [[TMP1:%.*]] = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0:%.*]]
468 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
470 %t1 = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
471 %t2 = sub nuw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
472 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
476 define <4 x i32> @sub_sub_undef_mask_elt(<4 x i32> %v0) {
477 ; CHECK-LABEL: @sub_sub_undef_mask_elt(
478 ; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[V0:%.*]]
479 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
481 %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
482 %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
483 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
487 ; Poison flags must be dropped or undef must be replaced with safe constant.
489 define <4 x i32> @sub_sub_nuw_undef_mask_elt(<4 x i32> %v0) {
490 ; CHECK-LABEL: @sub_sub_nuw_undef_mask_elt(
491 ; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[V0:%.*]]
492 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
494 %t1 = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
495 %t2 = sub nuw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
496 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
500 ; If any element of the shuffle mask operand is undef, that element of the result is undef.
501 ; The shuffle is eliminated in this transform, but we can replace a constant element with undef.
503 define <4 x i32> @mul_mul(<4 x i32> %v0) {
504 ; CHECK-LABEL: @mul_mul(
505 ; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[V0:%.*]], <i32 undef, i32 6, i32 3, i32 8>
506 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
508 %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
509 %t2 = mul <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
510 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
514 ; Preserve flags when possible.
516 define <4 x i32> @shl_shl(<4 x i32> %v0) {
517 ; CHECK-LABEL: @shl_shl(
518 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 3, i32 4>
519 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
521 %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
522 %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
523 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
527 define <4 x i32> @shl_shl_nuw(<4 x i32> %v0) {
528 ; CHECK-LABEL: @shl_shl_nuw(
529 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 3, i32 4>
530 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
532 %t1 = shl nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
533 %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
534 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
538 ; Shift by undef is poison. Undef must be replaced by safe constant.
540 define <4 x i32> @shl_shl_undef_mask_elt(<4 x i32> %v0) {
541 ; CHECK-LABEL: @shl_shl_undef_mask_elt(
542 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 3, i32 0>
543 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
545 %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
546 %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
547 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
551 ; Shift by undef is poison. Undef must be replaced by safe constant.
553 define <4 x i32> @shl_shl_nuw_undef_mask_elt(<4 x i32> %v0) {
554 ; CHECK-LABEL: @shl_shl_nuw_undef_mask_elt(
555 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 3, i32 0>
556 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
558 %t1 = shl nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
559 %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
560 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
564 ; Can't propagate 'exact' here: only one of the two lshr ops carries the flag.
566 define <4 x i32> @lshr_lshr(<4 x i32> %v0) {
567 ; CHECK-LABEL: @lshr_lshr(
568 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[V0:%.*]]
569 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
571 %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
572 %t2 = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
573 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
579 define <3 x i32> @ashr_ashr(<3 x i32> %v0) {
580 ; CHECK-LABEL: @ashr_ashr(
581 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <3 x i32> [[V0:%.*]], <i32 4, i32 2, i32 3>
582 ; CHECK-NEXT: ret <3 x i32> [[TMP1]]
584 %t1 = ashr <3 x i32> %v0, <i32 1, i32 2, i32 3>
585 %t2 = ashr <3 x i32> %v0, <i32 4, i32 5, i32 6>
586 %t3 = shufflevector <3 x i32> %t1, <3 x i32> %t2, <3 x i32> <i32 3, i32 1, i32 2>
590 define <3 x i42> @and_and(<3 x i42> %v0) {
591 ; CHECK-LABEL: @and_and(
592 ; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i42> [[V0:%.*]], <i42 1, i42 5, i42 undef>
593 ; CHECK-NEXT: ret <3 x i42> [[TMP1]]
595 %t1 = and <3 x i42> %v0, <i42 1, i42 2, i42 3>
596 %t2 = and <3 x i42> %v0, <i42 4, i42 5, i42 6>
597 %t3 = shufflevector <3 x i42> %t1, <3 x i42> %t2, <3 x i32> <i32 0, i32 4, i32 undef>
601 ; It doesn't matter if the intermediate ops have extra uses.
603 define <4 x i32> @or_or(<4 x i32> %v0) {
604 ; CHECK-LABEL: @or_or(
605 ; CHECK-NEXT: [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
606 ; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[V0]], <i32 5, i32 6, i32 3, i32 4>
607 ; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
608 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
610 %t1 = or <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
611 %t2 = or <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
612 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
613 call void @use_v4i32(<4 x i32> %t1)
617 define <4 x i32> @xor_xor(<4 x i32> %v0) {
618 ; CHECK-LABEL: @xor_xor(
619 ; CHECK-NEXT: [[T2:%.*]] = xor <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 7, i32 8>
620 ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[V0]], <i32 1, i32 6, i32 3, i32 4>
621 ; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T2]])
622 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
624 %t1 = xor <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
625 %t2 = xor <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
626 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
627 call void @use_v4i32(<4 x i32> %t2)
631 define <4 x i32> @udiv_udiv(<4 x i32> %v0) {
632 ; CHECK-LABEL: @udiv_udiv(
633 ; CHECK-NEXT: [[T1:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
634 ; CHECK-NEXT: [[T2:%.*]] = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V0]]
635 ; CHECK-NEXT: [[TMP1:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0]]
636 ; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
637 ; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T2]])
638 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
640 %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
641 %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
642 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
643 call void @use_v4i32(<4 x i32> %t1)
644 call void @use_v4i32(<4 x i32> %t2)
648 ; Div/rem need special handling if the shuffle has undef elements.
650 define <4 x i32> @sdiv_sdiv(<4 x i32> %v0) {
651 ; CHECK-LABEL: @sdiv_sdiv(
652 ; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 8>
653 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
655 %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
656 %t2 = sdiv <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
657 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
661 define <4 x i32> @sdiv_sdiv_exact(<4 x i32> %v0) {
662 ; CHECK-LABEL: @sdiv_sdiv_exact(
663 ; CHECK-NEXT: [[TMP1:%.*]] = sdiv exact <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 8>
664 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
666 %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
667 %t2 = sdiv exact <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
668 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
672 define <4 x i32> @sdiv_sdiv_undef_mask_elt(<4 x i32> %v0) {
673 ; CHECK-LABEL: @sdiv_sdiv_undef_mask_elt(
674 ; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 1>
675 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
677 %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
678 %t2 = sdiv <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
679 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
683 define <4 x i32> @sdiv_sdiv_exact_undef_mask_elt(<4 x i32> %v0) {
684 ; CHECK-LABEL: @sdiv_sdiv_exact_undef_mask_elt(
685 ; CHECK-NEXT: [[TMP1:%.*]] = sdiv exact <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 1>
686 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
688 %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
689 %t2 = sdiv exact <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
690 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
694 define <4 x i32> @urem_urem(<4 x i32> %v0) {
695 ; CHECK-LABEL: @urem_urem(
696 ; CHECK-NEXT: [[TMP1:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 8>, [[V0:%.*]]
697 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
699 %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
700 %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
701 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
705 ; This is folded by using a safe constant.
707 define <4 x i32> @urem_urem_undef_mask_elt(<4 x i32> %v0) {
708 ; CHECK-LABEL: @urem_urem_undef_mask_elt(
709 ; CHECK-NEXT: [[TMP1:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 0>, [[V0:%.*]]
710 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
712 %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
713 %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
714 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
718 define <4 x i32> @srem_srem(<4 x i32> %v0) {
719 ; CHECK-LABEL: @srem_srem(
720 ; CHECK-NEXT: [[TMP1:%.*]] = srem <4 x i32> <i32 1, i32 2, i32 7, i32 4>, [[V0:%.*]]
721 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
723 %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
724 %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
725 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
729 ; This is folded by using a safe constant.
731 define <4 x i32> @srem_srem_undef_mask_elt(<4 x i32> %v0) {
732 ; CHECK-LABEL: @srem_srem_undef_mask_elt(
733 ; CHECK-NEXT: [[TMP1:%.*]] = srem <4 x i32> <i32 1, i32 0, i32 7, i32 4>, [[V0:%.*]]
734 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
736 %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
737 %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
738 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
744 define <4 x float> @fadd_fadd(<4 x float> %v0) {
745 ; CHECK-LABEL: @fadd_fadd(
746 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[V0:%.*]], <float 1.000000e+00, float 2.000000e+00, float 7.000000e+00, float 8.000000e+00>
747 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
749 %t1 = fadd <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
750 %t2 = fadd <4 x float> %v0, <float 5.0, float 6.0, float 7.0, float 8.0>
751 %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
755 define <4 x double> @fsub_fsub(<4 x double> %v0) {
756 ; CHECK-LABEL: @fsub_fsub(
757 ; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[V0:%.*]]
758 ; CHECK-NEXT: ret <4 x double> [[TMP1]]
760 %t1 = fsub <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
761 %t2 = fsub <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v0
762 %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
768 define <4 x float> @fmul_fmul(<4 x float> %v0) {
769 ; CHECK-LABEL: @fmul_fmul(
770 ; CHECK-NEXT: [[TMP1:%.*]] = fmul nnan ninf <4 x float> [[V0:%.*]], <float 1.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>
771 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
773 %t1 = fmul nnan ninf <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
774 %t2 = fmul nnan ninf <4 x float> %v0, <float 5.0, float 6.0, float 7.0, float 8.0>
775 %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
779 define <4 x double> @fdiv_fdiv(<4 x double> %v0) {
780 ; CHECK-LABEL: @fdiv_fdiv(
781 ; CHECK-NEXT: [[TMP1:%.*]] = fdiv arcp <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[V0:%.*]]
782 ; CHECK-NEXT: ret <4 x double> [[TMP1]]
784 %t1 = fdiv fast <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
785 %t2 = fdiv nnan arcp <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v0
786 %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
790 ; The variable operand must be in the same position (operand 0 or operand 1) in both binops.
792 define <4 x double> @frem_frem(<4 x double> %v0) {
793 ; CHECK-LABEL: @frem_frem(
794 ; CHECK-NEXT: [[T1:%.*]] = frem <4 x double> <double 1.000000e+00, double 2.000000e+00, double poison, double poison>, [[V0:%.*]]
795 ; CHECK-NEXT: [[T2:%.*]] = frem <4 x double> [[V0]], <double poison, double poison, double 7.000000e+00, double 8.000000e+00>
796 ; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
797 ; CHECK-NEXT: ret <4 x double> [[T3]]
799 %t1 = frem <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
800 %t2 = frem <4 x double> %v0, <double 5.0, double 6.0, double 7.0, double 8.0>
801 %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
805 define <4 x i32> @add_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
806 ; CHECK-LABEL: @add_2_vars(
807 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
808 ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
809 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
811 %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
812 %t2 = add <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
813 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
817 ; Constant operand 0 (LHS) also works.
819 define <4 x i32> @sub_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
820 ; CHECK-LABEL: @sub_2_vars(
821 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
822 ; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[TMP1]]
823 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
825 %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
826 %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
827 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
831 define <4 x i32> @sub_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) {
832 ; CHECK-LABEL: @sub_2_vars_nsw(
833 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
834 ; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[TMP1]]
835 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
837 %t1 = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
838 %t2 = sub nsw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
839 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
843 define <4 x i32> @sub_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
844 ; CHECK-LABEL: @sub_2_vars_undef_mask_elt(
845 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
846 ; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[TMP1]]
847 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
849 %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
850 %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
851 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
855 ; Poison flags must be dropped or undef must be replaced with safe constant.
857 define <4 x i32> @sub_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
858 ; CHECK-LABEL: @sub_2_vars_nsw_undef_mask_elt(
859 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
860 ; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[TMP1]]
861 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
863 %t1 = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
864 %t2 = sub nsw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
865 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
869 ; If any element of the shuffle mask operand is undef, that element of the result is undef.
870 ; With 2 variable operands, a shuffle of the variables remains after this transform, and the matching constant element can be replaced with undef.
872 define <4 x i32> @mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
873 ; CHECK-LABEL: @mul_2_vars(
874 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
875 ; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
876 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
878 %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
879 %t2 = mul <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
880 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
884 define <4 x i32> @mul_2_vars_nuw(<4 x i32> %v0, <4 x i32> %v1) {
885 ; CHECK-LABEL: @mul_2_vars_nuw(
886 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
887 ; CHECK-NEXT: [[TMP2:%.*]] = mul nuw <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
888 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
890 %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
891 %t2 = mul nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
892 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
896 define <4 x i32> @mul_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
897 ; CHECK-LABEL: @mul_2_vars_undef_mask_elt(
898 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
899 ; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 undef, i32 3, i32 8>
900 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
902 %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
903 %t2 = mul <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
904 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
908 ; Poison flags must be dropped or undef must be replaced with safe constant.
910 define <4 x i32> @mul_2_vars_nuw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
911 ; CHECK-LABEL: @mul_2_vars_nuw_undef_mask_elt(
912 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
913 ; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 undef, i32 3, i32 8>
914 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
916 %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
917 %t2 = mul nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
918 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
922 ; Preserve flags when possible.
924 define <4 x i32> @shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
925 ; CHECK-LABEL: @shl_2_vars(
926 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
927 ; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 4>
928 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
930 %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
931 %t2 = shl <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
932 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
936 define <4 x i32> @shl_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) {
937 ; CHECK-LABEL: @shl_2_vars_nsw(
938 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
939 ; CHECK-NEXT: [[TMP2:%.*]] = shl nsw <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 4>
940 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
942 %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
943 %t2 = shl nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
944 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
948 ; Shift by undef is poison. Undef is replaced by safe constant.
950 define <4 x i32> @shl_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
951 ; CHECK-LABEL: @shl_2_vars_undef_mask_elt(
952 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
953 ; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[TMP1]], <i32 0, i32 6, i32 3, i32 0>
954 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
956 %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
957 %t2 = shl <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
958 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
962 ; Shift by undef is poison. Undef is replaced by safe constant.
964 define <4 x i32> @shl_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
965 ; CHECK-LABEL: @shl_2_vars_nsw_undef_mask_elt(
966 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
967 ; CHECK-NEXT: [[TMP2:%.*]] = shl nsw <4 x i32> [[TMP1]], <i32 0, i32 6, i32 3, i32 0>
968 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
970 %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
971 %t2 = shl nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
972 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
976 ; Can't propagate the flag here.
978 define <4 x i32> @lshr_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
979 ; CHECK-LABEL: @lshr_2_vars(
980 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
981 ; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[TMP1]]
982 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
984 %t1 = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
985 %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
986 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
990 define <4 x i32> @lshr_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
991 ; CHECK-LABEL: @lshr_2_vars_exact(
992 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
993 ; CHECK-NEXT: [[TMP2:%.*]] = lshr exact <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[TMP1]]
994 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
996 %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
997 %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
998 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
1002 ; TODO: This would require a new shuffle mask (replace undef with op0 or op1 lane). Otherwise, we have shift-by-undef.
1004 define <4 x i32> @lshr_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
1005 ; CHECK-LABEL: @lshr_2_vars_undef_mask_elt(
1006 ; CHECK-NEXT: [[T1:%.*]] = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
1007 ; CHECK-NEXT: [[T2:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
1008 ; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
1009 ; CHECK-NEXT: ret <4 x i32> [[T3]]
1011 %t1 = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
1012 %t2 = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
1013 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
1017 ; TODO: This would require a new shuffle mask (replace undef with op0 or op1 lane). Otherwise, we have shift-by-undef.
1019 define <4 x i32> @lshr_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
1020 ; CHECK-LABEL: @lshr_2_vars_exact_undef_mask_elt(
1021 ; CHECK-NEXT: [[T1:%.*]] = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
1022 ; CHECK-NEXT: [[T2:%.*]] = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
1023 ; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
1024 ; CHECK-NEXT: ret <4 x i32> [[T3]]
1026 %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
1027 %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
1028 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
1034 define <3 x i32> @ashr_2_vars(<3 x i32> %v0, <3 x i32> %v1) {
1035 ; CHECK-LABEL: @ashr_2_vars(
1036 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[V1:%.*]], <3 x i32> [[V0:%.*]], <3 x i32> <i32 0, i32 4, i32 5>
1037 ; CHECK-NEXT: [[TMP2:%.*]] = ashr <3 x i32> [[TMP1]], <i32 4, i32 2, i32 3>
1038 ; CHECK-NEXT: ret <3 x i32> [[TMP2]]
1040 %t1 = ashr <3 x i32> %v0, <i32 1, i32 2, i32 3>
1041 %t2 = ashr <3 x i32> %v1, <i32 4, i32 5, i32 6>
1042 %t3 = shufflevector <3 x i32> %t1, <3 x i32> %t2, <3 x i32> <i32 3, i32 1, i32 2> ; 3-element vectors: lane 0 from %t2, lanes 1-2 from %t1
1046 define <3 x i42> @and_2_vars(<3 x i42> %v0, <3 x i42> %v1) {
1047 ; CHECK-LABEL: @and_2_vars(
1048 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i42> [[V0:%.*]], <3 x i42> [[V1:%.*]], <3 x i32> <i32 0, i32 4, i32 undef>
1049 ; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i42> [[TMP1]], <i42 1, i42 5, i42 undef>
1050 ; CHECK-NEXT: ret <3 x i42> [[TMP2]]
1052 %t1 = and <3 x i42> %v0, <i42 1, i42 2, i42 3>
1053 %t2 = and <3 x i42> %v1, <i42 4, i42 5, i42 6>
1054 %t3 = shufflevector <3 x i42> %t1, <3 x i42> %t2, <3 x i32> <i32 0, i32 4, i32 undef> ; i42 elements: lane 0 from %t1, lane 1 from %t2, lane 2 undef
1058 ; It doesn't matter if only one intermediate op has extra uses.
1060 define <4 x i32> @or_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
1061 ; CHECK-LABEL: @or_2_vars(
1062 ; CHECK-NEXT: [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
1063 ; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
1064 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1065 ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], <i32 5, i32 6, i32 3, i32 4>
1066 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
1068 %t1 = or <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1069 call void @use_v4i32(<4 x i32> %t1)
1070 %t2 = or <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
1071 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1075 define <4 x i32> @or_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
1076 ; CHECK-LABEL: @or_2_vars_undef_mask_elt(
1077 ; CHECK-NEXT: [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
1078 ; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
1079 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
1080 ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], <i32 5, i32 6, i32 3, i32 undef>
1081 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
1083 %t1 = or <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1084 call void @use_v4i32(<4 x i32> %t1)
1085 %t2 = or <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
1086 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 undef>
1090 ; But we don't transform if both intermediate values have extra uses.
1092 define <4 x i32> @xor_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
1093 ; CHECK-LABEL: @xor_2_vars(
1094 ; CHECK-NEXT: [[T1:%.*]] = xor <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
1095 ; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
1096 ; CHECK-NEXT: [[T2:%.*]] = xor <4 x i32> [[V1:%.*]], <i32 5, i32 6, i32 7, i32 8>
1097 ; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T2]])
1098 ; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
1099 ; CHECK-NEXT: ret <4 x i32> [[T3]]
1101 %t1 = xor <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1102 call void @use_v4i32(<4 x i32> %t1)
1103 %t2 = xor <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
1104 call void @use_v4i32(<4 x i32> %t2)
1105 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
1109 ; Div/rem need special handling if the shuffle has undef elements.
1111 define <4 x i32> @udiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
1112 ; CHECK-LABEL: @udiv_2_vars(
1113 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
1114 ; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i32> <i32 5, i32 2, i32 3, i32 8>, [[TMP1]]
1115 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
1117 %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
1118 %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
1119 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
1123 define <4 x i32> @udiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
1124 ; CHECK-LABEL: @udiv_2_vars_exact(
1125 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
1126 ; CHECK-NEXT: [[TMP2:%.*]] = udiv exact <4 x i32> <i32 5, i32 2, i32 3, i32 8>, [[TMP1]]
1127 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
1129 %t1 = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
1130 %t2 = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
1131 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
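1135 ; TODO: This could be transformed, but a safe constant is not enough: the constants are operand 0, so the undef mask lane would reach the variable divisor (div-by-undef). It would require a new shuffle mask (replace undef with op0 or op1 lane).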
1137 define <4 x i32> @udiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
1138 ; CHECK-LABEL: @udiv_2_vars_undef_mask_elt(
1139 ; CHECK-NEXT: [[T1:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
1140 ; CHECK-NEXT: [[T2:%.*]] = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
1141 ; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
1142 ; CHECK-NEXT: ret <4 x i32> [[T3]]
1144 %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
1145 %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
1146 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
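1150 ; TODO: This could be transformed, but a safe constant is not enough: the constants are operand 0, so the undef mask lane would reach the variable divisor (div-by-undef). It would require a new shuffle mask (replace undef with op0 or op1 lane).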
1152 define <4 x i32> @udiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
1153 ; CHECK-LABEL: @udiv_2_vars_exact_undef_mask_elt(
1154 ; CHECK-NEXT: [[T1:%.*]] = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
1155 ; CHECK-NEXT: [[T2:%.*]] = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
1156 ; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
1157 ; CHECK-NEXT: ret <4 x i32> [[T3]]
1159 %t1 = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
1160 %t2 = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
1161 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
1165 ; If the shuffle has no undefs, it's safe to shuffle the variables first.
1167 define <4 x i32> @sdiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
1168 ; CHECK-LABEL: @sdiv_2_vars(
1169 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
1170 ; CHECK-NEXT: [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 4>
1171 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
1173 %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1174 %t2 = sdiv <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
1175 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
1179 define <4 x i32> @sdiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
1180 ; CHECK-LABEL: @sdiv_2_vars_exact(
1181 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
1182 ; CHECK-NEXT: [[TMP2:%.*]] = sdiv exact <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 4>
1183 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
1185 %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1186 %t2 = sdiv exact <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
1187 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
1191 ; Div by undef is UB. Undef is replaced by safe constant.
1193 define <4 x i32> @sdiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
1194 ; CHECK-LABEL: @sdiv_2_vars_undef_mask_elt(
1195 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
1196 ; CHECK-NEXT: [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 1>
1197 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
1199 %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1200 %t2 = sdiv <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
1201 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
1205 ; Div by undef is UB. Undef is replaced by safe constant.
1207 define <4 x i32> @sdiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
1208 ; CHECK-LABEL: @sdiv_2_vars_exact_undef_mask_elt(
1209 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
1210 ; CHECK-NEXT: [[TMP2:%.*]] = sdiv exact <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 1>
1211 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
1213 %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1214 %t2 = sdiv exact <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
1215 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
1219 ; If the shuffle has no undefs, it's safe to shuffle the variables first. With an undef mask element and a variable divisor (srem_2_vars), the fold is not performed.
1221 define <4 x i32> @urem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
1222 ; CHECK-LABEL: @urem_2_vars(
1223 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1224 ; CHECK-NEXT: [[TMP2:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 8>, [[TMP1]]
1225 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
1227 %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
1228 %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
1229 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1233 define <4 x i32> @srem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
1234 ; CHECK-LABEL: @srem_2_vars(
1235 ; CHECK-NEXT: [[T1:%.*]] = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
1236 ; CHECK-NEXT: [[T2:%.*]] = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
1237 ; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
1238 ; CHECK-NEXT: ret <4 x i32> [[T3]]
1240 %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
1241 %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
1242 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
1248 define <4 x float> @fadd_2_vars(<4 x float> %v0, <4 x float> %v1) {
1249 ; CHECK-LABEL: @fadd_2_vars(
1250 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V0:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1251 ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float 7.000000e+00, float 8.000000e+00>
1252 ; CHECK-NEXT: ret <4 x float> [[TMP2]]
1254 %t1 = fadd <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
1255 %t2 = fadd <4 x float> %v1, <float 5.0, float 6.0, float 7.0, float 8.0>
1256 %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1260 define <4 x double> @fsub_2_vars(<4 x double> %v0, <4 x double> %v1) {
1261 ; CHECK-LABEL: @fsub_2_vars(
1262 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
1263 ; CHECK-NEXT: [[TMP2:%.*]] = fsub <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[TMP1]]
1264 ; CHECK-NEXT: ret <4 x double> [[TMP2]]
1266 %t1 = fsub <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
1267 %t2 = fsub <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v1
1268 %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
1269 ret <4 x double> %t3
1272 ; Intersect any FMF. With an undef mask element (frem_2_vars), the result does not keep the common 'nnan'.
1274 define <4 x float> @fmul_2_vars(<4 x float> %v0, <4 x float> %v1) {
1275 ; CHECK-LABEL: @fmul_2_vars(
1276 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V0:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1277 ; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc nsz <4 x float> [[TMP1]], <float 1.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>
1278 ; CHECK-NEXT: ret <4 x float> [[TMP2]]
1280 %t1 = fmul reassoc nsz <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
1281 %t2 = fmul reassoc nsz <4 x float> %v1, <float 5.0, float 6.0, float 7.0, float 8.0>
1282 %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1286 define <4 x double> @frem_2_vars(<4 x double> %v0, <4 x double> %v1) {
1287 ; CHECK-LABEL: @frem_2_vars(
1288 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
1289 ; CHECK-NEXT: [[TMP2:%.*]] = frem <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[TMP1]]
1290 ; CHECK-NEXT: ret <4 x double> [[TMP2]]
1292 %t1 = frem nnan ninf <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
1293 %t2 = frem nnan arcp <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v1
1294 %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
1295 ret <4 x double> %t3
1298 ; The variable operand must be either the first operand or second operand in both binops.
1300 define <4 x double> @fdiv_2_vars(<4 x double> %v0, <4 x double> %v1) {
1301 ; CHECK-LABEL: @fdiv_2_vars(
1302 ; CHECK-NEXT: [[T1:%.*]] = fdiv <4 x double> <double 1.000000e+00, double 2.000000e+00, double poison, double poison>, [[V0:%.*]]
1303 ; CHECK-NEXT: [[T2:%.*]] = fdiv <4 x double> [[V1:%.*]], <double poison, double poison, double 7.000000e+00, double 8.000000e+00>
1304 ; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1305 ; CHECK-NEXT: ret <4 x double> [[T3]]
1307 %t1 = fdiv <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
1308 %t2 = fdiv <4 x double> %v1, <double 5.0, double 6.0, double 7.0, double 8.0>
1309 %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1310 ret <4 x double> %t3
1313 ; Shift-left with constant shift amount can be converted to mul to enable the fold.
1315 define <4 x i32> @mul_shl(<4 x i32> %v0) {
1316 ; CHECK-LABEL: @mul_shl(
1317 ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw <4 x i32> [[V0:%.*]], <i32 32, i32 64, i32 3, i32 4>
1318 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
1320 %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1321 %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
1322 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1326 ; Try with shift as operand 0 of the shuffle; 'nsw' is dropped for safety, but that could be improved.
1328 define <4 x i32> @shl_mul(<4 x i32> %v0) {
1329 ; CHECK-LABEL: @shl_mul(
1330 ; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[V0:%.*]], <i32 5, i32 undef, i32 8, i32 16>
1331 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
1333 %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1334 %t2 = mul nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
1335 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
1339 ; Demanded elements + simplification can remove the mul alone, but that's not the best case.
1341 define <4 x i32> @mul_is_nop_shl(<4 x i32> %v0) {
1342 ; CHECK-LABEL: @mul_is_nop_shl(
1343 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 7, i32 8>
1344 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
1346 %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1347 %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
1348 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1352 ; Negative test: shift amount (operand 1) must be constant.
1354 define <4 x i32> @shl_mul_not_constant_shift_amount(<4 x i32> %v0) {
1355 ; CHECK-LABEL: @shl_mul_not_constant_shift_amount(
1356 ; CHECK-NEXT: [[T1:%.*]] = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
1357 ; CHECK-NEXT: [[T2:%.*]] = mul <4 x i32> [[V0]], <i32 5, i32 6, i32 poison, i32 poison>
1358 ; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T2]], <4 x i32> [[T1]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1359 ; CHECK-NEXT: ret <4 x i32> [[T3]]
1361 %t1 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
1362 %t2 = mul <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
1363 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1367 ; Try with 2 variable inputs.
1369 define <4 x i32> @mul_shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
1370 ; CHECK-LABEL: @mul_shl_2_vars(
1371 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1372 ; CHECK-NEXT: [[TMP2:%.*]] = mul nuw <4 x i32> [[TMP1]], <i32 32, i32 64, i32 3, i32 4>
1373 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
1375 %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1376 %t2 = shl nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
1377 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1381 define <4 x i32> @shl_mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
1382 ; CHECK-LABEL: @shl_mul_2_vars(
1383 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 undef, i32 6, i32 7>
1384 ; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[TMP1]], <i32 5, i32 undef, i32 8, i32 16>
1385 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
1387 %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1388 %t2 = mul nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
1389 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
1393 ; Negate can be converted to mul to enable the fold.
1395 define <4 x i32> @mul_neg(<4 x i32> %x) {
1396 ; CHECK-LABEL: @mul_neg(
1397 ; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[X:%.*]], <i32 257, i32 -3, i32 -1, i32 -9>
1398 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
1400 %m = mul <4 x i32> %x, <i32 257, i32 -3, i32 poison, i32 -9>
1401 %n = sub <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, %x
1402 %r = shufflevector <4 x i32> %m, <4 x i32> %n, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
1406 define <3 x i79> @neg_mul(<3 x i79> %x) {
1407 ; CHECK-LABEL: @neg_mul(
1408 ; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <3 x i79> [[X:%.*]], <i79 -1, i79 -3, i79 -1>
1409 ; CHECK-NEXT: ret <3 x i79> [[TMP1]]
1411 %n = sub nsw <3 x i79> <i79 0, i79 poison, i79 0>, %x
1412 %m = mul nsw <3 x i79> %x, <i79 poison, i79 -3, i79 poison>
1413 %r = shufflevector <3 x i79> %n, <3 x i79> %m, <3 x i32> <i32 0, i32 4, i32 2>
1417 define <4 x i32> @mul_neg_2_vars(<4 x i32> %x, <4 x i32> %y) {
1418 ; CHECK-LABEL: @mul_neg_2_vars(
1419 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
1420 ; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[TMP1]], <i32 42, i32 -1, i32 -1, i32 6>
1421 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
1423 %m = mul nuw <4 x i32> %x, <i32 42, i32 poison, i32 poison, i32 6>
1424 %n = sub nsw <4 x i32> <i32 poison, i32 0, i32 0, i32 poison>, %y
1425 %r = shufflevector <4 x i32> %m, <4 x i32> %n, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
1429 define <4 x i32> @neg_mul_2_vars(<4 x i32> %x, <4 x i32> %y) {
1430 ; CHECK-LABEL: @neg_mul_2_vars(
1431 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
1432 ; CHECK-NEXT: [[TMP2:%.*]] = mul nsw <4 x i32> [[TMP1]], <i32 -1, i32 42, i32 -1, i32 6>
1433 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
1435 %n = sub nsw <4 x i32> <i32 0, i32 poison, i32 0, i32 poison>, %y
1436 %m = mul nuw nsw <4 x i32> %x, <i32 poison, i32 42, i32 poison, i32 6>
1437 %r = shufflevector <4 x i32> %n, <4 x i32> %m, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
1441 ; Or with constant can be converted to add to enable the fold.
1442 ; The 'shl' is here to allow analysis to determine that the 'or' can be transformed to 'add'.
1443 ; TODO: The 'or' constant is limited to a splat.
; The 'add' is shuffle operand 0 and the 'or' is shuffle operand 1; both use
; the same shifted value %v0. Shifting left by 5 leaves the low 5 bits zero and
; the 'or' constant 31 occupies exactly those 5 bits. The expected output is a
; single 'add' of %v0 with the per-lane constant <31, 31, 65536, 65537>.
1445 define <4 x i32> @add_or(<4 x i32> %v) {
1446 ; CHECK-LABEL: @add_or(
1447 ; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], <i32 5, i32 5, i32 5, i32 5>
1448 ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V0]], <i32 31, i32 31, i32 65536, i32 65537>
1449 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
1451 %v0 = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5> ; clear the bottom bits
1452 %t1 = add <4 x i32> %v0, <i32 65534, i32 65535, i32 65536, i32 65537> ; this can't be converted to 'or'
1453 %t2 = or <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31> ; set the bottom bits
; Mask indices 4 and 5 read lanes 0-1 from %t2 (the 'or'); indices 2 and 3 read
; lanes 2-3 from %t1 (the 'add').
1454 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1458 ; Try with 'or' as operand 0 of the shuffle.
; Here the 'or' is shuffle operand 0 and the 'add' is shuffle operand 1, both on
; the same value %v0. A logical shift right by 3 on i8 leaves the top 3 bits
; zero, and the 'or' constant 192 (0b11000000) only touches the top 2 bits.
; The expected output is one 'add nuw nsw' of %v0 with <1, 2, -64, -64>, where
; -64 is the i8 value 192 printed as a signed number.
1460 define <4 x i8> @or_add(<4 x i8> %v) {
1461 ; CHECK-LABEL: @or_add(
1462 ; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 3, i8 3, i8 3, i8 3>
1463 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i8> [[V0]], <i8 1, i8 2, i8 -64, i8 -64>
1464 ; CHECK-NEXT: ret <4 x i8> [[TMP1]]
1466 %v0 = lshr <4 x i8> %v, <i8 3, i8 3, i8 3, i8 3> ; clear the top bits
1467 %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192> ; set some top bits
1468 %t2 = add nsw nuw <4 x i8> %v0, <i8 1, i8 2, i8 3, i8 4> ; this can't be converted to 'or'
; Mask indices 4 and 5 read lanes 0-1 from %t2 (the 'add'); indices 2 and 3
; read lanes 2-3 from %t1 (the 'or').
1469 %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1473 ; Negative test: not all 'or' insts can be converted to 'add'.
; Same shape as the 'or' + 'add' select-shuffle on one value, but the shift
; right is only by 1, so just the top bit of %v0 is known zero while the 'or'
; constant 192 (0b11000000) also covers bit 6. The expected output keeps both
; the 'or' and the 'add' plus a shuffle; the constant lanes that the shuffle
; does not read become poison, and the shuffle operands appear in swapped
; order with the mask adjusted to <0, 1, 6, 7>.
1475 define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) {
1476 ; CHECK-LABEL: @or_add_not_enough_masking(
1477 ; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 1, i8 1, i8 1, i8 1>
1478 ; CHECK-NEXT: [[T1:%.*]] = or <4 x i8> [[V0]], <i8 poison, i8 poison, i8 -64, i8 -64>
1479 ; CHECK-NEXT: [[T2:%.*]] = add nuw nsw <4 x i8> [[V0]], <i8 1, i8 2, i8 poison, i8 poison>
1480 ; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i8> [[T2]], <4 x i8> [[T1]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1481 ; CHECK-NEXT: ret <4 x i8> [[T3]]
1483 %v0 = lshr <4 x i8> %v, <i8 1, i8 1, i8 1, i8 1> ; clear not enough top bits
1484 %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192> ; set some top bits
1485 %t2 = add nsw nuw <4 x i8> %v0, <i8 1, i8 2, i8 3, i8 4> ; this can't be converted to 'or'
; Lanes 0-1 are read from %t2 (the 'add') and lanes 2-3 from %t1 (the 'or').
1486 %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1490 ; Try with 2 variable inputs.
; Two-variable form: the 'add' operates on the separate argument %v1 while the
; 'or' operates on %v0 (the shifted %v). The expected output first shuffles the
; variable inputs (lanes 0-1 from the shifted value, lanes 2-3 from %v1) and
; then applies a single 'add' with the constant <31, 31, 65536, 65537>.
1492 define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) {
1493 ; CHECK-LABEL: @add_or_2_vars(
1494 ; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], <i32 5, i32 5, i32 5, i32 5>
1495 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1496 ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], <i32 31, i32 31, i32 65536, i32 65537>
1497 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
1499 %v0 = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5> ; clear the bottom bits
1500 %t1 = add <4 x i32> %v1, <i32 65534, i32 65535, i32 65536, i32 65537> ; this can't be converted to 'or'
1501 %t2 = or <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31> ; set the bottom bits
; Mask indices 4 and 5 read lanes 0-1 from %t2 (the 'or' of %v0); indices 2 and
; 3 read lanes 2-3 from %t1 (the 'add' of %v1).
1502 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; Two-variable form with the 'or' as shuffle operand 0: the 'or' operates on
; %v0 (the right-shifted %v) and the 'add' operates on the separate argument
; %v1. The expected output shuffles the variable inputs (lanes 0-1 from %v1,
; lanes 2-3 from the shifted value) and then applies a single 'add nuw nsw'
; with <1, 2, -64, -64>, where -64 is the i8 value 192 printed as signed.
1506 define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) {
1507 ; CHECK-LABEL: @or_add_2_vars(
1508 ; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 3, i8 3, i8 3, i8 3>
1509 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[V1:%.*]], <4 x i8> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1510 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw <4 x i8> [[TMP1]], <i8 1, i8 2, i8 -64, i8 -64>
1511 ; CHECK-NEXT: ret <4 x i8> [[TMP2]]
1513 %v0 = lshr <4 x i8> %v, <i8 3, i8 3, i8 3, i8 3> ; clear the top bits
1514 %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192> ; set some top bits
1515 %t2 = add nsw nuw <4 x i8> %v1, <i8 1, i8 2, i8 3, i8 4> ; this can't be converted to 'or'
; Mask indices 4 and 5 read lanes 0-1 from %t2 (the 'add' of %v1); indices 2
; and 3 read lanes 2-3 from %t1 (the 'or' of %v0).
1516 %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1520 ; The undef operand is used to simplify the shuffle mask, but don't assert that too soon.
1522 define <4 x i32> @PR41419(<4 x i32> %v) {
1523 ; CHECK-LABEL: @PR41419(
1524 ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 2, i32 undef>
1525 ; CHECK-NEXT: ret <4 x i32> [[S]]
1527 %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 2, i32 7>