1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -instcombine -S -mtriple=arm -o - %s | FileCheck %s
4 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
; Predicate-vector-to-integer (v2i) intrinsics: each takes an MVE predicate
; vector of 4, 8 or 16 i1 lanes and returns an i32.
6 declare i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1>)
7 declare i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1>)
8 declare i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1>)
; Integer-to-predicate-vector (i2v) intrinsics: each takes an i32 and returns
; a predicate vector of 4, 8 or 16 i1 lanes.
10 declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
11 declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
12 declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
14 ; Round-trip conversions from predicate vector to i32 back to the same
15 ; size of vector should be eliminated.
; <4 x i1> -> i32 -> <4 x i1> round trip. The expected output returns %vin
; directly, with both intrinsic calls removed.
17 define <4 x i1> @v2i2v_4(<4 x i1> %vin) {
18 ; CHECK-LABEL: @v2i2v_4(
20 ; CHECK-NEXT: ret <4 x i1> [[VIN:%.*]]
23 %int = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
24 %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %int)
; <8 x i1> -> i32 -> <8 x i1> round trip. The expected output returns %vin
; directly, with both intrinsic calls removed.
28 define <8 x i1> @v2i2v_8(<8 x i1> %vin) {
29 ; CHECK-LABEL: @v2i2v_8(
31 ; CHECK-NEXT: ret <8 x i1> [[VIN:%.*]]
34 %int = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %vin)
35 %vout = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %int)
; <16 x i1> -> i32 -> <16 x i1> round trip. The expected output returns %vin
; directly, with both intrinsic calls removed.
39 define <16 x i1> @v2i2v_16(<16 x i1> %vin) {
40 ; CHECK-LABEL: @v2i2v_16(
42 ; CHECK-NEXT: ret <16 x i1> [[VIN:%.*]]
45 %int = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %vin)
46 %vout = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %int)
50 ; Conversions from a predicate vector to i32 and then to a _different_
51 ; size of predicate vector should be left alone.
; v2i on a <4 x i1> input feeding i2v that produces <16 x i1>. The lane counts
; differ, so the expected output keeps both calls; the only change is that the
; v2i call gains !range metadata.
53 define <16 x i1> @v2i2v_4_16(<4 x i1> %vin) {
54 ; CHECK-LABEL: @v2i2v_4_16(
56 ; CHECK-NEXT: [[INT:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[VIN:%.*]]), !range [[RNG0:![0-9]+]]
57 ; CHECK-NEXT: [[VOUT:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[INT]])
58 ; CHECK-NEXT: ret <16 x i1> [[VOUT]]
61 %int = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
62 %vout = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %int)
; v2i on an <8 x i1> input feeding i2v that produces <4 x i1>. The lane counts
; differ, so the expected output keeps both calls; the only change is that the
; v2i call gains !range metadata.
66 define <4 x i1> @v2i2v_8_4(<8 x i1> %vin) {
67 ; CHECK-LABEL: @v2i2v_8_4(
69 ; CHECK-NEXT: [[INT:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[VIN:%.*]]), !range [[RNG0]]
70 ; CHECK-NEXT: [[VOUT:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[INT]])
71 ; CHECK-NEXT: ret <4 x i1> [[VOUT]]
74 %int = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %vin)
75 %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %int)
; v2i on a <16 x i1> input feeding i2v that produces <8 x i1>. The lane counts
; differ, so the expected output keeps both calls; the only change is that the
; v2i call gains !range metadata.
79 define <8 x i1> @v2i2v_16_8(<16 x i1> %vin) {
80 ; CHECK-LABEL: @v2i2v_16_8(
82 ; CHECK-NEXT: [[INT:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[VIN:%.*]]), !range [[RNG0]]
83 ; CHECK-NEXT: [[VOUT:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[INT]])
84 ; CHECK-NEXT: ret <8 x i1> [[VOUT]]
87 %int = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %vin)
88 %vout = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %int)
92 ; Round-trip conversions from i32 to predicate vector back to i32
93 ; should be eliminated.
; i32 -> <4 x i1> -> i32 round trip. The expected output returns %iin
; directly, with both intrinsic calls removed.
95 define i32 @i2v2i_4(i32 %iin) {
96 ; CHECK-LABEL: @i2v2i_4(
98 ; CHECK-NEXT: ret i32 [[IIN:%.*]]
101 %vec = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %iin)
102 %iout = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vec)
; i32 -> <8 x i1> -> i32 round trip. The expected output returns %iin
; directly, with both intrinsic calls removed.
106 define i32 @i2v2i_8(i32 %iin) {
107 ; CHECK-LABEL: @i2v2i_8(
109 ; CHECK-NEXT: ret i32 [[IIN:%.*]]
112 %vec = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %iin)
113 %iout = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %vec)
; i32 -> <16 x i1> -> i32 round trip. The expected output returns %iin
; directly, with both intrinsic calls removed.
117 define i32 @i2v2i_16(i32 %iin) {
118 ; CHECK-LABEL: @i2v2i_16(
120 ; CHECK-NEXT: ret i32 [[IIN:%.*]]
123 %vec = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %iin)
124 %iout = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %vec)
128 ; v2i leaves the top 16 bits clear. So a trunc/zext pair applied to
129 ; its output, going via i16, can be completely eliminated - but not
130 ; one going via i8. Similarly with other methods of clearing the top
131 ; bits, like bitwise and.
; A v2i result truncated to i16 and zero-extended back to i32. The expected
; output returns the v2i result itself: the trunc/zext pair is gone.
133 define i32 @v2i_truncext_i16(<4 x i1> %vin) {
134 ; CHECK-LABEL: @v2i_truncext_i16(
136 ; CHECK-NEXT: [[WIDE1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[VIN:%.*]]), !range [[RNG0]]
137 ; CHECK-NEXT: ret i32 [[WIDE1]]
140 %wide1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
141 %narrow = trunc i32 %wide1 to i16
142 %wide2 = zext i16 %narrow to i32
; A v2i result truncated to i8 and zero-extended back to i32. Going via i8
; discards bits 8-15, so the expected output cannot drop the operation: the
; trunc/zext pair is replaced by an 'and' with 255.
146 define i32 @v2i_truncext_i8(<4 x i1> %vin) {
147 ; CHECK-LABEL: @v2i_truncext_i8(
149 ; CHECK-NEXT: [[WIDE1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[VIN:%.*]]), !range [[RNG0]]
150 ; CHECK-NEXT: [[WIDE2:%.*]] = and i32 [[WIDE1]], 255
151 ; CHECK-NEXT: ret i32 [[WIDE2]]
154 %wide1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
155 %narrow = trunc i32 %wide1 to i8
156 %wide2 = zext i8 %narrow to i32
; A v2i result masked with 65535 (0xFFFF, the low 16 bits). The expected
; output returns the v2i result itself: the 'and' is gone.
160 define i32 @v2i_and_16(<4 x i1> %vin) {
161 ; CHECK-LABEL: @v2i_and_16(
163 ; CHECK-NEXT: [[WIDE1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[VIN:%.*]]), !range [[RNG0]]
164 ; CHECK-NEXT: ret i32 [[WIDE1]]
167 %wide1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
168 %wide2 = and i32 %wide1, 65535
; A v2i result masked with 32767 (0x7FFF, the low 15 bits). The mask also
; clears bit 15, so the expected output keeps the 'and'.
172 define i32 @v2i_and_15(<4 x i1> %vin) {
173 ; CHECK-LABEL: @v2i_and_15(
175 ; CHECK-NEXT: [[WIDE1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[VIN:%.*]]), !range [[RNG0]]
176 ; CHECK-NEXT: [[WIDE2:%.*]] = and i32 [[WIDE1]], 32767
177 ; CHECK-NEXT: ret i32 [[WIDE2]]
180 %wide1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
181 %wide2 = and i32 %wide1, 32767
185 ; i2v ignores the top 16 bits of its input. So the same operations,
186 ; applied to a value that's about to be passed to i2v, can be eliminated.
; An i32 truncated to i16 and zero-extended back before being passed to i2v.
; The expected output passes %wide1 straight to i2v: the trunc/zext pair is
; gone.
188 define <4 x i1> @i2v_truncext_i16(i32 %wide1) {
189 ; CHECK-LABEL: @i2v_truncext_i16(
191 ; CHECK-NEXT: [[VOUT:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[WIDE1:%.*]])
192 ; CHECK-NEXT: ret <4 x i1> [[VOUT]]
195 %narrow = trunc i32 %wide1 to i16
196 %wide2 = zext i16 %narrow to i32
197 %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %wide2)
; An i32 truncated to i8 and zero-extended back before being passed to i2v.
; Going via i8 discards bits 8-15, so the expected output cannot drop the
; operation: the trunc/zext pair is replaced by an 'and' with 255.
201 define <4 x i1> @i2v_truncext_i8(i32 %wide1) {
202 ; CHECK-LABEL: @i2v_truncext_i8(
204 ; CHECK-NEXT: [[WIDE2:%.*]] = and i32 [[WIDE1:%.*]], 255
205 ; CHECK-NEXT: [[VOUT:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[WIDE2]])
206 ; CHECK-NEXT: ret <4 x i1> [[VOUT]]
209 %narrow = trunc i32 %wide1 to i8
210 %wide2 = zext i8 %narrow to i32
211 %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %wide2)
; An i32 masked with 65535 (0xFFFF, the low 16 bits) before being passed to
; i2v. The expected output passes %wide1 straight to i2v: the 'and' is gone.
215 define <4 x i1> @i2v_and_16(i32 %wide1) {
216 ; CHECK-LABEL: @i2v_and_16(
218 ; CHECK-NEXT: [[VOUT:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[WIDE1:%.*]])
219 ; CHECK-NEXT: ret <4 x i1> [[VOUT]]
222 %wide2 = and i32 %wide1, 65535
223 %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %wide2)
; An i32 masked with 32767 (0x7FFF, the low 15 bits) before being passed to
; i2v. The mask also clears bit 15, so the expected output keeps the 'and'.
227 define <4 x i1> @i2v_and_15(i32 %wide1) {
228 ; CHECK-LABEL: @i2v_and_15(
230 ; CHECK-NEXT: [[WIDE2:%.*]] = and i32 [[WIDE1:%.*]], 32767
231 ; CHECK-NEXT: [[VOUT:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[WIDE2]])
232 ; CHECK-NEXT: ret <4 x i1> [[VOUT]]
235 %wide2 = and i32 %wide1, 32767
236 %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %wide2)
240 ; If a predicate vector is round-tripped to an integer and back, and
241 ; complemented while it's in integer form, we should collapse that to
242 ; a complement of the vector itself. (Rationale: this is likely to
243 ; allow it to be code-generated as MVE VPNOT.)
; <4 x i1> converted to i32, xor'ed with 65535 (0xFFFF), and converted back.
; The expected output is a single vector xor of %vin with an all-true
; <4 x i1> constant; neither intrinsic call remains.
245 define <4 x i1> @vpnot_4(<4 x i1> %vin) {
246 ; CHECK-LABEL: @vpnot_4(
248 ; CHECK-NEXT: [[VOUT:%.*]] = xor <4 x i1> [[VIN:%.*]], <i1 true, i1 true, i1 true, i1 true>
249 ; CHECK-NEXT: ret <4 x i1> [[VOUT]]
252 %int = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
253 %flipped = xor i32 %int, 65535
254 %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %flipped)
; <8 x i1> converted to i32, xor'ed with 65535 (0xFFFF), and converted back.
; The expected output is a single vector xor of %vin with an all-true
; <8 x i1> constant; neither intrinsic call remains.
258 define <8 x i1> @vpnot_8(<8 x i1> %vin) {
259 ; CHECK-LABEL: @vpnot_8(
261 ; CHECK-NEXT: [[VOUT:%.*]] = xor <8 x i1> [[VIN:%.*]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
262 ; CHECK-NEXT: ret <8 x i1> [[VOUT]]
265 %int = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %vin)
266 %flipped = xor i32 %int, 65535
267 %vout = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %flipped)
; <16 x i1> converted to i32, xor'ed with 65535 (0xFFFF), and converted back.
; The expected output is a single vector xor of %vin with an all-true
; <16 x i1> constant; neither intrinsic call remains.
271 define <16 x i1> @vpnot_16(<16 x i1> %vin) {
272 ; CHECK-LABEL: @vpnot_16(
274 ; CHECK-NEXT: [[VOUT:%.*]] = xor <16 x i1> [[VIN:%.*]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
275 ; CHECK-NEXT: ret <16 x i1> [[VOUT]]
278 %int = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %vin)
279 %flipped = xor i32 %int, 65535
280 %vout = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %flipped)
284 ; And this still works even if the i32 is narrowed to i16 and back on
285 ; opposite sides of the xor.
; Same complement as a plain i32 xor with 65535, but performed in i16: the v2i
; result is truncated to i16, xor'ed with -1, and zero-extended to i32 before
; i2v. The expected output is still a single vector xor of %vin with an
; all-true <4 x i1> constant.
287 define <4 x i1> @vpnot_narrow_4(<4 x i1> %vin) {
288 ; CHECK-LABEL: @vpnot_narrow_4(
290 ; CHECK-NEXT: [[VOUT:%.*]] = xor <4 x i1> [[VIN:%.*]], <i1 true, i1 true, i1 true, i1 true>
291 ; CHECK-NEXT: ret <4 x i1> [[VOUT]]
294 %int = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
295 %narrow = trunc i32 %int to i16
296 %flipped_narrow = xor i16 %narrow, -1
297 %flipped = zext i16 %flipped_narrow to i32
298 %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %flipped)
; Same complement as a plain i32 xor with 65535, but performed in i16: the v2i
; result is truncated to i16, xor'ed with -1, and zero-extended to i32 before
; i2v. The expected output is still a single vector xor of %vin with an
; all-true <8 x i1> constant.
302 define <8 x i1> @vpnot_narrow_8(<8 x i1> %vin) {
303 ; CHECK-LABEL: @vpnot_narrow_8(
305 ; CHECK-NEXT: [[VOUT:%.*]] = xor <8 x i1> [[VIN:%.*]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
306 ; CHECK-NEXT: ret <8 x i1> [[VOUT]]
309 %int = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %vin)
310 %narrow = trunc i32 %int to i16
311 %flipped_narrow = xor i16 %narrow, -1
312 %flipped = zext i16 %flipped_narrow to i32
313 %vout = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %flipped)
; Same complement as a plain i32 xor with 65535, but performed in i16: the v2i
; result is truncated to i16, xor'ed with -1, and zero-extended to i32 before
; i2v. The expected output is still a single vector xor of %vin with an
; all-true <16 x i1> constant.
317 define <16 x i1> @vpnot_narrow_16(<16 x i1> %vin) {
318 ; CHECK-LABEL: @vpnot_narrow_16(
320 ; CHECK-NEXT: [[VOUT:%.*]] = xor <16 x i1> [[VIN:%.*]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
321 ; CHECK-NEXT: ret <16 x i1> [[VOUT]]
324 %int = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %vin)
325 %narrow = trunc i32 %int to i16
326 %flipped_narrow = xor i16 %narrow, -1
327 %flipped = zext i16 %flipped_narrow to i32
328 %vout = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %flipped)
; Compares the v2i result of a <16 x i1> predicate against 65535. The expected
; output keeps the icmp (the select of 1/0 becomes a zext of the comparison)
; even though the v2i call carries !range metadata.
; NOTE(review): presumably 65535 is the largest value that range permits, so
; the comparison must not be folded to a constant - the range definition is
; not visible here; confirm.
332 define i32 @range_upper_limit(<16 x i1> %vin) {
333 ; CHECK-LABEL: @range_upper_limit(
335 ; CHECK-NEXT: [[INT:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[VIN:%.*]]), !range [[RNG0]]
336 ; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[INT]], 65535
337 ; CHECK-NEXT: [[S:%.*]] = zext i1 [[C]] to i32
338 ; CHECK-NEXT: ret i32 [[S]]
341 %int = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %vin)
342 %c = icmp eq i32 %int, 65535
343 %s = select i1 %c, i32 1, i32 0