; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -passes=instcombine -S %s | llc -mtriple=aarch64--linux-gnu -mattr=+sve -o - | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

define i32 @cmple_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmple_imm_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmple p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)
  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
  %conv = zext i1 %3 to i32
  ret i32 %conv
}

define i32 @cmple_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmple_wide_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmple p0.b, p0/z, z0.b, z1.d
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
  %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
  %conv = zext i1 %2 to i32
  ret i32 %conv
}

define i32 @cmple_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmple_wide_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmple p0.h, p0/z, z0.h, z1.d
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
  %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
  %conv = zext i1 %4 to i32
  ret i32 %conv
}

define i32 @cmple_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmple_wide_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmple p0.s, p0/z, z0.s, z1.d
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
  %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
  %conv = zext i1 %4 to i32
  ret i32 %conv
}

; ==============================================================================
; PTEST_OP(PG, CMP(PG, ...))
; ==============================================================================

; PTEST_FIRST(PG, CMP8(PG, A, B)). PTEST is redundant.
define i1 @cmp8_ptest_first_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_first_px:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, mi
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %2 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
  ret i1 %2
}

; PTEST_LAST(PG, CMP8(PG, A, B)). PTEST is redundant.
define i1 @cmp8_ptest_last_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_last_px:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %2 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
  ret i1 %2
}

; PTEST_ANY(PG, CMP8(PG, A, B)). PTEST is redundant.
define i1 @cmp8_ptest_any_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_px:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
  ret i1 %2
}

; Same as above except PG = incorrectly sized PTRUE (see the note after this
; test).
define i1 @cmp8_ptest_any_px_bad_ptrue(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_px_bad_ptrue:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %2, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %3)
  ret i1 %4
}
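
; Note (an inference from the test above, stated here as an assumption rather
; than taken from this file's comments): a ptrue.s reinterpreted as svbool
; only sets the low bit of each 4-bit predicate group, e.g. with vscale = 1
;
;   ptrue.s viewed as .b  =  <1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0>
;
; so it is not an all-active .b predicate and must not be treated as
; PTRUE_ALL. The PTEST is nonetheless removable, because the PTEST mask and
; the compare's governing predicate are the same value (%2).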

; PTEST_FIRST(PG, CMP32(PG, A, B)). Can't remove PTEST since PTEST.B vs
; CMP.S (the granularity mismatch is illustrated after this test).
define i1 @cmp32_ptest_first_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_first_px:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    ptest p0, p1.b
; CHECK-NEXT:    cset w0, mi
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
  %4 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
  ret i1 %4
}
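
; A sketch of the granularity mismatch (assuming vscale = 1, i.e. a 128-bit
; vector): a .s predicate element spans 4 predicate bits, so a compare result
; of <1, 0, 1, 0> viewed as .s is the .b view
;
;   <1,0,0,0, 0,0,0,0, 1,0,0,0, 0,0,0,0>
;
; The explicit PTEST uses the full .b view of PG as its mask, so its
; first/last active lane need not coincide with the first/last active .s lane
; used by the compare's implicit PTEST. Only ANY is insensitive to this,
; which is why the ANY variant below can still drop the PTEST.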

; PTEST_LAST(PG, CMP32(PG, A, B)). Can't remove PTEST since PTEST.B vs CMP.S.
define i1 @cmp32_ptest_last_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_last_px:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    ptest p0, p1.b
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
  %4 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
  ret i1 %4
}

; PTEST_ANY(PG, CMP32(PG, A, B)). PTEST is redundant.
define i1 @cmp32_ptest_any_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_px:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
  %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
  ret i1 %4
}

; Same as above except PG = incorrectly sized PTRUE
define i1 @cmp32_ptest_any_px_bad_ptrue(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_px_bad_ptrue:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
  %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %2, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %4 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %3)
  %5 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %4)
  ret i1 %5
}

; ==============================================================================
; PTEST_OP(X=CMP(PG, ...), X)
; ==============================================================================

; PTEST_FIRST(X=CMP8(PG, A, B), X). PTEST is redundant if the condition is
; changed from first active to any active.
;
; Can't remove PTEST and keep the same condition (first), since the mask for
; the implicit PTEST (PG) performed by the compare differs from the mask
; specified to the explicit PTEST and could have a different result.
;
; For example, consider
;
;   PG = <1, 1, 0, 0>
;
;   X = CMPLE(PG, Z0, Z1)
;     = <0, 1, x, x>,  NZCV = 0xxx
;   PTEST(X, X),       NZCV = 1xxx
;
; where the first active flag (bit 'N' in NZCV) is set by the explicit PTEST,
; but not by the implicit PTEST as part of the compare. However, given the
; PTEST mask and source are the same, first is equivalent to any. The same
; applies to last active. A standalone IR sketch of this rewrite follows the
; next test.

define i1 @cmp8_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_first_xx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %2 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
  ret i1 %2
}
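
; A minimal IR sketch of the rewrite relied on above (not one of the checked
; tests; it assumes only the intrinsics declared at the end of this file):
; when the PTEST mask and source are the same value,
;
;   %x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
;   %r = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %x, <vscale x 16 x i1> %x)
;
; is equivalent to
;
;   %r = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %x, <vscale x 16 x i1> %x)
;
; since the first active element of mask %x is by definition a set element of
; source %x: first is true iff any element of %x is set at all.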

; PTEST_LAST(X=CMP8(PG, A, B), X). PTEST is redundant if the condition is
; changed from last active to any active.
define i1 @cmp8_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_last_xx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %2 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
  ret i1 %2
}

; PTEST_ANY(X=CMP8(PG, A, B), X). PTEST is redundant.
define i1 @cmp8_ptest_any_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_xx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
  ret i1 %2
}

; PTEST_FIRST(X=CMP32(PG, A, B), X). PTEST is redundant if the condition is
; changed from first active to any active.
define i1 @cmp32_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_first_xx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %2, <vscale x 4 x i1> %2)
  ret i1 %3
}

; PTEST_LAST(X=CMP32(PG, A, B), X). PTEST is redundant if the condition is
; changed from last active to any active.
define i1 @cmp32_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_last_xx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1> %2, <vscale x 4 x i1> %2)
  ret i1 %3
}

; PTEST_ANY(X=CMP32(PG, A, B), X). PTEST is redundant.
define i1 @cmp32_ptest_any_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_xx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %2, <vscale x 4 x i1> %2)
  ret i1 %3
}

; ==============================================================================
; PTEST_OP(PTRUE_ALL, CMP(PG, ...))
; ==============================================================================

; PTEST_FIRST(PTRUE_ALL, CMP8(PG, A, B)). Can't remove PTEST since mask is
; different from the compare's governing predicate.
define i1 @cmp8_ptest_first_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_first_ax:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    ptest p1, p0.b
; CHECK-NEXT:    cset w0, mi
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
  ret i1 %3
}

; PTEST_LAST(PTRUE_ALL, CMP8(PG, A, B)). Can't remove PTEST since mask is
; different from the compare's governing predicate.
define i1 @cmp8_ptest_last_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_last_ax:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    ptest p1, p0.b
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
  ret i1 %3
}

; PTEST_ANY(PTRUE_ALL, CMP8(PG, A, B)). PTEST is redundant (see the note
; after this test).
define i1 @cmp8_ptest_any_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_ax:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
  ret i1 %3
}
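
; Why ANY survives the mask mismatch (a sketch of the reasoning, stated as an
; assumption rather than taken from this file's comments): the compare zeroes
; every lane that is inactive in PG, so its result X satisfies
;
;   X AND PTRUE_ALL == X AND PG == X
;
; and testing X for any active element gives the same answer under either
; mask, matching the flags the compare already set. FIRST/LAST depend on
; which lane of the mask is first/last active, and the first/last active lane
; of PTRUE_ALL need not be the first/last active lane of PG.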

; PTEST_FIRST(PTRUE_ALL, CMP32(PG, A, B)). Can't remove PTEST since mask is
; different from the compare's governing predicate.
define i1 @cmp32_ptest_first_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_first_ax:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    ptest p1, p0.b
; CHECK-NEXT:    cset w0, mi
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %4 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %3, <vscale x 4 x i1> %2)
  ret i1 %4
}

; PTEST_LAST(PTRUE_ALL, CMP32(PG, A, B)). Can't remove PTEST since mask is
; different from the compare's governing predicate.
define i1 @cmp32_ptest_last_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_last_ax:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    ptest p1, p0.b
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %4 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1> %3, <vscale x 4 x i1> %2)
  ret i1 %4
}

; PTEST_ANY(PTRUE_ALL, CMP32(PG, A, B)). PTEST is redundant.
define i1 @cmp32_ptest_any_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_ax:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %3, <vscale x 4 x i1> %2)
  ret i1 %4
}

; ==============================================================================
; PTEST_OP(PTRUE_ALL, CMP(PTRUE_ALL, ...))
; ==============================================================================

; PTEST_FIRST(PTRUE_ALL, CMP8(PTRUE_ALL, A, B)). PTEST is redundant.
define i1 @cmp8_ptest_first_aa(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_first_aa:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, mi
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
  ret i1 %3
}

; PTEST_LAST(PTRUE_ALL, CMP8(PTRUE_ALL, A, B)). PTEST is redundant.
define i1 @cmp8_ptest_last_aa(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_last_aa:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
  ret i1 %3
}

; PTEST_ANY(PTRUE_ALL, CMP8(PTRUE_ALL, A, B)). PTEST is redundant.
define i1 @cmp8_ptest_any_aa(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_aa:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
  ret i1 %3
}

; PTEST_FIRST(PTRUE_ALL, CMP32(PTRUE_ALL, A, B)). PTEST is redundant.
define i1 @cmp32_ptest_first_aa(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_first_aa:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    cset w0, mi
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %2)
  ret i1 %3
}

; PTEST_LAST(PTRUE_ALL, CMP32(PTRUE_ALL, A, B)). PTEST is redundant.
define i1 @cmp32_ptest_last_aa(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_last_aa:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %2)
  ret i1 %3
}

; PTEST_ANY(PTRUE_ALL, CMP32(PTRUE_ALL, A, B)). PTEST is redundant.
define i1 @cmp32_ptest_any_aa(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_aa:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %2)
  ret i1 %3
}

declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)

declare i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)

declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)

declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)