1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
3 ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
4 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
6 target triple = "aarch64-unknown-linux-gnu"
;
; ICMP EQ — i8 element vectors. NEON cmeq covers the 64/128-bit cases;
; wider fixed-length vectors use SVE cmpeq into a predicate followed by a
; predicated mov #-1 to materialize the sign-extended i1 result.
;
12 ; Don't use SVE for 64-bit vectors.
13 define <8 x i8> @icmp_eq_v8i8(<8 x i8> %op1, <8 x i8> %op2) vscale_range(2,0) #0 {
14 ; CHECK-LABEL: icmp_eq_v8i8:
16 ; CHECK-NEXT: cmeq v0.8b, v0.8b, v1.8b
18 %cmp = icmp eq <8 x i8> %op1, %op2
19 %sext = sext <8 x i1> %cmp to <8 x i8>
23 ; Don't use SVE for 128-bit vectors.
24 define <16 x i8> @icmp_eq_v16i8(<16 x i8> %op1, <16 x i8> %op2) vscale_range(2,0) #0 {
25 ; CHECK-LABEL: icmp_eq_v16i8:
27 ; CHECK-NEXT: cmeq v0.16b, v0.16b, v1.16b
29 %cmp = icmp eq <16 x i8> %op1, %op2
30 %sext = sext <16 x i1> %cmp to <16 x i8>
34 define void @icmp_eq_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
35 ; CHECK-LABEL: icmp_eq_v32i8:
37 ; CHECK-NEXT: ptrue p0.b, vl32
38 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
39 ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
40 ; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
41 ; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
42 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
44 %op1 = load <32 x i8>, ptr %a
45 %op2 = load <32 x i8>, ptr %b
46 %cmp = icmp eq <32 x i8> %op1, %op2
47 %sext = sext <32 x i1> %cmp to <32 x i8>
48 store <32 x i8> %sext, ptr %a
; No vscale_range attribute here, so codegen depends on the RUN line's
; -aarch64-sve-vector-bits-min: VBITS_GE_256 splits the 64-byte vector into
; two 32-byte halves, VBITS_GE_512 handles it in a single register.
52 define void @icmp_eq_v64i8(ptr %a, ptr %b) #0 {
53 ; VBITS_GE_256-LABEL: icmp_eq_v64i8:
54 ; VBITS_GE_256: // %bb.0:
55 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32
56 ; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
57 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
58 ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
59 ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8]
60 ; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1]
61 ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z0.b, z2.b
62 ; VBITS_GE_256-NEXT: cmpeq p2.b, p0/z, z1.b, z3.b
63 ; VBITS_GE_256-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
64 ; VBITS_GE_256-NEXT: mov z1.b, p2/z, #-1 // =0xffffffffffffffff
65 ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8]
66 ; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
67 ; VBITS_GE_256-NEXT: ret
69 ; VBITS_GE_512-LABEL: icmp_eq_v64i8:
70 ; VBITS_GE_512: // %bb.0:
71 ; VBITS_GE_512-NEXT: ptrue p0.b, vl64
72 ; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x0]
73 ; VBITS_GE_512-NEXT: ld1b { z1.b }, p0/z, [x1]
74 ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
75 ; VBITS_GE_512-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
76 ; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
77 ; VBITS_GE_512-NEXT: ret
78 %op1 = load <64 x i8>, ptr %a
79 %op2 = load <64 x i8>, ptr %b
80 %cmp = icmp eq <64 x i8> %op1, %op2
81 %sext = sext <64 x i1> %cmp to <64 x i8>
82 store <64 x i8> %sext, ptr %a
86 define void @icmp_eq_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
87 ; CHECK-LABEL: icmp_eq_v128i8:
89 ; CHECK-NEXT: ptrue p0.b, vl128
90 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
91 ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
92 ; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
93 ; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
94 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
96 %op1 = load <128 x i8>, ptr %a
97 %op2 = load <128 x i8>, ptr %b
98 %cmp = icmp eq <128 x i8> %op1, %op2
99 %sext = sext <128 x i1> %cmp to <128 x i8>
100 store <128 x i8> %sext, ptr %a
104 define void @icmp_eq_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
105 ; CHECK-LABEL: icmp_eq_v256i8:
107 ; CHECK-NEXT: ptrue p0.b, vl256
108 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
109 ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
110 ; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
111 ; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
112 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
114 %op1 = load <256 x i8>, ptr %a
115 %op2 = load <256 x i8>, ptr %b
116 %cmp = icmp eq <256 x i8> %op1, %op2
117 %sext = sext <256 x i1> %cmp to <256 x i8>
118 store <256 x i8> %sext, ptr %a
;
; ICMP EQ — i16 element vectors. Same pattern as the i8 cases with .h
; element size and lsl #1 scaled addressing in the split VBITS_GE_256 case.
;
122 ; Don't use SVE for 64-bit vectors.
123 define <4 x i16> @icmp_eq_v4i16(<4 x i16> %op1, <4 x i16> %op2) vscale_range(2,0) #0 {
124 ; CHECK-LABEL: icmp_eq_v4i16:
126 ; CHECK-NEXT: cmeq v0.4h, v0.4h, v1.4h
128 %cmp = icmp eq <4 x i16> %op1, %op2
129 %sext = sext <4 x i1> %cmp to <4 x i16>
133 ; Don't use SVE for 128-bit vectors.
134 define <8 x i16> @icmp_eq_v8i16(<8 x i16> %op1, <8 x i16> %op2) vscale_range(2,0) #0 {
135 ; CHECK-LABEL: icmp_eq_v8i16:
137 ; CHECK-NEXT: cmeq v0.8h, v0.8h, v1.8h
139 %cmp = icmp eq <8 x i16> %op1, %op2
140 %sext = sext <8 x i1> %cmp to <8 x i16>
144 define void @icmp_eq_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
145 ; CHECK-LABEL: icmp_eq_v16i16:
147 ; CHECK-NEXT: ptrue p0.h, vl16
148 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
149 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
150 ; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
151 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
152 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
154 %op1 = load <16 x i16>, ptr %a
155 %op2 = load <16 x i16>, ptr %b
156 %cmp = icmp eq <16 x i16> %op1, %op2
157 %sext = sext <16 x i1> %cmp to <16 x i16>
158 store <16 x i16> %sext, ptr %a
; No vscale_range: checks split by RUN-line vector width (see v64i8 above).
162 define void @icmp_eq_v32i16(ptr %a, ptr %b) #0 {
163 ; VBITS_GE_256-LABEL: icmp_eq_v32i16:
164 ; VBITS_GE_256: // %bb.0:
165 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
166 ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
167 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
168 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
169 ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
170 ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
171 ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z0.h, z2.h
172 ; VBITS_GE_256-NEXT: cmpeq p2.h, p0/z, z1.h, z3.h
173 ; VBITS_GE_256-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
174 ; VBITS_GE_256-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff
175 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
176 ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
177 ; VBITS_GE_256-NEXT: ret
179 ; VBITS_GE_512-LABEL: icmp_eq_v32i16:
180 ; VBITS_GE_512: // %bb.0:
181 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
182 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
183 ; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
184 ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
185 ; VBITS_GE_512-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
186 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
187 ; VBITS_GE_512-NEXT: ret
188 %op1 = load <32 x i16>, ptr %a
189 %op2 = load <32 x i16>, ptr %b
190 %cmp = icmp eq <32 x i16> %op1, %op2
191 %sext = sext <32 x i1> %cmp to <32 x i16>
192 store <32 x i16> %sext, ptr %a
196 define void @icmp_eq_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
197 ; CHECK-LABEL: icmp_eq_v64i16:
199 ; CHECK-NEXT: ptrue p0.h, vl64
200 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
201 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
202 ; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
203 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
204 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
206 %op1 = load <64 x i16>, ptr %a
207 %op2 = load <64 x i16>, ptr %b
208 %cmp = icmp eq <64 x i16> %op1, %op2
209 %sext = sext <64 x i1> %cmp to <64 x i16>
210 store <64 x i16> %sext, ptr %a
214 define void @icmp_eq_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
215 ; CHECK-LABEL: icmp_eq_v128i16:
217 ; CHECK-NEXT: ptrue p0.h, vl128
218 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
219 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
220 ; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
221 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
222 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
224 %op1 = load <128 x i16>, ptr %a
225 %op2 = load <128 x i16>, ptr %b
226 %cmp = icmp eq <128 x i16> %op1, %op2
227 %sext = sext <128 x i1> %cmp to <128 x i16>
228 store <128 x i16> %sext, ptr %a
;
; ICMP EQ — i32 element vectors. Same pattern with .s element size and
; lsl #2 scaled addressing in the split VBITS_GE_256 case.
;
232 ; Don't use SVE for 64-bit vectors.
233 define <2 x i32> @icmp_eq_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(2,0) #0 {
234 ; CHECK-LABEL: icmp_eq_v2i32:
236 ; CHECK-NEXT: cmeq v0.2s, v0.2s, v1.2s
238 %cmp = icmp eq <2 x i32> %op1, %op2
239 %sext = sext <2 x i1> %cmp to <2 x i32>
243 ; Don't use SVE for 128-bit vectors.
244 define <4 x i32> @icmp_eq_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(2,0) #0 {
245 ; CHECK-LABEL: icmp_eq_v4i32:
247 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
249 %cmp = icmp eq <4 x i32> %op1, %op2
250 %sext = sext <4 x i1> %cmp to <4 x i32>
254 define void @icmp_eq_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
255 ; CHECK-LABEL: icmp_eq_v8i32:
257 ; CHECK-NEXT: ptrue p0.s, vl8
258 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
259 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
260 ; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
261 ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
262 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
264 %op1 = load <8 x i32>, ptr %a
265 %op2 = load <8 x i32>, ptr %b
266 %cmp = icmp eq <8 x i32> %op1, %op2
267 %sext = sext <8 x i1> %cmp to <8 x i32>
268 store <8 x i32> %sext, ptr %a
; No vscale_range: checks split by RUN-line vector width (see v64i8 above).
272 define void @icmp_eq_v16i32(ptr %a, ptr %b) #0 {
273 ; VBITS_GE_256-LABEL: icmp_eq_v16i32:
274 ; VBITS_GE_256: // %bb.0:
275 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
276 ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
277 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
278 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
279 ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
280 ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
281 ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, z2.s
282 ; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z1.s, z3.s
283 ; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
284 ; VBITS_GE_256-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
285 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
286 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
287 ; VBITS_GE_256-NEXT: ret
289 ; VBITS_GE_512-LABEL: icmp_eq_v16i32:
290 ; VBITS_GE_512: // %bb.0:
291 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
292 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
293 ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
294 ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
295 ; VBITS_GE_512-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
296 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
297 ; VBITS_GE_512-NEXT: ret
298 %op1 = load <16 x i32>, ptr %a
299 %op2 = load <16 x i32>, ptr %b
300 %cmp = icmp eq <16 x i32> %op1, %op2
301 %sext = sext <16 x i1> %cmp to <16 x i32>
302 store <16 x i32> %sext, ptr %a
306 define void @icmp_eq_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
307 ; CHECK-LABEL: icmp_eq_v32i32:
309 ; CHECK-NEXT: ptrue p0.s, vl32
310 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
311 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
312 ; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
313 ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
314 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
316 %op1 = load <32 x i32>, ptr %a
317 %op2 = load <32 x i32>, ptr %b
318 %cmp = icmp eq <32 x i32> %op1, %op2
319 %sext = sext <32 x i1> %cmp to <32 x i32>
320 store <32 x i32> %sext, ptr %a
324 define void @icmp_eq_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
325 ; CHECK-LABEL: icmp_eq_v64i32:
327 ; CHECK-NEXT: ptrue p0.s, vl64
328 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
329 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
330 ; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
331 ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
332 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
334 %op1 = load <64 x i32>, ptr %a
335 %op2 = load <64 x i32>, ptr %b
336 %cmp = icmp eq <64 x i32> %op1, %op2
337 %sext = sext <64 x i1> %cmp to <64 x i32>
338 store <64 x i32> %sext, ptr %a
;
; ICMP EQ — i64 element vectors. Same pattern with .d element size and
; lsl #3 scaled addressing in the split VBITS_GE_256 case; the v1i64 case
; uses the scalar-FP-register form of cmeq.
;
342 ; Don't use SVE for 64-bit vectors.
343 define <1 x i64> @icmp_eq_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #0 {
344 ; CHECK-LABEL: icmp_eq_v1i64:
346 ; CHECK-NEXT: cmeq d0, d0, d1
348 %cmp = icmp eq <1 x i64> %op1, %op2
349 %sext = sext <1 x i1> %cmp to <1 x i64>
353 ; Don't use SVE for 128-bit vectors.
354 define <2 x i64> @icmp_eq_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #0 {
355 ; CHECK-LABEL: icmp_eq_v2i64:
357 ; CHECK-NEXT: cmeq v0.2d, v0.2d, v1.2d
359 %cmp = icmp eq <2 x i64> %op1, %op2
360 %sext = sext <2 x i1> %cmp to <2 x i64>
364 define void @icmp_eq_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
365 ; CHECK-LABEL: icmp_eq_v4i64:
367 ; CHECK-NEXT: ptrue p0.d, vl4
368 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
369 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
370 ; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
371 ; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
372 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
374 %op1 = load <4 x i64>, ptr %a
375 %op2 = load <4 x i64>, ptr %b
376 %cmp = icmp eq <4 x i64> %op1, %op2
377 %sext = sext <4 x i1> %cmp to <4 x i64>
378 store <4 x i64> %sext, ptr %a
; No vscale_range: checks split by RUN-line vector width (see v64i8 above).
382 define void @icmp_eq_v8i64(ptr %a, ptr %b) #0 {
383 ; VBITS_GE_256-LABEL: icmp_eq_v8i64:
384 ; VBITS_GE_256: // %bb.0:
385 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
386 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
387 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
388 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
389 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
390 ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
391 ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, z2.d
392 ; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, z3.d
393 ; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
394 ; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
395 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
396 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
397 ; VBITS_GE_256-NEXT: ret
399 ; VBITS_GE_512-LABEL: icmp_eq_v8i64:
400 ; VBITS_GE_512: // %bb.0:
401 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
402 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
403 ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
404 ; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
405 ; VBITS_GE_512-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
406 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
407 ; VBITS_GE_512-NEXT: ret
408 %op1 = load <8 x i64>, ptr %a
409 %op2 = load <8 x i64>, ptr %b
410 %cmp = icmp eq <8 x i64> %op1, %op2
411 %sext = sext <8 x i1> %cmp to <8 x i64>
412 store <8 x i64> %sext, ptr %a
416 define void @icmp_eq_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
417 ; CHECK-LABEL: icmp_eq_v16i64:
419 ; CHECK-NEXT: ptrue p0.d, vl16
420 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
421 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
422 ; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
423 ; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
424 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
426 %op1 = load <16 x i64>, ptr %a
427 %op2 = load <16 x i64>, ptr %b
428 %cmp = icmp eq <16 x i64> %op1, %op2
429 %sext = sext <16 x i1> %cmp to <16 x i64>
430 store <16 x i64> %sext, ptr %a
434 define void @icmp_eq_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
435 ; CHECK-LABEL: icmp_eq_v32i64:
437 ; CHECK-NEXT: ptrue p0.d, vl32
438 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
439 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
440 ; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
441 ; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
442 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
444 %op1 = load <32 x i64>, ptr %a
445 %op2 = load <32 x i64>, ptr %b
446 %cmp = icmp eq <32 x i64> %op1, %op2
447 %sext = sext <32 x i1> %cmp to <32 x i64>
448 store <32 x i64> %sext, ptr %a
;
; ICMP NE — lowered directly to the SVE cmpne predicated compare.
;
456 define void @icmp_ne_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
457 ; CHECK-LABEL: icmp_ne_v32i8:
459 ; CHECK-NEXT: ptrue p0.b, vl32
460 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
461 ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
462 ; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, z1.b
463 ; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
464 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
466 %op1 = load <32 x i8>, ptr %a
467 %op2 = load <32 x i8>, ptr %b
468 %cmp = icmp ne <32 x i8> %op1, %op2
469 %sext = sext <32 x i1> %cmp to <32 x i8>
470 store <32 x i8> %sext, ptr %a
;
; ICMP SGE — lowered to the SVE signed cmpge predicated compare.
;
478 define void @icmp_sge_v32i16(ptr %a, ptr %b) vscale_range(4,0) #0 {
479 ; CHECK-LABEL: icmp_sge_v32i16:
481 ; CHECK-NEXT: ptrue p0.h, vl32
482 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
483 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
484 ; CHECK-NEXT: cmpge p1.h, p0/z, z0.h, z1.h
485 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
486 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
488 %op1 = load <32 x i16>, ptr %a
489 %op2 = load <32 x i16>, ptr %b
490 %cmp = icmp sge <32 x i16> %op1, %op2
491 %sext = sext <32 x i1> %cmp to <32 x i16>
492 store <32 x i16> %sext, ptr %a
;
; ICMP SGT — lowered to the SVE signed cmpgt predicated compare.
;
500 define void @icmp_sgt_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
501 ; CHECK-LABEL: icmp_sgt_v16i16:
503 ; CHECK-NEXT: ptrue p0.h, vl16
504 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
505 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
506 ; CHECK-NEXT: cmpgt p1.h, p0/z, z0.h, z1.h
507 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
508 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
510 %op1 = load <16 x i16>, ptr %a
511 %op2 = load <16 x i16>, ptr %b
512 %cmp = icmp sgt <16 x i16> %op1, %op2
513 %sext = sext <16 x i1> %cmp to <16 x i16>
514 store <16 x i16> %sext, ptr %a
;
; ICMP SLE — no dedicated instruction: lowered to cmpge with the compare
; operands swapped (a sle b  ==>  cmpge b, a), visible in the CHECK line.
;
522 define void @icmp_sle_v16i32(ptr %a, ptr %b) vscale_range(4,0) #0 {
523 ; CHECK-LABEL: icmp_sle_v16i32:
525 ; CHECK-NEXT: ptrue p0.s, vl16
526 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
527 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
528 ; CHECK-NEXT: cmpge p1.s, p0/z, z1.s, z0.s
529 ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
530 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
532 %op1 = load <16 x i32>, ptr %a
533 %op2 = load <16 x i32>, ptr %b
534 %cmp = icmp sle <16 x i32> %op1, %op2
535 %sext = sext <16 x i1> %cmp to <16 x i32>
536 store <16 x i32> %sext, ptr %a
;
; ICMP SLT — lowered to cmpgt with the compare operands swapped
; (a slt b  ==>  cmpgt b, a), visible in the CHECK line.
;
544 define void @icmp_slt_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
545 ; CHECK-LABEL: icmp_slt_v8i32:
547 ; CHECK-NEXT: ptrue p0.s, vl8
548 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
549 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
550 ; CHECK-NEXT: cmpgt p1.s, p0/z, z1.s, z0.s
551 ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
552 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
554 %op1 = load <8 x i32>, ptr %a
555 %op2 = load <8 x i32>, ptr %b
556 %cmp = icmp slt <8 x i32> %op1, %op2
557 %sext = sext <8 x i1> %cmp to <8 x i32>
558 store <8 x i32> %sext, ptr %a
;
; ICMP UGE — lowered to the SVE unsigned cmphs (higher-or-same) compare.
;
566 define void @icmp_uge_v8i64(ptr %a, ptr %b) vscale_range(4,0) #0 {
567 ; CHECK-LABEL: icmp_uge_v8i64:
569 ; CHECK-NEXT: ptrue p0.d, vl8
570 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
571 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
572 ; CHECK-NEXT: cmphs p1.d, p0/z, z0.d, z1.d
573 ; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
574 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
576 %op1 = load <8 x i64>, ptr %a
577 %op2 = load <8 x i64>, ptr %b
578 %cmp = icmp uge <8 x i64> %op1, %op2
579 %sext = sext <8 x i1> %cmp to <8 x i64>
580 store <8 x i64> %sext, ptr %a
;
; ICMP UGT — lowered to the SVE unsigned cmphi (higher) compare.
;
588 define void @icmp_ugt_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
589 ; CHECK-LABEL: icmp_ugt_v4i64:
591 ; CHECK-NEXT: ptrue p0.d, vl4
592 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
593 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
594 ; CHECK-NEXT: cmphi p1.d, p0/z, z0.d, z1.d
595 ; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
596 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
598 %op1 = load <4 x i64>, ptr %a
599 %op2 = load <4 x i64>, ptr %b
600 %cmp = icmp ugt <4 x i64> %op1, %op2
601 %sext = sext <4 x i1> %cmp to <4 x i64>
602 store <4 x i64> %sext, ptr %a
;
; ICMP ULE — lowered to cmphs with the compare operands swapped
; (a ule b  ==>  cmphs b, a), visible in the CHECK line.
;
610 define void @icmp_ule_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
611 ; CHECK-LABEL: icmp_ule_v16i64:
613 ; CHECK-NEXT: ptrue p0.d, vl16
614 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
615 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
616 ; CHECK-NEXT: cmphs p1.d, p0/z, z1.d, z0.d
617 ; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
618 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
620 %op1 = load <16 x i64>, ptr %a
621 %op2 = load <16 x i64>, ptr %b
622 %cmp = icmp ule <16 x i64> %op1, %op2
623 %sext = sext <16 x i1> %cmp to <16 x i64>
624 store <16 x i64> %sext, ptr %a
;
; ICMP ULT — lowered to cmphi with the compare operands swapped
; (a ult b  ==>  cmphi b, a), visible in the CHECK line.
;
632 define void @icmp_ult_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
633 ; CHECK-LABEL: icmp_ult_v32i64:
635 ; CHECK-NEXT: ptrue p0.d, vl32
636 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
637 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
638 ; CHECK-NEXT: cmphi p1.d, p0/z, z1.d, z0.d
639 ; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
640 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
642 %op1 = load <32 x i64>, ptr %a
643 %op2 = load <32 x i64>, ptr %b
644 %cmp = icmp ult <32 x i64> %op1, %op2
645 %sext = sext <32 x i1> %cmp to <32 x i64>
646 store <32 x i64> %sext, ptr %a
650 attributes #0 = { "target-features"="+sve" }