1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
3 ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
4 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
6 target triple = "aarch64-unknown-linux-gnu"
12 ; Don't use SVE for 64-bit vectors.
; fcmp oeq on <4 x half> register operands, with the <4 x i1> result
; sign-extended to <4 x i16>. The expected output is a single fcmeq on
; v-registers (v0.4h); no SVE z-registers or predicates are involved.
13 define <4 x i16> @fcmp_oeq_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
14 ; CHECK-LABEL: fcmp_oeq_v4f16:
16 ; CHECK-NEXT: fcmeq v0.4h, v0.4h, v1.4h
18 %cmp = fcmp oeq <4 x half> %op1, %op2
19 %sext = sext <4 x i1> %cmp to <4 x i16>
23 ; Don't use SVE for 128-bit vectors.
; fcmp oeq on <8 x half> register operands, with the <8 x i1> result
; sign-extended to <8 x i16>. The expected output is a single fcmeq on
; v-registers (v0.8h); no SVE z-registers or predicates are involved.
24 define <8 x i16> @fcmp_oeq_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
25 ; CHECK-LABEL: fcmp_oeq_v8f16:
27 ; CHECK-NEXT: fcmeq v0.8h, v0.8h, v1.8h
29 %cmp = fcmp oeq <8 x half> %op1, %op2
30 %sext = sext <8 x i1> %cmp to <8 x i16>
; fcmp oeq on <16 x half> (256 bits) loaded from %a and %b; the result is
; sign-extended to <16 x i16> and stored to %c. Under vscale_range(2,0) the
; expected output is one predicated SVE sequence: ptrue vl16, two ld1h,
; fcmeq into a predicate, a zeroing mov of #-1 under that predicate, st1h.
34 define void @fcmp_oeq_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
35 ; CHECK-LABEL: fcmp_oeq_v16f16:
37 ; CHECK-NEXT: ptrue p0.h, vl16
38 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
39 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
40 ; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
41 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
42 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
44 %op1 = load <16 x half>, ptr %a
45 %op2 = load <16 x half>, ptr %b
46 %cmp = fcmp oeq <16 x half> %op1, %op2
47 %sext = sext <16 x i1> %cmp to <16 x i16>
48 store <16 x i16> %sext, ptr %c
; fcmp oeq on <32 x half> (512 bits) with no vscale_range attribute, so the
; expected output depends on the RUN line. The 256-bit configuration splits
; the work into two vl16 halves (the second half addressed through x8 = #16
; scaled by lsl #1); the 512-bit and 2048-bit configurations share one set of
; expectations and use a single vl32 sequence.
52 define void @fcmp_oeq_v32f16(ptr %a, ptr %b, ptr %c) #0 {
53 ; VBITS_GE_256-LABEL: fcmp_oeq_v32f16:
54 ; VBITS_GE_256: // %bb.0:
55 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
56 ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
57 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
58 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
59 ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
60 ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
61 ; VBITS_GE_256-NEXT: fcmeq p1.h, p0/z, z0.h, z2.h
62 ; VBITS_GE_256-NEXT: fcmeq p2.h, p0/z, z1.h, z3.h
63 ; VBITS_GE_256-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
64 ; VBITS_GE_256-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff
65 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x2, x8, lsl #1]
66 ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x2]
67 ; VBITS_GE_256-NEXT: ret
69 ; VBITS_GE_512-LABEL: fcmp_oeq_v32f16:
70 ; VBITS_GE_512: // %bb.0:
71 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
72 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
73 ; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
74 ; VBITS_GE_512-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
75 ; VBITS_GE_512-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
76 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x2]
77 ; VBITS_GE_512-NEXT: ret
78 %op1 = load <32 x half>, ptr %a
79 %op2 = load <32 x half>, ptr %b
80 %cmp = fcmp oeq <32 x half> %op1, %op2
81 %sext = sext <32 x i1> %cmp to <32 x i16>
82 store <32 x i16> %sext, ptr %c
; fcmp oeq on <64 x half> (1024 bits) loaded from %a and %b; the result is
; sign-extended to <64 x i16> and stored to %c. Under vscale_range(8,0) every
; RUN configuration expects the same single predicated sequence with
; ptrue vl64.
86 define void @fcmp_oeq_v64f16(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
87 ; CHECK-LABEL: fcmp_oeq_v64f16:
89 ; CHECK-NEXT: ptrue p0.h, vl64
90 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
91 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
92 ; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
93 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
94 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
96 %op1 = load <64 x half>, ptr %a
97 %op2 = load <64 x half>, ptr %b
98 %cmp = fcmp oeq <64 x half> %op1, %op2
99 %sext = sext <64 x i1> %cmp to <64 x i16>
100 store <64 x i16> %sext, ptr %c
; fcmp oeq on <128 x half> (2048 bits) loaded from %a and %b; the result is
; sign-extended to <128 x i16> and stored to %c. Under vscale_range(16,0)
; every RUN configuration expects the same single predicated sequence with
; ptrue vl128.
104 define void @fcmp_oeq_v128f16(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
105 ; CHECK-LABEL: fcmp_oeq_v128f16:
107 ; CHECK-NEXT: ptrue p0.h, vl128
108 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
109 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
110 ; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
111 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
112 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
114 %op1 = load <128 x half>, ptr %a
115 %op2 = load <128 x half>, ptr %b
116 %cmp = fcmp oeq <128 x half> %op1, %op2
117 %sext = sext <128 x i1> %cmp to <128 x i16>
118 store <128 x i16> %sext, ptr %c
122 ; Don't use SVE for 64-bit vectors.
; fcmp oeq on <2 x float> register operands, with the <2 x i1> result
; sign-extended to <2 x i32>. The expected output is a single fcmeq on
; v-registers (v0.2s); no SVE z-registers or predicates are involved.
123 define <2 x i32> @fcmp_oeq_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
124 ; CHECK-LABEL: fcmp_oeq_v2f32:
126 ; CHECK-NEXT: fcmeq v0.2s, v0.2s, v1.2s
128 %cmp = fcmp oeq <2 x float> %op1, %op2
129 %sext = sext <2 x i1> %cmp to <2 x i32>
133 ; Don't use SVE for 128-bit vectors.
; fcmp oeq on <4 x float> register operands, with the <4 x i1> result
; sign-extended to <4 x i32>. The expected output is a single fcmeq on
; v-registers (v0.4s); no SVE z-registers or predicates are involved.
134 define <4 x i32> @fcmp_oeq_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
135 ; CHECK-LABEL: fcmp_oeq_v4f32:
137 ; CHECK-NEXT: fcmeq v0.4s, v0.4s, v1.4s
139 %cmp = fcmp oeq <4 x float> %op1, %op2
140 %sext = sext <4 x i1> %cmp to <4 x i32>
; fcmp oeq on <8 x float> (256 bits) loaded from %a and %b; the result is
; sign-extended to <8 x i32> and stored to %c. Under vscale_range(2,0) the
; expected output is one predicated SVE sequence on .s lanes: ptrue vl8,
; two ld1w, fcmeq into a predicate, a zeroing mov of #-1, st1w.
144 define void @fcmp_oeq_v8f32(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
145 ; CHECK-LABEL: fcmp_oeq_v8f32:
147 ; CHECK-NEXT: ptrue p0.s, vl8
148 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
149 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
150 ; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
151 ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
152 ; CHECK-NEXT: st1w { z0.s }, p0, [x2]
154 %op1 = load <8 x float>, ptr %a
155 %op2 = load <8 x float>, ptr %b
156 %cmp = fcmp oeq <8 x float> %op1, %op2
157 %sext = sext <8 x i1> %cmp to <8 x i32>
158 store <8 x i32> %sext, ptr %c
; fcmp oeq on <16 x float> (512 bits) with no vscale_range attribute, so the
; expected output depends on the RUN line. The 256-bit configuration splits
; the work into two vl8 halves (the second half addressed through x8 = #8
; scaled by lsl #2); the 512-bit and 2048-bit configurations share one set of
; expectations and use a single vl16 sequence.
162 define void @fcmp_oeq_v16f32(ptr %a, ptr %b, ptr %c) #0 {
163 ; VBITS_GE_256-LABEL: fcmp_oeq_v16f32:
164 ; VBITS_GE_256: // %bb.0:
165 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
166 ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
167 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
168 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
169 ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
170 ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
171 ; VBITS_GE_256-NEXT: fcmeq p1.s, p0/z, z0.s, z2.s
172 ; VBITS_GE_256-NEXT: fcmeq p2.s, p0/z, z1.s, z3.s
173 ; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
174 ; VBITS_GE_256-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
175 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2, x8, lsl #2]
176 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2]
177 ; VBITS_GE_256-NEXT: ret
179 ; VBITS_GE_512-LABEL: fcmp_oeq_v16f32:
180 ; VBITS_GE_512: // %bb.0:
181 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
182 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
183 ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
184 ; VBITS_GE_512-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
185 ; VBITS_GE_512-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
186 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2]
187 ; VBITS_GE_512-NEXT: ret
188 %op1 = load <16 x float>, ptr %a
189 %op2 = load <16 x float>, ptr %b
190 %cmp = fcmp oeq <16 x float> %op1, %op2
191 %sext = sext <16 x i1> %cmp to <16 x i32>
192 store <16 x i32> %sext, ptr %c
; fcmp oeq on <32 x float> (1024 bits) loaded from %a and %b; the result is
; sign-extended to <32 x i32> and stored to %c. Under vscale_range(8,0) every
; RUN configuration expects the same single predicated sequence with
; ptrue vl32.
196 define void @fcmp_oeq_v32f32(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
197 ; CHECK-LABEL: fcmp_oeq_v32f32:
199 ; CHECK-NEXT: ptrue p0.s, vl32
200 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
201 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
202 ; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
203 ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
204 ; CHECK-NEXT: st1w { z0.s }, p0, [x2]
206 %op1 = load <32 x float>, ptr %a
207 %op2 = load <32 x float>, ptr %b
208 %cmp = fcmp oeq <32 x float> %op1, %op2
209 %sext = sext <32 x i1> %cmp to <32 x i32>
210 store <32 x i32> %sext, ptr %c
; fcmp oeq on <64 x float> (2048 bits) loaded from %a and %b; the result is
; sign-extended to <64 x i32> and stored to %c. Under vscale_range(16,0)
; every RUN configuration expects the same single predicated sequence with
; ptrue vl64.
214 define void @fcmp_oeq_v64f32(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
215 ; CHECK-LABEL: fcmp_oeq_v64f32:
217 ; CHECK-NEXT: ptrue p0.s, vl64
218 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
219 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
220 ; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
221 ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
222 ; CHECK-NEXT: st1w { z0.s }, p0, [x2]
224 %op1 = load <64 x float>, ptr %a
225 %op2 = load <64 x float>, ptr %b
226 %cmp = fcmp oeq <64 x float> %op1, %op2
227 %sext = sext <64 x i1> %cmp to <64 x i32>
228 store <64 x i32> %sext, ptr %c
232 ; Don't use SVE for 64-bit vectors.
; fcmp oeq on <1 x double> register operands, with the <1 x i1> result
; sign-extended to <1 x i64>. The expected output is a single fcmeq on the
; d-registers (d0, d1); no SVE z-registers or predicates are involved.
233 define <1 x i64> @fcmp_oeq_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
234 ; CHECK-LABEL: fcmp_oeq_v1f64:
236 ; CHECK-NEXT: fcmeq d0, d0, d1
238 %cmp = fcmp oeq <1 x double> %op1, %op2
239 %sext = sext <1 x i1> %cmp to <1 x i64>
243 ; Don't use SVE for 128-bit vectors.
; fcmp oeq on <2 x double> register operands, with the <2 x i1> result
; sign-extended to <2 x i64>. The expected output is a single fcmeq on
; v-registers (v0.2d); no SVE z-registers or predicates are involved.
244 define <2 x i64> @fcmp_oeq_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
245 ; CHECK-LABEL: fcmp_oeq_v2f64:
247 ; CHECK-NEXT: fcmeq v0.2d, v0.2d, v1.2d
249 %cmp = fcmp oeq <2 x double> %op1, %op2
250 %sext = sext <2 x i1> %cmp to <2 x i64>
; fcmp oeq on <4 x double> (256 bits) loaded from %a and %b; the result is
; sign-extended to <4 x i64> and stored to %c. Under vscale_range(2,0) the
; expected output is one predicated SVE sequence on .d lanes: ptrue vl4,
; two ld1d, fcmeq into a predicate, a zeroing mov of #-1, st1d.
254 define void @fcmp_oeq_v4f64(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
255 ; CHECK-LABEL: fcmp_oeq_v4f64:
257 ; CHECK-NEXT: ptrue p0.d, vl4
258 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
259 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
260 ; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
261 ; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
262 ; CHECK-NEXT: st1d { z0.d }, p0, [x2]
264 %op1 = load <4 x double>, ptr %a
265 %op2 = load <4 x double>, ptr %b
266 %cmp = fcmp oeq <4 x double> %op1, %op2
267 %sext = sext <4 x i1> %cmp to <4 x i64>
268 store <4 x i64> %sext, ptr %c
; fcmp oeq on <8 x double> (512 bits) with no vscale_range attribute, so the
; expected output depends on the RUN line. The 256-bit configuration splits
; the work into two vl4 halves (the second half addressed through x8 = #4
; scaled by lsl #3); the 512-bit and 2048-bit configurations share one set of
; expectations and use a single vl8 sequence.
272 define void @fcmp_oeq_v8f64(ptr %a, ptr %b, ptr %c) #0 {
273 ; VBITS_GE_256-LABEL: fcmp_oeq_v8f64:
274 ; VBITS_GE_256: // %bb.0:
275 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
276 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
277 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
278 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
279 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
280 ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
281 ; VBITS_GE_256-NEXT: fcmeq p1.d, p0/z, z0.d, z2.d
282 ; VBITS_GE_256-NEXT: fcmeq p2.d, p0/z, z1.d, z3.d
283 ; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
284 ; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
285 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2, x8, lsl #3]
286 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2]
287 ; VBITS_GE_256-NEXT: ret
289 ; VBITS_GE_512-LABEL: fcmp_oeq_v8f64:
290 ; VBITS_GE_512: // %bb.0:
291 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
292 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
293 ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
294 ; VBITS_GE_512-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
295 ; VBITS_GE_512-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
296 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
297 ; VBITS_GE_512-NEXT: ret
298 %op1 = load <8 x double>, ptr %a
299 %op2 = load <8 x double>, ptr %b
300 %cmp = fcmp oeq <8 x double> %op1, %op2
301 %sext = sext <8 x i1> %cmp to <8 x i64>
302 store <8 x i64> %sext, ptr %c
; fcmp oeq on <16 x double> (1024 bits) loaded from %a and %b; the result is
; sign-extended to <16 x i64> and stored to %c. Under vscale_range(8,0) every
; RUN configuration expects the same single predicated sequence with
; ptrue vl16.
306 define void @fcmp_oeq_v16f64(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
307 ; CHECK-LABEL: fcmp_oeq_v16f64:
309 ; CHECK-NEXT: ptrue p0.d, vl16
310 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
311 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
312 ; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
313 ; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
314 ; CHECK-NEXT: st1d { z0.d }, p0, [x2]
316 %op1 = load <16 x double>, ptr %a
317 %op2 = load <16 x double>, ptr %b
318 %cmp = fcmp oeq <16 x double> %op1, %op2
319 %sext = sext <16 x i1> %cmp to <16 x i64>
320 store <16 x i64> %sext, ptr %c
; fcmp oeq on <32 x double> (2048 bits) loaded from %a and %b; the result is
; sign-extended to <32 x i64> and stored to %c. Under vscale_range(16,0)
; every RUN configuration expects the same single predicated sequence with
; ptrue vl32.
324 define void @fcmp_oeq_v32f64(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
325 ; CHECK-LABEL: fcmp_oeq_v32f64:
327 ; CHECK-NEXT: ptrue p0.d, vl32
328 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
329 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
330 ; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
331 ; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
332 ; CHECK-NEXT: st1d { z0.d }, p0, [x2]
334 %op1 = load <32 x double>, ptr %a
335 %op2 = load <32 x double>, ptr %b
336 %cmp = fcmp oeq <32 x double> %op1, %op2
337 %sext = sext <32 x i1> %cmp to <32 x i64>
338 store <32 x i64> %sext, ptr %c
; fcmp ueq (unordered or equal) on <16 x half>. The expected output computes
; two predicates, fcmuo and fcmeq, and combines them with a predicated
; mov of p2 into p1 before materialising the all-ones/zero lanes.
346 define void @fcmp_ueq_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
347 ; CHECK-LABEL: fcmp_ueq_v16f16:
349 ; CHECK-NEXT: ptrue p0.h, vl16
350 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
351 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
352 ; CHECK-NEXT: fcmuo p1.h, p0/z, z0.h, z1.h
353 ; CHECK-NEXT: fcmeq p2.h, p0/z, z0.h, z1.h
354 ; CHECK-NEXT: mov p1.b, p2/m, p2.b
355 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
356 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
358 %op1 = load <16 x half>, ptr %a
359 %op2 = load <16 x half>, ptr %b
360 %cmp = fcmp ueq <16 x half> %op1, %op2
361 %sext = sext <16 x i1> %cmp to <16 x i16>
362 store <16 x i16> %sext, ptr %c
; fcmp one (ordered and not equal) on <16 x half>. The expected output
; computes fcmgt in both operand orders (z1 > z0 and z0 > z1) and combines
; the two predicates with a predicated mov of p2 into p1.
370 define void @fcmp_one_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
371 ; CHECK-LABEL: fcmp_one_v16f16:
373 ; CHECK-NEXT: ptrue p0.h, vl16
374 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
375 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
376 ; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z0.h
377 ; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h
378 ; CHECK-NEXT: mov p1.b, p2/m, p2.b
379 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
380 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
382 %op1 = load <16 x half>, ptr %a
383 %op2 = load <16 x half>, ptr %b
384 %cmp = fcmp one <16 x half> %op1, %op2
385 %sext = sext <16 x i1> %cmp to <16 x i16>
386 store <16 x i16> %sext, ptr %c
; fcmp une (unordered or not equal) on <16 x half>. The expected output uses
; a single fcmne; no second compare or inversion is needed.
394 define void @fcmp_une_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
395 ; CHECK-LABEL: fcmp_une_v16f16:
397 ; CHECK-NEXT: ptrue p0.h, vl16
398 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
399 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
400 ; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h
401 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
402 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
404 %op1 = load <16 x half>, ptr %a
405 %op2 = load <16 x half>, ptr %b
406 %cmp = fcmp une <16 x half> %op1, %op2
407 %sext = sext <16 x i1> %cmp to <16 x i16>
408 store <16 x i16> %sext, ptr %c
; fcmp ogt (ordered greater-than) on <16 x half>. The expected output uses a
; single fcmgt with the operands in source order (z0, z1).
416 define void @fcmp_ogt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
417 ; CHECK-LABEL: fcmp_ogt_v16f16:
419 ; CHECK-NEXT: ptrue p0.h, vl16
420 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
421 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
422 ; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h
423 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
424 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
426 %op1 = load <16 x half>, ptr %a
427 %op2 = load <16 x half>, ptr %b
428 %cmp = fcmp ogt <16 x half> %op1, %op2
429 %sext = sext <16 x i1> %cmp to <16 x i16>
430 store <16 x i16> %sext, ptr %c
; fcmp ugt (unordered or greater-than) on <16 x half>. The expected output
; computes the opposite ordered compare, fcmge with swapped operands
; (z1 >= z0), and then inverts the materialised lanes by eor with an
; all-ones vector.
438 define void @fcmp_ugt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
439 ; CHECK-LABEL: fcmp_ugt_v16f16:
441 ; CHECK-NEXT: ptrue p0.h, vl16
442 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
443 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
444 ; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z0.h
445 ; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff
446 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
447 ; CHECK-NEXT: eor z0.d, z0.d, z1.d
448 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
450 %op1 = load <16 x half>, ptr %a
451 %op2 = load <16 x half>, ptr %b
452 %cmp = fcmp ugt <16 x half> %op1, %op2
453 %sext = sext <16 x i1> %cmp to <16 x i16>
454 store <16 x i16> %sext, ptr %c
; fcmp olt (ordered less-than) on <16 x half>. The expected output uses a
; single fcmgt with the operands swapped (z1, z0).
462 define void @fcmp_olt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
463 ; CHECK-LABEL: fcmp_olt_v16f16:
465 ; CHECK-NEXT: ptrue p0.h, vl16
466 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
467 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
468 ; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z0.h
469 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
470 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
472 %op1 = load <16 x half>, ptr %a
473 %op2 = load <16 x half>, ptr %b
474 %cmp = fcmp olt <16 x half> %op1, %op2
475 %sext = sext <16 x i1> %cmp to <16 x i16>
476 store <16 x i16> %sext, ptr %c
; fcmp ult (unordered or less-than) on <16 x half>. The expected output
; computes the opposite ordered compare, fcmge in source operand order
; (z0 >= z1), and then inverts the materialised lanes by eor with an
; all-ones vector.
484 define void @fcmp_ult_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
485 ; CHECK-LABEL: fcmp_ult_v16f16:
487 ; CHECK-NEXT: ptrue p0.h, vl16
488 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
489 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
490 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h
491 ; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff
492 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
493 ; CHECK-NEXT: eor z0.d, z0.d, z1.d
494 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
496 %op1 = load <16 x half>, ptr %a
497 %op2 = load <16 x half>, ptr %b
498 %cmp = fcmp ult <16 x half> %op1, %op2
499 %sext = sext <16 x i1> %cmp to <16 x i16>
500 store <16 x i16> %sext, ptr %c
; fcmp oge (ordered greater-or-equal) on <16 x half>. The expected output
; uses a single fcmge with the operands in source order (z0, z1).
508 define void @fcmp_oge_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
509 ; CHECK-LABEL: fcmp_oge_v16f16:
511 ; CHECK-NEXT: ptrue p0.h, vl16
512 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
513 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
514 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h
515 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
516 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
518 %op1 = load <16 x half>, ptr %a
519 %op2 = load <16 x half>, ptr %b
520 %cmp = fcmp oge <16 x half> %op1, %op2
521 %sext = sext <16 x i1> %cmp to <16 x i16>
522 store <16 x i16> %sext, ptr %c
; fcmp uge (unordered or greater-or-equal) on <16 x half>. The expected
; output computes the opposite ordered compare, fcmgt with swapped operands
; (z1 > z0), and then inverts the materialised lanes by eor with an
; all-ones vector.
530 define void @fcmp_uge_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
531 ; CHECK-LABEL: fcmp_uge_v16f16:
533 ; CHECK-NEXT: ptrue p0.h, vl16
534 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
535 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
536 ; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z0.h
537 ; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff
538 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
539 ; CHECK-NEXT: eor z0.d, z0.d, z1.d
540 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
542 %op1 = load <16 x half>, ptr %a
543 %op2 = load <16 x half>, ptr %b
544 %cmp = fcmp uge <16 x half> %op1, %op2
545 %sext = sext <16 x i1> %cmp to <16 x i16>
546 store <16 x i16> %sext, ptr %c
; fcmp ole (ordered less-or-equal) on <16 x half>. The expected output uses
; a single fcmge with the operands swapped (z1, z0).
554 define void @fcmp_ole_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
555 ; CHECK-LABEL: fcmp_ole_v16f16:
557 ; CHECK-NEXT: ptrue p0.h, vl16
558 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
559 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
560 ; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z0.h
561 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
562 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
564 %op1 = load <16 x half>, ptr %a
565 %op2 = load <16 x half>, ptr %b
566 %cmp = fcmp ole <16 x half> %op1, %op2
567 %sext = sext <16 x i1> %cmp to <16 x i16>
568 store <16 x i16> %sext, ptr %c
; fcmp ule (unordered or less-or-equal) on <16 x half>. The expected output
; computes the opposite ordered compare, fcmgt in source operand order
; (z0 > z1), and then inverts the materialised lanes by eor with an
; all-ones vector.
576 define void @fcmp_ule_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
577 ; CHECK-LABEL: fcmp_ule_v16f16:
579 ; CHECK-NEXT: ptrue p0.h, vl16
580 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
581 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
582 ; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h
583 ; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff
584 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
585 ; CHECK-NEXT: eor z0.d, z0.d, z1.d
586 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
588 %op1 = load <16 x half>, ptr %a
589 %op2 = load <16 x half>, ptr %b
590 %cmp = fcmp ule <16 x half> %op1, %op2
591 %sext = sext <16 x i1> %cmp to <16 x i16>
592 store <16 x i16> %sext, ptr %c
; fcmp uno (unordered) on <16 x half>. The expected output uses a single
; fcmuo.
600 define void @fcmp_uno_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
601 ; CHECK-LABEL: fcmp_uno_v16f16:
603 ; CHECK-NEXT: ptrue p0.h, vl16
604 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
605 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
606 ; CHECK-NEXT: fcmuo p1.h, p0/z, z0.h, z1.h
607 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
608 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
610 %op1 = load <16 x half>, ptr %a
611 %op2 = load <16 x half>, ptr %b
612 %cmp = fcmp uno <16 x half> %op1, %op2
613 %sext = sext <16 x i1> %cmp to <16 x i16>
614 store <16 x i16> %sext, ptr %c
; fcmp ord (ordered) on <16 x half>. The expected output computes fcmuo and
; then inverts the materialised lanes by eor with an all-ones vector.
622 define void @fcmp_ord_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
623 ; CHECK-LABEL: fcmp_ord_v16f16:
625 ; CHECK-NEXT: ptrue p0.h, vl16
626 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
627 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
628 ; CHECK-NEXT: fcmuo p1.h, p0/z, z0.h, z1.h
629 ; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff
630 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
631 ; CHECK-NEXT: eor z0.d, z0.d, z1.d
632 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
634 %op1 = load <16 x half>, ptr %a
635 %op2 = load <16 x half>, ptr %b
636 %cmp = fcmp ord <16 x half> %op1, %op2
637 %sext = sext <16 x i1> %cmp to <16 x i16>
638 store <16 x i16> %sext, ptr %c
; fcmp oeq carrying the fast flag on <16 x half>. The expected output uses a
; single fcmeq.
646 define void @fcmp_eq_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
647 ; CHECK-LABEL: fcmp_eq_v16f16:
649 ; CHECK-NEXT: ptrue p0.h, vl16
650 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
651 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
652 ; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
653 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
654 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
656 %op1 = load <16 x half>, ptr %a
657 %op2 = load <16 x half>, ptr %b
658 %cmp = fcmp fast oeq <16 x half> %op1, %op2
659 %sext = sext <16 x i1> %cmp to <16 x i16>
660 store <16 x i16> %sext, ptr %c
; fcmp one carrying the fast flag on <16 x half>. With the fast flag the
; expected output is a single fcmne rather than a pair of compares.
668 define void @fcmp_ne_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
669 ; CHECK-LABEL: fcmp_ne_v16f16:
671 ; CHECK-NEXT: ptrue p0.h, vl16
672 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
673 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
674 ; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h
675 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
676 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
678 %op1 = load <16 x half>, ptr %a
679 %op2 = load <16 x half>, ptr %b
680 %cmp = fcmp fast one <16 x half> %op1, %op2
681 %sext = sext <16 x i1> %cmp to <16 x i16>
682 store <16 x i16> %sext, ptr %c
; fcmp ogt carrying the fast flag on <16 x half>. The expected output uses a
; single fcmgt with the operands in source order (z0, z1).
690 define void @fcmp_gt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
691 ; CHECK-LABEL: fcmp_gt_v16f16:
693 ; CHECK-NEXT: ptrue p0.h, vl16
694 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
695 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
696 ; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h
697 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
698 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
700 %op1 = load <16 x half>, ptr %a
701 %op2 = load <16 x half>, ptr %b
702 %cmp = fcmp fast ogt <16 x half> %op1, %op2
703 %sext = sext <16 x i1> %cmp to <16 x i16>
704 store <16 x i16> %sext, ptr %c
; fcmp olt carrying the fast flag on <16 x half>. The expected output uses a
; single fcmgt with the operands swapped (z1, z0).
712 define void @fcmp_lt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
713 ; CHECK-LABEL: fcmp_lt_v16f16:
715 ; CHECK-NEXT: ptrue p0.h, vl16
716 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
717 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
718 ; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z0.h
719 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
720 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
722 %op1 = load <16 x half>, ptr %a
723 %op2 = load <16 x half>, ptr %b
724 %cmp = fcmp fast olt <16 x half> %op1, %op2
725 %sext = sext <16 x i1> %cmp to <16 x i16>
726 store <16 x i16> %sext, ptr %c
; fcmp oge carrying the fast flag on <16 x half>. The expected output uses a
; single fcmge with the operands in source order (z0, z1).
734 define void @fcmp_ge_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
735 ; CHECK-LABEL: fcmp_ge_v16f16:
737 ; CHECK-NEXT: ptrue p0.h, vl16
738 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
739 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
740 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h
741 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
742 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
744 %op1 = load <16 x half>, ptr %a
745 %op2 = load <16 x half>, ptr %b
746 %cmp = fcmp fast oge <16 x half> %op1, %op2
747 %sext = sext <16 x i1> %cmp to <16 x i16>
748 store <16 x i16> %sext, ptr %c
; fcmp ole carrying the fast flag on <16 x half>. The expected output uses a
; single fcmge with the operands swapped (z1, z0).
756 define void @fcmp_le_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
757 ; CHECK-LABEL: fcmp_le_v16f16:
759 ; CHECK-NEXT: ptrue p0.h, vl16
760 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
761 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
762 ; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z0.h
763 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
764 ; CHECK-NEXT: st1h { z0.h }, p0, [x2]
766 %op1 = load <16 x half>, ptr %a
767 %op2 = load <16 x half>, ptr %b
768 %cmp = fcmp fast ole <16 x half> %op1, %op2
769 %sext = sext <16 x i1> %cmp to <16 x i16>
770 store <16 x i16> %sext, ptr %c
; Attribute group #0, attached to each test function visible in this file:
; enables the SVE target feature for code generation.
774 attributes #0 = { "target-features"="+sve" }