1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3 ; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
5 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
7 declare i8 @llvm.vector.reduce.smin.v2i8(<2 x i8>)
8 declare i8 @llvm.vector.reduce.smin.v3i8(<3 x i8>)
9 declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>)
10 declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>)
11 declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
12 declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>)
13 declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>)
14 declare i16 @llvm.vector.reduce.smin.v3i16(<3 x i16>)
15 declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>)
16 declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
17 declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)
18 declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>)
19 declare i32 @llvm.vector.reduce.smin.v3i32(<3 x i32>)
20 declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
21 declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
22 declare i32 @llvm.vector.reduce.smin.v16i32(<16 x i32>)
23 declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
24 declare i64 @llvm.vector.reduce.smin.v3i64(<3 x i64>)
25 declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
26 declare i128 @llvm.vector.reduce.smin.v2i128(<2 x i128>)
27 declare i8 @llvm.vector.reduce.smax.v2i8(<2 x i8>)
28 declare i8 @llvm.vector.reduce.smax.v3i8(<3 x i8>)
29 declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>)
30 declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>)
31 declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
32 declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>)
33 declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>)
34 declare i16 @llvm.vector.reduce.smax.v3i16(<3 x i16>)
35 declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>)
36 declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
37 declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>)
38 declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>)
39 declare i32 @llvm.vector.reduce.smax.v3i32(<3 x i32>)
40 declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
41 declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
42 declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>)
43 declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
44 declare i64 @llvm.vector.reduce.smax.v3i64(<3 x i64>)
45 declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
46 declare i128 @llvm.vector.reduce.smax.v2i128(<2 x i128>)
47 declare i8 @llvm.vector.reduce.umin.v2i8(<2 x i8>)
48 declare i8 @llvm.vector.reduce.umin.v3i8(<3 x i8>)
49 declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>)
50 declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
51 declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
52 declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)
53 declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>)
54 declare i16 @llvm.vector.reduce.umin.v3i16(<3 x i16>)
55 declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
56 declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
57 declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)
58 declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
59 declare i32 @llvm.vector.reduce.umin.v3i32(<3 x i32>)
60 declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
61 declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
62 declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>)
63 declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
64 declare i64 @llvm.vector.reduce.umin.v3i64(<3 x i64>)
65 declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
66 declare i128 @llvm.vector.reduce.umin.v2i128(<2 x i128>)
67 declare i8 @llvm.vector.reduce.umax.v2i8(<2 x i8>)
68 declare i8 @llvm.vector.reduce.umax.v3i8(<3 x i8>)
69 declare i8 @llvm.vector.reduce.umax.v4i8(<4 x i8>)
70 declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>)
71 declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
72 declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>)
73 declare i16 @llvm.vector.reduce.umax.v2i16(<2 x i16>)
74 declare i16 @llvm.vector.reduce.umax.v3i16(<3 x i16>)
75 declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>)
76 declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
77 declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>)
78 declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>)
79 declare i32 @llvm.vector.reduce.umax.v3i32(<3 x i32>)
80 declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
81 declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
82 declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32>)
83 declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
84 declare i64 @llvm.vector.reduce.umax.v3i64(<3 x i64>)
85 declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
86 declare i128 @llvm.vector.reduce.umax.v2i128(<2 x i128>)
88 declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
89 declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
; Signed-max reduction over a <16 x i8> vector loaded from %arr.
; Both llc configurations share these assertions: one q-register load, a single
; SMAXV across the 16 byte lanes, then an fmov of the result into w0.
91 define i8 @smax_B(ptr nocapture readonly %arr) {
92 ; CHECK-LABEL: smax_B:
94 ; CHECK-NEXT: ldr q0, [x0]
95 ; CHECK-NEXT: smaxv b0, v0.16b
96 ; CHECK-NEXT: fmov w0, s0
98 %arr.load = load <16 x i8>, ptr %arr
99 %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %arr.load)
; Signed-max reduction over a <8 x i16> vector loaded from %arr.
; Shared assertions for both llc configurations: load, SMAXV over eight
; halfword lanes, fmov to w0.
103 define i16 @smax_H(ptr nocapture readonly %arr) {
104 ; CHECK-LABEL: smax_H:
106 ; CHECK-NEXT: ldr q0, [x0]
107 ; CHECK-NEXT: smaxv h0, v0.8h
108 ; CHECK-NEXT: fmov w0, s0
110 %arr.load = load <8 x i16>, ptr %arr
111 %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %arr.load)
; Signed-max reduction over a <4 x i32> vector loaded from %arr.
; Shared assertions for both llc configurations: load, SMAXV over four word
; lanes, fmov to w0.
115 define i32 @smax_S(ptr nocapture readonly %arr) {
116 ; CHECK-LABEL: smax_S:
118 ; CHECK-NEXT: ldr q0, [x0]
119 ; CHECK-NEXT: smaxv s0, v0.4s
120 ; CHECK-NEXT: fmov w0, s0
122 %arr.load = load <4 x i32>, ptr %arr
123 %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %arr.load)
; Unsigned-max reduction over a <16 x i8> vector loaded from %arr.
; Shared assertions for both llc configurations: load, UMAXV over sixteen byte
; lanes, fmov to w0.
127 define i8 @umax_B(ptr nocapture readonly %arr) {
128 ; CHECK-LABEL: umax_B:
130 ; CHECK-NEXT: ldr q0, [x0]
131 ; CHECK-NEXT: umaxv b0, v0.16b
132 ; CHECK-NEXT: fmov w0, s0
134 %arr.load = load <16 x i8>, ptr %arr
135 %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %arr.load)
; Unsigned-max reduction over a <8 x i16> vector loaded from %arr.
; Shared assertions for both llc configurations: load, UMAXV over eight
; halfword lanes, fmov to w0.
139 define i16 @umax_H(ptr nocapture readonly %arr) {
140 ; CHECK-LABEL: umax_H:
142 ; CHECK-NEXT: ldr q0, [x0]
143 ; CHECK-NEXT: umaxv h0, v0.8h
144 ; CHECK-NEXT: fmov w0, s0
146 %arr.load = load <8 x i16>, ptr %arr
147 %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %arr.load)
; Unsigned-max reduction over a <4 x i32> vector loaded from %arr.
; Shared assertions for both llc configurations: load, UMAXV over four word
; lanes, fmov to w0.
151 define i32 @umax_S(ptr nocapture readonly %arr) {
152 ; CHECK-LABEL: umax_S:
154 ; CHECK-NEXT: ldr q0, [x0]
155 ; CHECK-NEXT: umaxv s0, v0.4s
156 ; CHECK-NEXT: fmov w0, s0
158 %arr.load = load <4 x i32>, ptr %arr
159 %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %arr.load)
; Signed-min reduction over a <16 x i8> vector loaded from %arr.
; Shared assertions for both llc configurations: load, SMINV over sixteen byte
; lanes, fmov to w0.
163 define i8 @smin_B(ptr nocapture readonly %arr) {
164 ; CHECK-LABEL: smin_B:
166 ; CHECK-NEXT: ldr q0, [x0]
167 ; CHECK-NEXT: sminv b0, v0.16b
168 ; CHECK-NEXT: fmov w0, s0
170 %arr.load = load <16 x i8>, ptr %arr
171 %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %arr.load)
; Signed-min reduction over a <8 x i16> vector loaded from %arr.
; Shared assertions for both llc configurations: load, SMINV over eight
; halfword lanes, fmov to w0.
175 define i16 @smin_H(ptr nocapture readonly %arr) {
176 ; CHECK-LABEL: smin_H:
178 ; CHECK-NEXT: ldr q0, [x0]
179 ; CHECK-NEXT: sminv h0, v0.8h
180 ; CHECK-NEXT: fmov w0, s0
182 %arr.load = load <8 x i16>, ptr %arr
183 %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %arr.load)
; Signed-min reduction over a <4 x i32> vector loaded from %arr.
; Shared assertions for both llc configurations: load, SMINV over four word
; lanes, fmov to w0.
187 define i32 @smin_S(ptr nocapture readonly %arr) {
188 ; CHECK-LABEL: smin_S:
190 ; CHECK-NEXT: ldr q0, [x0]
191 ; CHECK-NEXT: sminv s0, v0.4s
192 ; CHECK-NEXT: fmov w0, s0
194 %arr.load = load <4 x i32>, ptr %arr
195 %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %arr.load)
; Unsigned-min reduction over a <16 x i8> vector loaded from %arr.
; Shared assertions for both llc configurations: load, UMINV over sixteen byte
; lanes, fmov to w0.
199 define i8 @umin_B(ptr nocapture readonly %arr) {
200 ; CHECK-LABEL: umin_B:
202 ; CHECK-NEXT: ldr q0, [x0]
203 ; CHECK-NEXT: uminv b0, v0.16b
204 ; CHECK-NEXT: fmov w0, s0
206 %arr.load = load <16 x i8>, ptr %arr
207 %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %arr.load)
; Unsigned-min reduction over a <8 x i16> vector loaded from %arr.
; Shared assertions for both llc configurations: load, UMINV over eight
; halfword lanes, fmov to w0.
211 define i16 @umin_H(ptr nocapture readonly %arr) {
212 ; CHECK-LABEL: umin_H:
214 ; CHECK-NEXT: ldr q0, [x0]
215 ; CHECK-NEXT: uminv h0, v0.8h
216 ; CHECK-NEXT: fmov w0, s0
218 %arr.load = load <8 x i16>, ptr %arr
219 %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %arr.load)
; Unsigned-min reduction over a <4 x i32> vector loaded from %arr.
; Shared assertions for both llc configurations: load, UMINV over four word
; lanes, fmov to w0.
223 define i32 @umin_S(ptr nocapture readonly %arr) {
224 ; CHECK-LABEL: umin_S:
226 ; CHECK-NEXT: ldr q0, [x0]
227 ; CHECK-NEXT: uminv s0, v0.4s
228 ; CHECK-NEXT: fmov w0, s0
230 %arr.load = load <4 x i32>, ptr %arr
231 %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %arr.load)
; Floating-point max reduction over a <4 x float> vector loaded from %arr.
; The intrinsic call carries the nnan fast-math flag; the shared assertions
; expect a load followed by FMAXNMV, with the result left in s0.
235 define float @fmaxnm_S(ptr nocapture readonly %arr) {
236 ; CHECK-LABEL: fmaxnm_S:
238 ; CHECK-NEXT: ldr q0, [x0]
239 ; CHECK-NEXT: fmaxnmv s0, v0.4s
241 %arr.load = load <4 x float>, ptr %arr
242 %r = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %arr.load)
; Floating-point min reduction over a <4 x float> vector loaded from %arr.
; The intrinsic call carries the nnan fast-math flag; the shared assertions
; expect a load followed by FMINNMV, with the result left in s0.
246 define float @fminnm_S(ptr nocapture readonly %arr) {
247 ; CHECK-LABEL: fminnm_S:
249 ; CHECK-NEXT: ldr q0, [x0]
250 ; CHECK-NEXT: fminnmv s0, v0.4s
252 %arr.load = load <4 x float>, ptr %arr
253 %r = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %arr.load)
; Unsigned-max reduction over a <16 x i16> vector (two q registers) loaded
; from %arr. Expected shape: one ldp, one lane-wise UMAX folding the two
; halves together, then UMAXV over the remaining eight halfword lanes.
; The SelectionDAG and GlobalISel outputs differ only in register assignment
; and UMAX operand order.
257 define i16 @oversized_umax_256(ptr nocapture readonly %arr) {
258 ; CHECK-SD-LABEL: oversized_umax_256:
259 ; CHECK-SD: // %bb.0:
260 ; CHECK-SD-NEXT: ldp q1, q0, [x0]
261 ; CHECK-SD-NEXT: umax v0.8h, v1.8h, v0.8h
262 ; CHECK-SD-NEXT: umaxv h0, v0.8h
263 ; CHECK-SD-NEXT: fmov w0, s0
266 ; CHECK-GI-LABEL: oversized_umax_256:
267 ; CHECK-GI: // %bb.0:
268 ; CHECK-GI-NEXT: ldp q0, q1, [x0]
269 ; CHECK-GI-NEXT: umax v0.8h, v0.8h, v1.8h
270 ; CHECK-GI-NEXT: umaxv h0, v0.8h
271 ; CHECK-GI-NEXT: fmov w0, s0
273 %arr.load = load <16 x i16>, ptr %arr
274 %r = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %arr.load)
; Unsigned-max reduction over a <16 x i32> vector (four q registers) loaded
; from %arr. Expected shape: two ldp loads, three lane-wise UMAX instructions
; that fold four registers down to one, then UMAXV over four word lanes.
; SelectionDAG loads the upper 32 bytes first and pairs registers that are 32
; bytes apart; GlobalISel loads in address order and pairs adjacent registers.
278 define i32 @oversized_umax_512(ptr nocapture readonly %arr) {
279 ; CHECK-SD-LABEL: oversized_umax_512:
280 ; CHECK-SD: // %bb.0:
281 ; CHECK-SD-NEXT: ldp q0, q1, [x0, #32]
282 ; CHECK-SD-NEXT: ldp q2, q3, [x0]
283 ; CHECK-SD-NEXT: umax v1.4s, v3.4s, v1.4s
284 ; CHECK-SD-NEXT: umax v0.4s, v2.4s, v0.4s
285 ; CHECK-SD-NEXT: umax v0.4s, v0.4s, v1.4s
286 ; CHECK-SD-NEXT: umaxv s0, v0.4s
287 ; CHECK-SD-NEXT: fmov w0, s0
290 ; CHECK-GI-LABEL: oversized_umax_512:
291 ; CHECK-GI: // %bb.0:
292 ; CHECK-GI-NEXT: ldp q0, q1, [x0]
293 ; CHECK-GI-NEXT: ldp q2, q3, [x0, #32]
294 ; CHECK-GI-NEXT: umax v0.4s, v0.4s, v1.4s
295 ; CHECK-GI-NEXT: umax v1.4s, v2.4s, v3.4s
296 ; CHECK-GI-NEXT: umax v0.4s, v0.4s, v1.4s
297 ; CHECK-GI-NEXT: umaxv s0, v0.4s
298 ; CHECK-GI-NEXT: fmov w0, s0
300 %arr.load = load <16 x i32>, ptr %arr
301 %r = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %arr.load)
; Unsigned-min reduction over a <16 x i16> vector (two q registers) loaded
; from %arr. Expected shape: one ldp, one lane-wise UMIN folding the two
; halves together, then UMINV over the remaining eight halfword lanes.
; The SelectionDAG and GlobalISel outputs differ only in register assignment
; and UMIN operand order.
305 define i16 @oversized_umin_256(ptr nocapture readonly %arr) {
306 ; CHECK-SD-LABEL: oversized_umin_256:
307 ; CHECK-SD: // %bb.0:
308 ; CHECK-SD-NEXT: ldp q1, q0, [x0]
309 ; CHECK-SD-NEXT: umin v0.8h, v1.8h, v0.8h
310 ; CHECK-SD-NEXT: uminv h0, v0.8h
311 ; CHECK-SD-NEXT: fmov w0, s0
314 ; CHECK-GI-LABEL: oversized_umin_256:
315 ; CHECK-GI: // %bb.0:
316 ; CHECK-GI-NEXT: ldp q0, q1, [x0]
317 ; CHECK-GI-NEXT: umin v0.8h, v0.8h, v1.8h
318 ; CHECK-GI-NEXT: uminv h0, v0.8h
319 ; CHECK-GI-NEXT: fmov w0, s0
321 %arr.load = load <16 x i16>, ptr %arr
322 %r = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %arr.load)
; Unsigned-min reduction over a <16 x i32> vector (four q registers) loaded
; from %arr. Expected shape: two ldp loads, three lane-wise UMIN instructions
; that fold four registers down to one, then UMINV over four word lanes.
; SelectionDAG loads the upper 32 bytes first and pairs registers that are 32
; bytes apart; GlobalISel loads in address order and pairs adjacent registers.
326 define i32 @oversized_umin_512(ptr nocapture readonly %arr) {
327 ; CHECK-SD-LABEL: oversized_umin_512:
328 ; CHECK-SD: // %bb.0:
329 ; CHECK-SD-NEXT: ldp q0, q1, [x0, #32]
330 ; CHECK-SD-NEXT: ldp q2, q3, [x0]
331 ; CHECK-SD-NEXT: umin v1.4s, v3.4s, v1.4s
332 ; CHECK-SD-NEXT: umin v0.4s, v2.4s, v0.4s
333 ; CHECK-SD-NEXT: umin v0.4s, v0.4s, v1.4s
334 ; CHECK-SD-NEXT: uminv s0, v0.4s
335 ; CHECK-SD-NEXT: fmov w0, s0
338 ; CHECK-GI-LABEL: oversized_umin_512:
339 ; CHECK-GI: // %bb.0:
340 ; CHECK-GI-NEXT: ldp q0, q1, [x0]
341 ; CHECK-GI-NEXT: ldp q2, q3, [x0, #32]
342 ; CHECK-GI-NEXT: umin v0.4s, v0.4s, v1.4s
343 ; CHECK-GI-NEXT: umin v1.4s, v2.4s, v3.4s
344 ; CHECK-GI-NEXT: umin v0.4s, v0.4s, v1.4s
345 ; CHECK-GI-NEXT: uminv s0, v0.4s
346 ; CHECK-GI-NEXT: fmov w0, s0
348 %arr.load = load <16 x i32>, ptr %arr
349 %r = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %arr.load)
; Signed-max reduction over a <16 x i16> vector (two q registers) loaded
; from %arr. Expected shape: one ldp, one lane-wise SMAX folding the two
; halves together, then SMAXV over the remaining eight halfword lanes.
; The SelectionDAG and GlobalISel outputs differ only in register assignment
; and SMAX operand order.
353 define i16 @oversized_smax_256(ptr nocapture readonly %arr) {
354 ; CHECK-SD-LABEL: oversized_smax_256:
355 ; CHECK-SD: // %bb.0:
356 ; CHECK-SD-NEXT: ldp q1, q0, [x0]
357 ; CHECK-SD-NEXT: smax v0.8h, v1.8h, v0.8h
358 ; CHECK-SD-NEXT: smaxv h0, v0.8h
359 ; CHECK-SD-NEXT: fmov w0, s0
362 ; CHECK-GI-LABEL: oversized_smax_256:
363 ; CHECK-GI: // %bb.0:
364 ; CHECK-GI-NEXT: ldp q0, q1, [x0]
365 ; CHECK-GI-NEXT: smax v0.8h, v0.8h, v1.8h
366 ; CHECK-GI-NEXT: smaxv h0, v0.8h
367 ; CHECK-GI-NEXT: fmov w0, s0
369 %arr.load = load <16 x i16>, ptr %arr
370 %r = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %arr.load)
; Signed-max reduction over a <16 x i32> vector (four q registers) loaded
; from %arr. Expected shape: two ldp loads, three lane-wise SMAX instructions
; that fold four registers down to one, then SMAXV over four word lanes.
; SelectionDAG loads the upper 32 bytes first and pairs registers that are 32
; bytes apart; GlobalISel loads in address order and pairs adjacent registers.
374 define i32 @oversized_smax_512(ptr nocapture readonly %arr) {
375 ; CHECK-SD-LABEL: oversized_smax_512:
376 ; CHECK-SD: // %bb.0:
377 ; CHECK-SD-NEXT: ldp q0, q1, [x0, #32]
378 ; CHECK-SD-NEXT: ldp q2, q3, [x0]
379 ; CHECK-SD-NEXT: smax v1.4s, v3.4s, v1.4s
380 ; CHECK-SD-NEXT: smax v0.4s, v2.4s, v0.4s
381 ; CHECK-SD-NEXT: smax v0.4s, v0.4s, v1.4s
382 ; CHECK-SD-NEXT: smaxv s0, v0.4s
383 ; CHECK-SD-NEXT: fmov w0, s0
386 ; CHECK-GI-LABEL: oversized_smax_512:
387 ; CHECK-GI: // %bb.0:
388 ; CHECK-GI-NEXT: ldp q0, q1, [x0]
389 ; CHECK-GI-NEXT: ldp q2, q3, [x0, #32]
390 ; CHECK-GI-NEXT: smax v0.4s, v0.4s, v1.4s
391 ; CHECK-GI-NEXT: smax v1.4s, v2.4s, v3.4s
392 ; CHECK-GI-NEXT: smax v0.4s, v0.4s, v1.4s
393 ; CHECK-GI-NEXT: smaxv s0, v0.4s
394 ; CHECK-GI-NEXT: fmov w0, s0
396 %arr.load = load <16 x i32>, ptr %arr
397 %r = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %arr.load)
; Signed-min reduction over a <16 x i16> vector (two q registers) loaded
; from %arr. Expected shape: one ldp, one lane-wise SMIN folding the two
; halves together, then SMINV over the remaining eight halfword lanes.
; The SelectionDAG and GlobalISel outputs differ only in register assignment
; and SMIN operand order.
401 define i16 @oversized_smin_256(ptr nocapture readonly %arr) {
402 ; CHECK-SD-LABEL: oversized_smin_256:
403 ; CHECK-SD: // %bb.0:
404 ; CHECK-SD-NEXT: ldp q1, q0, [x0]
405 ; CHECK-SD-NEXT: smin v0.8h, v1.8h, v0.8h
406 ; CHECK-SD-NEXT: sminv h0, v0.8h
407 ; CHECK-SD-NEXT: fmov w0, s0
410 ; CHECK-GI-LABEL: oversized_smin_256:
411 ; CHECK-GI: // %bb.0:
412 ; CHECK-GI-NEXT: ldp q0, q1, [x0]
413 ; CHECK-GI-NEXT: smin v0.8h, v0.8h, v1.8h
414 ; CHECK-GI-NEXT: sminv h0, v0.8h
415 ; CHECK-GI-NEXT: fmov w0, s0
417 %arr.load = load <16 x i16>, ptr %arr
418 %r = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %arr.load)
; Signed-min reduction over a <16 x i32> vector (four q registers) loaded
; from %arr. Expected shape: two ldp loads, three lane-wise SMIN instructions
; that fold four registers down to one, then SMINV over four word lanes.
; SelectionDAG loads the upper 32 bytes first and pairs registers that are 32
; bytes apart; GlobalISel loads in address order and pairs adjacent registers.
423 define i32 @oversized_smin_512(ptr nocapture readonly %arr) {
424 ; CHECK-SD-LABEL: oversized_smin_512:
425 ; CHECK-SD: // %bb.0:
426 ; CHECK-SD-NEXT: ldp q0, q1, [x0, #32]
427 ; CHECK-SD-NEXT: ldp q2, q3, [x0]
428 ; CHECK-SD-NEXT: smin v1.4s, v3.4s, v1.4s
429 ; CHECK-SD-NEXT: smin v0.4s, v2.4s, v0.4s
430 ; CHECK-SD-NEXT: smin v0.4s, v0.4s, v1.4s
431 ; CHECK-SD-NEXT: sminv s0, v0.4s
432 ; CHECK-SD-NEXT: fmov w0, s0
435 ; CHECK-GI-LABEL: oversized_smin_512:
436 ; CHECK-GI: // %bb.0:
437 ; CHECK-GI-NEXT: ldp q0, q1, [x0]
438 ; CHECK-GI-NEXT: ldp q2, q3, [x0, #32]
439 ; CHECK-GI-NEXT: smin v0.4s, v0.4s, v1.4s
440 ; CHECK-GI-NEXT: smin v1.4s, v2.4s, v3.4s
441 ; CHECK-GI-NEXT: smin v0.4s, v0.4s, v1.4s
442 ; CHECK-GI-NEXT: sminv s0, v0.4s
443 ; CHECK-GI-NEXT: fmov w0, s0
445 %arr.load = load <16 x i32>, ptr %arr
446 %r = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %arr.load)
; Signed-min reduction of a <2 x i8> argument, which the expected code treats
; as two 32-bit lanes of v0.
; SelectionDAG: sign-extends each lane from 8 bits in place (shl/sshr by 24)
; and reduces with a pairwise SMINP.
; GlobalISel: moves both lanes to general registers, compares them as
; sign-extended bytes, and selects the result with fcsel on "lt".
450 define i8 @sminv_v2i8(<2 x i8> %a) {
451 ; CHECK-SD-LABEL: sminv_v2i8:
452 ; CHECK-SD: // %bb.0: // %entry
453 ; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24
454 ; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24
455 ; CHECK-SD-NEXT: sminp v0.2s, v0.2s, v0.2s
456 ; CHECK-SD-NEXT: fmov w0, s0
459 ; CHECK-GI-LABEL: sminv_v2i8:
460 ; CHECK-GI: // %bb.0: // %entry
461 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
462 ; CHECK-GI-NEXT: mov s1, v0.s[1]
463 ; CHECK-GI-NEXT: fmov w8, s0
464 ; CHECK-GI-NEXT: sxtb w8, w8
465 ; CHECK-GI-NEXT: fmov w9, s1
466 ; CHECK-GI-NEXT: cmp w8, w9, sxtb
467 ; CHECK-GI-NEXT: fcsel s0, s0, s1, lt
468 ; CHECK-GI-NEXT: fmov w0, s0
471 %arg1 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> %a)
; Signed-min reduction of a <3 x i8> argument, whose elements the expected
; code reads from w0, w1 and w2.
; SelectionDAG: pre-fills a 4 x halfword vector with 127 (the largest i8, so
; the padding lane cannot win a signed min), inserts the three elements,
; sign-extends from 8 bits (shl/sshr by 8) and reduces with SMINV.
; GlobalISel: stays in general registers with a two-step cmp/csel chain on
; sign-extended bytes.
475 define i8 @sminv_v3i8(<3 x i8> %a) {
476 ; CHECK-SD-LABEL: sminv_v3i8:
477 ; CHECK-SD: // %bb.0: // %entry
478 ; CHECK-SD-NEXT: movi v0.4h, #127
479 ; CHECK-SD-NEXT: mov v0.h[0], w0
480 ; CHECK-SD-NEXT: mov v0.h[1], w1
481 ; CHECK-SD-NEXT: mov v0.h[2], w2
482 ; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
483 ; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
484 ; CHECK-SD-NEXT: sminv h0, v0.4h
485 ; CHECK-SD-NEXT: fmov w0, s0
488 ; CHECK-GI-LABEL: sminv_v3i8:
489 ; CHECK-GI: // %bb.0: // %entry
490 ; CHECK-GI-NEXT: sxtb w8, w0
491 ; CHECK-GI-NEXT: cmp w8, w1, sxtb
492 ; CHECK-GI-NEXT: csel w8, w0, w1, lt
493 ; CHECK-GI-NEXT: sxtb w9, w8
494 ; CHECK-GI-NEXT: cmp w9, w2, sxtb
495 ; CHECK-GI-NEXT: csel w0, w8, w2, lt
498 %arg1 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> %a)
; Signed-min reduction of a <4 x i8> argument, which the expected code treats
; as four halfword lanes of v0.
; SelectionDAG: sign-extends each lane from 8 bits (shl/sshr by 8) and reduces
; with SMINV.
; GlobalISel: extracts all four lanes with umov and reduces them as a tree of
; scalar cmp/csel pairs on sign-extended bytes.
502 define i8 @sminv_v4i8(<4 x i8> %a) {
503 ; CHECK-SD-LABEL: sminv_v4i8:
504 ; CHECK-SD: // %bb.0: // %entry
505 ; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
506 ; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
507 ; CHECK-SD-NEXT: sminv h0, v0.4h
508 ; CHECK-SD-NEXT: fmov w0, s0
511 ; CHECK-GI-LABEL: sminv_v4i8:
512 ; CHECK-GI: // %bb.0: // %entry
513 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
514 ; CHECK-GI-NEXT: umov w8, v0.h[0]
515 ; CHECK-GI-NEXT: umov w9, v0.h[1]
516 ; CHECK-GI-NEXT: umov w10, v0.h[2]
517 ; CHECK-GI-NEXT: umov w12, v0.h[3]
518 ; CHECK-GI-NEXT: sxtb w11, w8
519 ; CHECK-GI-NEXT: cmp w11, w9, sxtb
520 ; CHECK-GI-NEXT: sxtb w11, w10
521 ; CHECK-GI-NEXT: csel w8, w8, w9, lt
522 ; CHECK-GI-NEXT: cmp w11, w12, sxtb
523 ; CHECK-GI-NEXT: sxtb w9, w8
524 ; CHECK-GI-NEXT: csel w10, w10, w12, lt
525 ; CHECK-GI-NEXT: cmp w9, w10, sxtb
526 ; CHECK-GI-NEXT: csel w0, w8, w10, lt
529 %arg1 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> %a)
; Signed-min reduction of a <8 x i8> argument. Both llc configurations share
; the assertions: a single SMINV over eight byte lanes, then fmov to w0.
533 define i8 @sminv_v8i8(<8 x i8> %a) {
534 ; CHECK-LABEL: sminv_v8i8:
535 ; CHECK: // %bb.0: // %entry
536 ; CHECK-NEXT: sminv b0, v0.8b
537 ; CHECK-NEXT: fmov w0, s0
540 %arg1 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %a)
; Signed-min reduction of a <16 x i8> argument. Both llc configurations share
; the assertions: a single SMINV over sixteen byte lanes, then fmov to w0.
544 define i8 @sminv_v16i8(<16 x i8> %a) {
545 ; CHECK-LABEL: sminv_v16i8:
546 ; CHECK: // %bb.0: // %entry
547 ; CHECK-NEXT: sminv b0, v0.16b
548 ; CHECK-NEXT: fmov w0, s0
551 %arg1 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %a)
; Signed-min reduction of a <32 x i8> argument held in v0 and v1. Shared
; assertions: a lane-wise SMIN folds the two registers into one, then SMINV
; reduces the sixteen byte lanes.
555 define i8 @sminv_v32i8(<32 x i8> %a) {
556 ; CHECK-LABEL: sminv_v32i8:
557 ; CHECK: // %bb.0: // %entry
558 ; CHECK-NEXT: smin v0.16b, v0.16b, v1.16b
559 ; CHECK-NEXT: sminv b0, v0.16b
560 ; CHECK-NEXT: fmov w0, s0
563 %arg1 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %a)
; Signed-min reduction of a <2 x i16> argument, which the expected code treats
; as two 32-bit lanes of v0.
; SelectionDAG: sign-extends each lane from 16 bits in place (shl/sshr by 16)
; and reduces with a pairwise SMINP.
; GlobalISel: moves both lanes to general registers, compares them as
; sign-extended halfwords, and selects the result with fcsel on "lt".
567 define i16 @sminv_v2i16(<2 x i16> %a) {
568 ; CHECK-SD-LABEL: sminv_v2i16:
569 ; CHECK-SD: // %bb.0: // %entry
570 ; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16
571 ; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #16
572 ; CHECK-SD-NEXT: sminp v0.2s, v0.2s, v0.2s
573 ; CHECK-SD-NEXT: fmov w0, s0
576 ; CHECK-GI-LABEL: sminv_v2i16:
577 ; CHECK-GI: // %bb.0: // %entry
578 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
579 ; CHECK-GI-NEXT: mov s1, v0.s[1]
580 ; CHECK-GI-NEXT: fmov w8, s0
581 ; CHECK-GI-NEXT: sxth w8, w8
582 ; CHECK-GI-NEXT: fmov w9, s1
583 ; CHECK-GI-NEXT: cmp w8, w9, sxth
584 ; CHECK-GI-NEXT: fcsel s0, s0, s1, lt
585 ; CHECK-GI-NEXT: fmov w0, s0
588 %arg1 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> %a)
; Signed-min reduction of a <3 x i16> argument. Shared assertions: the unused
; fourth halfword lane is overwritten with 32767 (0x7fff, the largest i16, so
; it cannot win a signed min) before a 4-lane SMINV.
592 define i16 @sminv_v3i16(<3 x i16> %a) {
593 ; CHECK-LABEL: sminv_v3i16:
594 ; CHECK: // %bb.0: // %entry
595 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
596 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
597 ; CHECK-NEXT: mov v0.h[3], w8
598 ; CHECK-NEXT: sminv h0, v0.4h
599 ; CHECK-NEXT: fmov w0, s0
602 %arg1 = call i16 @llvm.vector.reduce.smin.v3i16(<3 x i16> %a)
; Signed-min reduction of a <4 x i16> argument. Both llc configurations share
; the assertions: a single SMINV over four halfword lanes, then fmov to w0.
606 define i16 @sminv_v4i16(<4 x i16> %a) {
607 ; CHECK-LABEL: sminv_v4i16:
608 ; CHECK: // %bb.0: // %entry
609 ; CHECK-NEXT: sminv h0, v0.4h
610 ; CHECK-NEXT: fmov w0, s0
613 %arg1 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %a)
; Signed-min reduction of a <8 x i16> argument. Both llc configurations share
; the assertions: a single SMINV over eight halfword lanes, then fmov to w0.
617 define i16 @sminv_v8i16(<8 x i16> %a) {
618 ; CHECK-LABEL: sminv_v8i16:
619 ; CHECK: // %bb.0: // %entry
620 ; CHECK-NEXT: sminv h0, v0.8h
621 ; CHECK-NEXT: fmov w0, s0
624 %arg1 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %a)
; Signed-min reduction of a <16 x i16> argument held in v0 and v1. Shared
; assertions: a lane-wise SMIN folds the two registers into one, then SMINV
; reduces the eight halfword lanes.
628 define i16 @sminv_v16i16(<16 x i16> %a) {
629 ; CHECK-LABEL: sminv_v16i16:
630 ; CHECK: // %bb.0: // %entry
631 ; CHECK-NEXT: smin v0.8h, v0.8h, v1.8h
632 ; CHECK-NEXT: sminv h0, v0.8h
633 ; CHECK-NEXT: fmov w0, s0
636 %arg1 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %a)
; Signed-min reduction of a <2 x i32> argument. Shared assertions: the two
; lanes are reduced with a pairwise SMINP of v0 with itself, then fmov to w0.
640 define i32 @sminv_v2i32(<2 x i32> %a) {
641 ; CHECK-LABEL: sminv_v2i32:
642 ; CHECK: // %bb.0: // %entry
643 ; CHECK-NEXT: sminp v0.2s, v0.2s, v0.2s
644 ; CHECK-NEXT: fmov w0, s0
647 %arg1 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> %a)
; Signed-min reduction of a <3 x i32> argument. Shared assertions: the unused
; fourth word lane is overwritten with 0x7fffffff (the largest i32, so it
; cannot win a signed min) before a 4-lane SMINV.
651 define i32 @sminv_v3i32(<3 x i32> %a) {
652 ; CHECK-LABEL: sminv_v3i32:
653 ; CHECK: // %bb.0: // %entry
654 ; CHECK-NEXT: mov w8, #2147483647 // =0x7fffffff
655 ; CHECK-NEXT: mov v0.s[3], w8
656 ; CHECK-NEXT: sminv s0, v0.4s
657 ; CHECK-NEXT: fmov w0, s0
660 %arg1 = call i32 @llvm.vector.reduce.smin.v3i32(<3 x i32> %a)
; Signed-min reduction of a <4 x i32> argument. Both llc configurations share
; the assertions: a single SMINV over four word lanes, then fmov to w0.
664 define i32 @sminv_v4i32(<4 x i32> %a) {
665 ; CHECK-LABEL: sminv_v4i32:
666 ; CHECK: // %bb.0: // %entry
667 ; CHECK-NEXT: sminv s0, v0.4s
668 ; CHECK-NEXT: fmov w0, s0
671 %arg1 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %a)
; Signed-min reduction of a <8 x i32> argument held in v0 and v1. Shared
; assertions: a lane-wise SMIN folds the two registers into one, then SMINV
; reduces the four word lanes.
675 define i32 @sminv_v8i32(<8 x i32> %a) {
676 ; CHECK-LABEL: sminv_v8i32:
677 ; CHECK: // %bb.0: // %entry
678 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
679 ; CHECK-NEXT: sminv s0, v0.4s
680 ; CHECK-NEXT: fmov w0, s0
683 %arg1 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %a)
; Signed-min reduction of a <2 x i64> argument. Neither expected sequence
; uses an across-lanes reduction instruction for 64-bit elements.
; SelectionDAG: rotates the upper doubleword down with ext, compares the two
; halves with a scalar CMGT, and picks the smaller one with BIF.
; GlobalISel: moves both elements to general registers, compares them, and
; selects with fcsel on "lt".
687 define i64 @sminv_v2i64(<2 x i64> %a) {
688 ; CHECK-SD-LABEL: sminv_v2i64:
689 ; CHECK-SD: // %bb.0: // %entry
690 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
691 ; CHECK-SD-NEXT: cmgt d2, d1, d0
692 ; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
693 ; CHECK-SD-NEXT: fmov x0, d0
696 ; CHECK-GI-LABEL: sminv_v2i64:
697 ; CHECK-GI: // %bb.0: // %entry
698 ; CHECK-GI-NEXT: mov d1, v0.d[1]
699 ; CHECK-GI-NEXT: fmov x8, d0
700 ; CHECK-GI-NEXT: fmov x9, d1
701 ; CHECK-GI-NEXT: cmp x8, x9
702 ; CHECK-GI-NEXT: fcsel d0, d0, d1, lt
703 ; CHECK-GI-NEXT: fmov x0, d0
706 %arg1 = call i64 @llvm.vector.reduce.smin.v3i64(<3 x i64> %a)
; Signed-min reduction of a <3 x i64> argument, whose elements the expected
; code reads from d0, d1 and d2.
; Both configurations first pack the elements into two 2 x i64 registers
; (v0 = {d0, d1}, v2 = {d2, 0x7fffffffffffffff}), where the padding value is
; the largest i64 and so cannot win a signed min. A vector CMGT + BIF then
; folds v2 into v0.
; The final two-element step differs: SelectionDAG uses ext + scalar CMGT +
; BIF, GlobalISel uses a general-register cmp + fcsel on "lt".
710 define i64 @sminv_v3i64(<3 x i64> %a) {
711 ; CHECK-SD-LABEL: sminv_v3i64:
712 ; CHECK-SD: // %bb.0: // %entry
713 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
714 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
715 ; CHECK-SD-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff
716 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
717 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
718 ; CHECK-SD-NEXT: mov v2.d[1], x8
719 ; CHECK-SD-NEXT: cmgt v1.2d, v2.2d, v0.2d
720 ; CHECK-SD-NEXT: bif v0.16b, v2.16b, v1.16b
721 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
722 ; CHECK-SD-NEXT: cmgt d2, d1, d0
723 ; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
724 ; CHECK-SD-NEXT: fmov x0, d0
727 ; CHECK-GI-LABEL: sminv_v3i64:
728 ; CHECK-GI: // %bb.0: // %entry
729 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
730 ; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
731 ; CHECK-GI-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff
732 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
733 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
734 ; CHECK-GI-NEXT: mov v2.d[1], x8
735 ; CHECK-GI-NEXT: cmgt v1.2d, v2.2d, v0.2d
736 ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
737 ; CHECK-GI-NEXT: mov d1, v0.d[1]
738 ; CHECK-GI-NEXT: fmov x8, d0
739 ; CHECK-GI-NEXT: fmov x9, d1
740 ; CHECK-GI-NEXT: cmp x8, x9
741 ; CHECK-GI-NEXT: fcsel d0, d0, d1, lt
742 ; CHECK-GI-NEXT: fmov x0, d0
745 %arg1 = call i64 @llvm.vector.reduce.smin.v3i64(<3 x i64> %a)
; Signed-min reduction of a <4 x i64> argument held in v0 and v1.
; Both configurations fold v1 into v0 with a vector CMGT + BIF.
; The final two-element step differs: SelectionDAG uses ext + scalar CMGT +
; BIF, GlobalISel uses a general-register cmp + fcsel on "lt".
749 define i64 @sminv_v4i64(<4 x i64> %a) {
750 ; CHECK-SD-LABEL: sminv_v4i64:
751 ; CHECK-SD: // %bb.0: // %entry
752 ; CHECK-SD-NEXT: cmgt v2.2d, v1.2d, v0.2d
753 ; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
754 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
755 ; CHECK-SD-NEXT: cmgt d2, d1, d0
756 ; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
757 ; CHECK-SD-NEXT: fmov x0, d0
760 ; CHECK-GI-LABEL: sminv_v4i64:
761 ; CHECK-GI: // %bb.0: // %entry
762 ; CHECK-GI-NEXT: cmgt v2.2d, v1.2d, v0.2d
763 ; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
764 ; CHECK-GI-NEXT: mov d1, v0.d[1]
765 ; CHECK-GI-NEXT: fmov x8, d0
766 ; CHECK-GI-NEXT: fmov x9, d1
767 ; CHECK-GI-NEXT: cmp x8, x9
768 ; CHECK-GI-NEXT: fcsel d0, d0, d1, lt
769 ; CHECK-GI-NEXT: fmov x0, d0
772 %arg1 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %a)
; Signed-min reduction of a <2 x i128> argument. The expected code reads the
; two elements from the register pairs x0/x1 and x2/x3 and stays entirely in
; general registers.
; SelectionDAG: a cmp + sbcs pair forms the 128-bit signed comparison, then
; two csel on "lt" pick the low and high words.
; GlobalISel: compares the low words unsigned ("lo") and the high words signed
; ("lt"), takes the low-word result only when the high words are equal, and
; selects both words on that combined flag.
776 define i128 @sminv_v2i128(<2 x i128> %a) {
777 ; CHECK-SD-LABEL: sminv_v2i128:
778 ; CHECK-SD: // %bb.0: // %entry
779 ; CHECK-SD-NEXT: cmp x0, x2
780 ; CHECK-SD-NEXT: sbcs xzr, x1, x3
781 ; CHECK-SD-NEXT: csel x0, x0, x2, lt
782 ; CHECK-SD-NEXT: csel x1, x1, x3, lt
785 ; CHECK-GI-LABEL: sminv_v2i128:
786 ; CHECK-GI: // %bb.0: // %entry
787 ; CHECK-GI-NEXT: cmp x0, x2
788 ; CHECK-GI-NEXT: cset w8, lo
789 ; CHECK-GI-NEXT: cmp x1, x3
790 ; CHECK-GI-NEXT: cset w9, lt
791 ; CHECK-GI-NEXT: csel w8, w8, w9, eq
792 ; CHECK-GI-NEXT: tst w8, #0x1
793 ; CHECK-GI-NEXT: csel x0, x0, x2, ne
794 ; CHECK-GI-NEXT: csel x1, x1, x3, ne
797 %arg1 = call i128 @llvm.vector.reduce.smin.v2i128(<2 x i128> %a)
; Signed-max reduction of a <2 x i8> argument, which the expected code treats
; as two 32-bit lanes of v0.
; SelectionDAG: sign-extends each lane from 8 bits in place (shl/sshr by 24)
; and reduces with a pairwise SMAXP.
; GlobalISel: moves both lanes to general registers, compares them as
; sign-extended bytes, and selects the result with fcsel on "gt".
801 define i8 @smaxv_v2i8(<2 x i8> %a) {
802 ; CHECK-SD-LABEL: smaxv_v2i8:
803 ; CHECK-SD: // %bb.0: // %entry
804 ; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24
805 ; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24
806 ; CHECK-SD-NEXT: smaxp v0.2s, v0.2s, v0.2s
807 ; CHECK-SD-NEXT: fmov w0, s0
810 ; CHECK-GI-LABEL: smaxv_v2i8:
811 ; CHECK-GI: // %bb.0: // %entry
812 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
813 ; CHECK-GI-NEXT: mov s1, v0.s[1]
814 ; CHECK-GI-NEXT: fmov w8, s0
815 ; CHECK-GI-NEXT: sxtb w8, w8
816 ; CHECK-GI-NEXT: fmov w9, s1
817 ; CHECK-GI-NEXT: cmp w8, w9, sxtb
818 ; CHECK-GI-NEXT: fcsel s0, s0, s1, gt
819 ; CHECK-GI-NEXT: fmov w0, s0
822 %arg1 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> %a)
; Signed-max reduction of a <3 x i8> argument, whose elements the expected
; code reads from w0, w1 and w2.
; SelectionDAG: pre-fills a 4 x halfword vector with 128 (0x80, which becomes
; -128 after the shl/sshr by 8 sign-extension, so the padding lane cannot win
; a signed max), inserts the three elements, sign-extends and reduces with
; SMAXV.
; GlobalISel: stays in general registers with a two-step cmp/csel chain on
; sign-extended bytes.
826 define i8 @smaxv_v3i8(<3 x i8> %a) {
827 ; CHECK-SD-LABEL: smaxv_v3i8:
828 ; CHECK-SD: // %bb.0: // %entry
829 ; CHECK-SD-NEXT: movi v0.4h, #128
830 ; CHECK-SD-NEXT: mov v0.h[0], w0
831 ; CHECK-SD-NEXT: mov v0.h[1], w1
832 ; CHECK-SD-NEXT: mov v0.h[2], w2
833 ; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
834 ; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
835 ; CHECK-SD-NEXT: smaxv h0, v0.4h
836 ; CHECK-SD-NEXT: fmov w0, s0
839 ; CHECK-GI-LABEL: smaxv_v3i8:
840 ; CHECK-GI: // %bb.0: // %entry
841 ; CHECK-GI-NEXT: sxtb w8, w0
842 ; CHECK-GI-NEXT: cmp w8, w1, sxtb
843 ; CHECK-GI-NEXT: csel w8, w0, w1, gt
844 ; CHECK-GI-NEXT: sxtb w9, w8
845 ; CHECK-GI-NEXT: cmp w9, w2, sxtb
846 ; CHECK-GI-NEXT: csel w0, w8, w2, gt
849 %arg1 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> %a)
; Signed-max reduction of a <4 x i8> argument, which the expected code treats
; as four halfword lanes of v0.
; SelectionDAG: sign-extends each lane from 8 bits (shl/sshr by 8) and reduces
; with SMAXV.
; GlobalISel: extracts all four lanes with umov and reduces them as a tree of
; scalar cmp/csel pairs on sign-extended bytes.
853 define i8 @smaxv_v4i8(<4 x i8> %a) {
854 ; CHECK-SD-LABEL: smaxv_v4i8:
855 ; CHECK-SD: // %bb.0: // %entry
856 ; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
857 ; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
858 ; CHECK-SD-NEXT: smaxv h0, v0.4h
859 ; CHECK-SD-NEXT: fmov w0, s0
862 ; CHECK-GI-LABEL: smaxv_v4i8:
863 ; CHECK-GI: // %bb.0: // %entry
864 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
865 ; CHECK-GI-NEXT: umov w8, v0.h[0]
866 ; CHECK-GI-NEXT: umov w9, v0.h[1]
867 ; CHECK-GI-NEXT: umov w10, v0.h[2]
868 ; CHECK-GI-NEXT: umov w12, v0.h[3]
869 ; CHECK-GI-NEXT: sxtb w11, w8
870 ; CHECK-GI-NEXT: cmp w11, w9, sxtb
871 ; CHECK-GI-NEXT: sxtb w11, w10
872 ; CHECK-GI-NEXT: csel w8, w8, w9, gt
873 ; CHECK-GI-NEXT: cmp w11, w12, sxtb
874 ; CHECK-GI-NEXT: sxtb w9, w8
875 ; CHECK-GI-NEXT: csel w10, w10, w12, gt
876 ; CHECK-GI-NEXT: cmp w9, w10, sxtb
877 ; CHECK-GI-NEXT: csel w0, w8, w10, gt
880 %arg1 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> %a)
; Signed-max reduction of a <8 x i8> argument. Both llc configurations share
; the assertions: a single SMAXV over eight byte lanes, then fmov to w0.
884 define i8 @smaxv_v8i8(<8 x i8> %a) {
885 ; CHECK-LABEL: smaxv_v8i8:
886 ; CHECK: // %bb.0: // %entry
887 ; CHECK-NEXT: smaxv b0, v0.8b
888 ; CHECK-NEXT: fmov w0, s0
891 %arg1 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %a)
; Signed-max reduction of a <16 x i8> argument. Both llc configurations share
; the assertions: a single SMAXV over sixteen byte lanes, then fmov to w0.
895 define i8 @smaxv_v16i8(<16 x i8> %a) {
896 ; CHECK-LABEL: smaxv_v16i8:
897 ; CHECK: // %bb.0: // %entry
898 ; CHECK-NEXT: smaxv b0, v0.16b
899 ; CHECK-NEXT: fmov w0, s0
902 %arg1 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %a)
; Signed-max reduction of a <32 x i8> argument held in v0 and v1. Shared
; assertions: a lane-wise SMAX folds the two registers into one, then SMAXV
; reduces the sixteen byte lanes.
906 define i8 @smaxv_v32i8(<32 x i8> %a) {
907 ; CHECK-LABEL: smaxv_v32i8:
908 ; CHECK: // %bb.0: // %entry
909 ; CHECK-NEXT: smax v0.16b, v0.16b, v1.16b
910 ; CHECK-NEXT: smaxv b0, v0.16b
911 ; CHECK-NEXT: fmov w0, s0
914 %arg1 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %a)
; Signed-max reduction of a <2 x i16> argument, which the expected code treats
; as two 32-bit lanes of v0.
; SelectionDAG: sign-extends each lane from 16 bits in place (shl/sshr by 16)
; and reduces with a pairwise SMAXP.
; GlobalISel: moves both lanes to general registers, compares them as
; sign-extended halfwords, and selects the result with fcsel on "gt".
918 define i16 @smaxv_v2i16(<2 x i16> %a) {
919 ; CHECK-SD-LABEL: smaxv_v2i16:
920 ; CHECK-SD: // %bb.0: // %entry
921 ; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16
922 ; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #16
923 ; CHECK-SD-NEXT: smaxp v0.2s, v0.2s, v0.2s
924 ; CHECK-SD-NEXT: fmov w0, s0
927 ; CHECK-GI-LABEL: smaxv_v2i16:
928 ; CHECK-GI: // %bb.0: // %entry
929 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
930 ; CHECK-GI-NEXT: mov s1, v0.s[1]
931 ; CHECK-GI-NEXT: fmov w8, s0
932 ; CHECK-GI-NEXT: sxth w8, w8
933 ; CHECK-GI-NEXT: fmov w9, s1
934 ; CHECK-GI-NEXT: cmp w8, w9, sxth
935 ; CHECK-GI-NEXT: fcsel s0, s0, s1, gt
936 ; CHECK-GI-NEXT: fmov w0, s0
939 %arg1 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> %a)
; Signed-max reduction of a <3 x i16> argument. In both configurations the
; unused fourth halfword lane is overwritten with a padding constant before a
; 4-lane SMAXV.
; The two outputs differ only in how the constant is materialised in w8
; (#-32768 vs #32768). Both have the same low 16 bits, 0x8000, which is the
; smallest i16, so the padding lane cannot win a signed max.
943 define i16 @smaxv_v3i16(<3 x i16> %a) {
944 ; CHECK-SD-LABEL: smaxv_v3i16:
945 ; CHECK-SD: // %bb.0: // %entry
946 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
947 ; CHECK-SD-NEXT: mov w8, #-32768 // =0xffff8000
948 ; CHECK-SD-NEXT: mov v0.h[3], w8
949 ; CHECK-SD-NEXT: smaxv h0, v0.4h
950 ; CHECK-SD-NEXT: fmov w0, s0
953 ; CHECK-GI-LABEL: smaxv_v3i16:
954 ; CHECK-GI: // %bb.0: // %entry
955 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
956 ; CHECK-GI-NEXT: mov w8, #32768 // =0x8000
957 ; CHECK-GI-NEXT: mov v0.h[3], w8
958 ; CHECK-GI-NEXT: smaxv h0, v0.4h
959 ; CHECK-GI-NEXT: fmov w0, s0
962 %arg1 = call i16 @llvm.vector.reduce.smax.v3i16(<3 x i16> %a)
; Signed-max reduction of a <4 x i16> argument. Both llc configurations share
; the assertions: a single SMAXV over four halfword lanes, then fmov to w0.
966 define i16 @smaxv_v4i16(<4 x i16> %a) {
967 ; CHECK-LABEL: smaxv_v4i16:
968 ; CHECK: // %bb.0: // %entry
969 ; CHECK-NEXT: smaxv h0, v0.4h
970 ; CHECK-NEXT: fmov w0, s0
973 %arg1 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %a)
; Signed-max reduction of a <8 x i16> argument. Both llc configurations share
; the assertions: a single SMAXV over eight halfword lanes, then fmov to w0.
977 define i16 @smaxv_v8i16(<8 x i16> %a) {
978 ; CHECK-LABEL: smaxv_v8i16:
979 ; CHECK: // %bb.0: // %entry
980 ; CHECK-NEXT: smaxv h0, v0.8h
981 ; CHECK-NEXT: fmov w0, s0
984 %arg1 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %a)
; Signed-max reduction of a <16 x i16> argument held in v0 and v1. Shared
; assertions: a lane-wise SMAX folds the two registers into one, then SMAXV
; reduces the eight halfword lanes.
988 define i16 @smaxv_v16i16(<16 x i16> %a) {
989 ; CHECK-LABEL: smaxv_v16i16:
990 ; CHECK: // %bb.0: // %entry
991 ; CHECK-NEXT: smax v0.8h, v0.8h, v1.8h
992 ; CHECK-NEXT: smaxv h0, v0.8h
993 ; CHECK-NEXT: fmov w0, s0
996 %arg1 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %a)
; Signed-max reduction of a <2 x i32> argument. Shared assertions: the two
; lanes are reduced with a pairwise SMAXP of v0 with itself, then fmov to w0.
1000 define i32 @smaxv_v2i32(<2 x i32> %a) {
1001 ; CHECK-LABEL: smaxv_v2i32:
1002 ; CHECK: // %bb.0: // %entry
1003 ; CHECK-NEXT: smaxp v0.2s, v0.2s, v0.2s
1004 ; CHECK-NEXT: fmov w0, s0
1007 %arg1 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %a)
; Signed-max reduction of a <3 x i32> argument. Shared assertions: the unused
; fourth word lane is overwritten with 0x80000000 (the smallest i32, so it
; cannot win a signed max) before a 4-lane SMAXV.
1011 define i32 @smaxv_v3i32(<3 x i32> %a) {
1012 ; CHECK-LABEL: smaxv_v3i32:
1013 ; CHECK: // %bb.0: // %entry
1014 ; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000
1015 ; CHECK-NEXT: mov v0.s[3], w8
1016 ; CHECK-NEXT: smaxv s0, v0.4s
1017 ; CHECK-NEXT: fmov w0, s0
1020 %arg1 = call i32 @llvm.vector.reduce.smax.v3i32(<3 x i32> %a)
; Signed-max reduction of a <4 x i32> argument. Both llc configurations share
; the assertions: a single SMAXV over four word lanes, then fmov to w0.
1024 define i32 @smaxv_v4i32(<4 x i32> %a) {
1025 ; CHECK-LABEL: smaxv_v4i32:
1026 ; CHECK: // %bb.0: // %entry
1027 ; CHECK-NEXT: smaxv s0, v0.4s
1028 ; CHECK-NEXT: fmov w0, s0
1031 %arg1 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
; Signed-max reduction of a <8 x i32> argument held in v0 and v1. Shared
; assertions: a lane-wise SMAX folds the two registers into one, then SMAXV
; reduces the four word lanes.
1035 define i32 @smaxv_v8i32(<8 x i32> %a) {
1036 ; CHECK-LABEL: smaxv_v8i32:
1037 ; CHECK: // %bb.0: // %entry
1038 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
1039 ; CHECK-NEXT: smaxv s0, v0.4s
1040 ; CHECK-NEXT: fmov w0, s0
1043 %arg1 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %a)
; Signed-max reduction of a <2 x i64> argument. Neither expected sequence
; uses an across-lanes reduction instruction for 64-bit elements.
; SelectionDAG: rotates the upper doubleword down with ext, compares the two
; halves with a scalar CMGT, and picks the larger one with BIF.
; GlobalISel: moves both elements to general registers, compares them, and
; selects with fcsel on "gt".
1047 define i64 @smaxv_v2i64(<2 x i64> %a) {
1048 ; CHECK-SD-LABEL: smaxv_v2i64:
1049 ; CHECK-SD: // %bb.0: // %entry
1050 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1051 ; CHECK-SD-NEXT: cmgt d2, d0, d1
1052 ; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
1053 ; CHECK-SD-NEXT: fmov x0, d0
1054 ; CHECK-SD-NEXT: ret
1056 ; CHECK-GI-LABEL: smaxv_v2i64:
1057 ; CHECK-GI: // %bb.0: // %entry
1058 ; CHECK-GI-NEXT: mov d1, v0.d[1]
1059 ; CHECK-GI-NEXT: fmov x8, d0
1060 ; CHECK-GI-NEXT: fmov x9, d1
1061 ; CHECK-GI-NEXT: cmp x8, x9
1062 ; CHECK-GI-NEXT: fcsel d0, d0, d1, gt
1063 ; CHECK-GI-NEXT: fmov x0, d0
1064 ; CHECK-GI-NEXT: ret
1066 %arg1 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %a)
; Signed-max reduction of <3 x i64>; the expected code reads the elements
; from d0, d1 and d2. Both llc runs first build two 2-lane vectors,
; {d0, d1} and {d2, 0x8000000000000000}, and take their lane-wise signed max
; with cmgt/bif. The padding value is the smallest signed i64, so it cannot
; win the max. The final 2-lane step differs per selector: ext/cmgt/bif for
; SelectionDAG, scalar cmp + fcsel ... gt for GlobalISel.
1070 define i64 @smaxv_v3i64(<3 x i64> %a) {
1071 ; CHECK-SD-LABEL: smaxv_v3i64:
1072 ; CHECK-SD: // %bb.0: // %entry
1073 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
1074 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1075 ; CHECK-SD-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
1076 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
1077 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
1078 ; CHECK-SD-NEXT: mov v2.d[1], x8
1079 ; CHECK-SD-NEXT: cmgt v1.2d, v0.2d, v2.2d
1080 ; CHECK-SD-NEXT: bif v0.16b, v2.16b, v1.16b
1081 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1082 ; CHECK-SD-NEXT: cmgt d2, d0, d1
1083 ; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
1084 ; CHECK-SD-NEXT: fmov x0, d0
1085 ; CHECK-SD-NEXT: ret
1087 ; CHECK-GI-LABEL: smaxv_v3i64:
1088 ; CHECK-GI: // %bb.0: // %entry
1089 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1090 ; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
1091 ; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
1092 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
1093 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
1094 ; CHECK-GI-NEXT: mov v2.d[1], x8
1095 ; CHECK-GI-NEXT: cmgt v1.2d, v0.2d, v2.2d
1096 ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
1097 ; CHECK-GI-NEXT: mov d1, v0.d[1]
1098 ; CHECK-GI-NEXT: fmov x8, d0
1099 ; CHECK-GI-NEXT: fmov x9, d1
1100 ; CHECK-GI-NEXT: cmp x8, x9
1101 ; CHECK-GI-NEXT: fcsel d0, d0, d1, gt
1102 ; CHECK-GI-NEXT: fmov x0, d0
1103 ; CHECK-GI-NEXT: ret
1105 %arg1 = call i64 @llvm.vector.reduce.smax.v3i64(<3 x i64> %a)
; Signed-max reduction of <4 x i64>. Both llc runs first combine v0 and v1
; lane-wise with cmgt/bif. The remaining 2-lane step is ext/cmgt/bif for
; SelectionDAG and a scalar cmp + fcsel ... gt for GlobalISel.
1109 define i64 @smaxv_v4i64(<4 x i64> %a) {
1110 ; CHECK-SD-LABEL: smaxv_v4i64:
1111 ; CHECK-SD: // %bb.0: // %entry
1112 ; CHECK-SD-NEXT: cmgt v2.2d, v0.2d, v1.2d
1113 ; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
1114 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1115 ; CHECK-SD-NEXT: cmgt d2, d0, d1
1116 ; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
1117 ; CHECK-SD-NEXT: fmov x0, d0
1118 ; CHECK-SD-NEXT: ret
1120 ; CHECK-GI-LABEL: smaxv_v4i64:
1121 ; CHECK-GI: // %bb.0: // %entry
1122 ; CHECK-GI-NEXT: cmgt v2.2d, v0.2d, v1.2d
1123 ; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
1124 ; CHECK-GI-NEXT: mov d1, v0.d[1]
1125 ; CHECK-GI-NEXT: fmov x8, d0
1126 ; CHECK-GI-NEXT: fmov x9, d1
1127 ; CHECK-GI-NEXT: cmp x8, x9
1128 ; CHECK-GI-NEXT: fcsel d0, d0, d1, gt
1129 ; CHECK-GI-NEXT: fmov x0, d0
1130 ; CHECK-GI-NEXT: ret
1132 %arg1 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %a)
; Signed-max reduction of <2 x i128>; the expected code works on the x0/x1
; and x2/x3 register pairs, with no vector instructions.
;  - SelectionDAG: one 128-bit compare via cmp + sbcs, then csel ... lt on
;    each half.
;  - GlobalISel: the low halves are compared unsigned (hi) and the high halves
;    signed (gt). The low-half result is used only when the high halves are
;    equal (csel ... eq), and the combined bit drives csel ... ne.
1136 define i128 @smaxv_v2i128(<2 x i128> %a) {
1137 ; CHECK-SD-LABEL: smaxv_v2i128:
1138 ; CHECK-SD: // %bb.0: // %entry
1139 ; CHECK-SD-NEXT: cmp x2, x0
1140 ; CHECK-SD-NEXT: sbcs xzr, x3, x1
1141 ; CHECK-SD-NEXT: csel x0, x0, x2, lt
1142 ; CHECK-SD-NEXT: csel x1, x1, x3, lt
1143 ; CHECK-SD-NEXT: ret
1145 ; CHECK-GI-LABEL: smaxv_v2i128:
1146 ; CHECK-GI: // %bb.0: // %entry
1147 ; CHECK-GI-NEXT: cmp x0, x2
1148 ; CHECK-GI-NEXT: cset w8, hi
1149 ; CHECK-GI-NEXT: cmp x1, x3
1150 ; CHECK-GI-NEXT: cset w9, gt
1151 ; CHECK-GI-NEXT: csel w8, w8, w9, eq
1152 ; CHECK-GI-NEXT: tst w8, #0x1
1153 ; CHECK-GI-NEXT: csel x0, x0, x2, ne
1154 ; CHECK-GI-NEXT: csel x1, x1, x3, ne
1155 ; CHECK-GI-NEXT: ret
1157 %arg1 = call i128 @llvm.vector.reduce.smax.v2i128(<2 x i128> %a)
; Unsigned-min reduction of <2 x i8>; the elements are handled in 32-bit
; lanes.
;  - SelectionDAG: each lane is masked to its low byte (movi + and), then a
;    pairwise uminp produces the result.
;  - GlobalISel: the two lanes are extracted, compared as zero-extended bytes
;    (and #0xff / uxtb) and selected with fcsel ... lo.
1161 define i8 @uminv_v2i8(<2 x i8> %a) {
1162 ; CHECK-SD-LABEL: uminv_v2i8:
1163 ; CHECK-SD: // %bb.0: // %entry
1164 ; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff
1165 ; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
1166 ; CHECK-SD-NEXT: uminp v0.2s, v0.2s, v0.2s
1167 ; CHECK-SD-NEXT: fmov w0, s0
1168 ; CHECK-SD-NEXT: ret
1170 ; CHECK-GI-LABEL: uminv_v2i8:
1171 ; CHECK-GI: // %bb.0: // %entry
1172 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1173 ; CHECK-GI-NEXT: mov s1, v0.s[1]
1174 ; CHECK-GI-NEXT: fmov w8, s0
1175 ; CHECK-GI-NEXT: and w8, w8, #0xff
1176 ; CHECK-GI-NEXT: fmov w9, s1
1177 ; CHECK-GI-NEXT: cmp w8, w9, uxtb
1178 ; CHECK-GI-NEXT: fcsel s0, s0, s1, lo
1179 ; CHECK-GI-NEXT: fmov w0, s0
1180 ; CHECK-GI-NEXT: ret
1182 %arg1 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> %a)
; Unsigned-min reduction of <3 x i8>; the expected code reads the elements
; from w0, w1 and w2.
;  - SelectionDAG: starts from a vector holding 0x00ff in every 16-bit lane,
;    inserts w0-w2 into lanes 0-2, clears the high byte of each lane (bic) and
;    reduces with a 4-lane uminv. Lane 3 therefore stays at 0xff.
;  - GlobalISel: stays in GPRs, using two zero-extended byte compares, each
;    followed by csel ... lo.
1186 define i8 @uminv_v3i8(<3 x i8> %a) {
1187 ; CHECK-SD-LABEL: uminv_v3i8:
1188 ; CHECK-SD: // %bb.0: // %entry
1189 ; CHECK-SD-NEXT: movi d0, #0xff00ff00ff00ff
1190 ; CHECK-SD-NEXT: mov v0.h[0], w0
1191 ; CHECK-SD-NEXT: mov v0.h[1], w1
1192 ; CHECK-SD-NEXT: mov v0.h[2], w2
1193 ; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
1194 ; CHECK-SD-NEXT: uminv h0, v0.4h
1195 ; CHECK-SD-NEXT: fmov w0, s0
1196 ; CHECK-SD-NEXT: ret
1198 ; CHECK-GI-LABEL: uminv_v3i8:
1199 ; CHECK-GI: // %bb.0: // %entry
1200 ; CHECK-GI-NEXT: and w8, w0, #0xff
1201 ; CHECK-GI-NEXT: cmp w8, w1, uxtb
1202 ; CHECK-GI-NEXT: csel w8, w0, w1, lo
1203 ; CHECK-GI-NEXT: and w9, w8, #0xff
1204 ; CHECK-GI-NEXT: cmp w9, w2, uxtb
1205 ; CHECK-GI-NEXT: csel w0, w8, w2, lo
1206 ; CHECK-GI-NEXT: ret
1208 %arg1 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> %a)
; Unsigned-min reduction of <4 x i8>; the elements are handled in 16-bit
; lanes.
;  - SelectionDAG: clears the high byte of every lane (bic), then reduces
;    with a 4-lane uminv.
;  - GlobalISel: extracts all four lanes with umov and reduces them with three
;    zero-extended byte compares, each followed by csel ... lo.
1212 define i8 @uminv_v4i8(<4 x i8> %a) {
1213 ; CHECK-SD-LABEL: uminv_v4i8:
1214 ; CHECK-SD: // %bb.0: // %entry
1215 ; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
1216 ; CHECK-SD-NEXT: uminv h0, v0.4h
1217 ; CHECK-SD-NEXT: fmov w0, s0
1218 ; CHECK-SD-NEXT: ret
1220 ; CHECK-GI-LABEL: uminv_v4i8:
1221 ; CHECK-GI: // %bb.0: // %entry
1222 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1223 ; CHECK-GI-NEXT: umov w8, v0.h[0]
1224 ; CHECK-GI-NEXT: umov w9, v0.h[1]
1225 ; CHECK-GI-NEXT: umov w10, v0.h[2]
1226 ; CHECK-GI-NEXT: umov w11, v0.h[3]
1227 ; CHECK-GI-NEXT: and w12, w8, #0xff
1228 ; CHECK-GI-NEXT: cmp w12, w9, uxtb
1229 ; CHECK-GI-NEXT: and w12, w10, #0xff
1230 ; CHECK-GI-NEXT: csel w8, w8, w9, lo
1231 ; CHECK-GI-NEXT: cmp w12, w11, uxtb
1232 ; CHECK-GI-NEXT: csel w9, w10, w11, lo
1233 ; CHECK-GI-NEXT: and w10, w8, #0xff
1234 ; CHECK-GI-NEXT: cmp w10, w9, uxtb
1235 ; CHECK-GI-NEXT: csel w0, w8, w9, lo
1236 ; CHECK-GI-NEXT: ret
1238 %arg1 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> %a)
; Unsigned-min reduction of <8 x i8>. Shared expectation for both llc runs:
; a single uminv across the eight byte lanes, then fmov w0, s0.
1242 define i8 @uminv_v8i8(<8 x i8> %a) {
1243 ; CHECK-LABEL: uminv_v8i8:
1244 ; CHECK: // %bb.0: // %entry
1245 ; CHECK-NEXT: uminv b0, v0.8b
1246 ; CHECK-NEXT: fmov w0, s0
1249 %arg1 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %a)
; Unsigned-min reduction of <16 x i8>. Shared expectation for both llc runs:
; a single uminv across the sixteen byte lanes, then fmov w0, s0.
1253 define i8 @uminv_v16i8(<16 x i8> %a) {
1254 ; CHECK-LABEL: uminv_v16i8:
1255 ; CHECK: // %bb.0: // %entry
1256 ; CHECK-NEXT: uminv b0, v0.16b
1257 ; CHECK-NEXT: fmov w0, s0
1260 %arg1 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %a)
; Unsigned-min reduction of <32 x i8>. Shared expectation for both llc runs:
; v0 and v1 are first combined with a lane-wise umin, then the 16-lane result
; is reduced with uminv.
1264 define i8 @uminv_v32i8(<32 x i8> %a) {
1265 ; CHECK-LABEL: uminv_v32i8:
1266 ; CHECK: // %bb.0: // %entry
1267 ; CHECK-NEXT: umin v0.16b, v0.16b, v1.16b
1268 ; CHECK-NEXT: uminv b0, v0.16b
1269 ; CHECK-NEXT: fmov w0, s0
1272 %arg1 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %a)
; Unsigned-min reduction of <2 x i16>; the elements are handled in 32-bit
; lanes.
;  - SelectionDAG: each lane is masked to its low 16 bits (movi + and), then
;    a pairwise uminp produces the result.
;  - GlobalISel: the two lanes are extracted, compared as zero-extended
;    halfwords (and #0xffff / uxth) and selected with fcsel ... lo.
1276 define i16 @uminv_v2i16(<2 x i16> %a) {
1277 ; CHECK-SD-LABEL: uminv_v2i16:
1278 ; CHECK-SD: // %bb.0: // %entry
1279 ; CHECK-SD-NEXT: movi d1, #0x00ffff0000ffff
1280 ; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
1281 ; CHECK-SD-NEXT: uminp v0.2s, v0.2s, v0.2s
1282 ; CHECK-SD-NEXT: fmov w0, s0
1283 ; CHECK-SD-NEXT: ret
1285 ; CHECK-GI-LABEL: uminv_v2i16:
1286 ; CHECK-GI: // %bb.0: // %entry
1287 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1288 ; CHECK-GI-NEXT: mov s1, v0.s[1]
1289 ; CHECK-GI-NEXT: fmov w8, s0
1290 ; CHECK-GI-NEXT: and w8, w8, #0xffff
1291 ; CHECK-GI-NEXT: fmov w9, s1
1292 ; CHECK-GI-NEXT: cmp w8, w9, uxth
1293 ; CHECK-GI-NEXT: fcsel s0, s0, s1, lo
1294 ; CHECK-GI-NEXT: fmov w0, s0
1295 ; CHECK-GI-NEXT: ret
1297 %arg1 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> %a)
; Unsigned-min reduction of <3 x i16>. Both llc runs write a value whose low
; 16 bits are all ones into lane 3 (so it cannot win an unsigned min) and
; reduce with a 4-lane uminv. They differ only in how the constant is
; materialised: mov w8, #-1 for SelectionDAG, mov w8, #65535 for GlobalISel.
1301 define i16 @uminv_v3i16(<3 x i16> %a) {
1302 ; CHECK-SD-LABEL: uminv_v3i16:
1303 ; CHECK-SD: // %bb.0: // %entry
1304 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1305 ; CHECK-SD-NEXT: mov w8, #-1 // =0xffffffff
1306 ; CHECK-SD-NEXT: mov v0.h[3], w8
1307 ; CHECK-SD-NEXT: uminv h0, v0.4h
1308 ; CHECK-SD-NEXT: fmov w0, s0
1309 ; CHECK-SD-NEXT: ret
1311 ; CHECK-GI-LABEL: uminv_v3i16:
1312 ; CHECK-GI: // %bb.0: // %entry
1313 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1314 ; CHECK-GI-NEXT: mov w8, #65535 // =0xffff
1315 ; CHECK-GI-NEXT: mov v0.h[3], w8
1316 ; CHECK-GI-NEXT: uminv h0, v0.4h
1317 ; CHECK-GI-NEXT: fmov w0, s0
1318 ; CHECK-GI-NEXT: ret
1320 %arg1 = call i16 @llvm.vector.reduce.umin.v3i16(<3 x i16> %a)
; Unsigned-min reduction of <4 x i16>. Shared expectation for both llc runs:
; a single uminv across the four halfword lanes, then fmov w0, s0.
1324 define i16 @uminv_v4i16(<4 x i16> %a) {
1325 ; CHECK-LABEL: uminv_v4i16:
1326 ; CHECK: // %bb.0: // %entry
1327 ; CHECK-NEXT: uminv h0, v0.4h
1328 ; CHECK-NEXT: fmov w0, s0
1331 %arg1 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %a)
; Unsigned-min reduction of <8 x i16>. Shared expectation for both llc runs:
; a single uminv across the eight halfword lanes, then fmov w0, s0.
1335 define i16 @uminv_v8i16(<8 x i16> %a) {
1336 ; CHECK-LABEL: uminv_v8i16:
1337 ; CHECK: // %bb.0: // %entry
1338 ; CHECK-NEXT: uminv h0, v0.8h
1339 ; CHECK-NEXT: fmov w0, s0
1342 %arg1 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %a)
; Unsigned-min reduction of <16 x i16>. Shared expectation for both llc runs:
; v0 and v1 are first combined with a lane-wise umin, then the 8-lane result
; is reduced with uminv.
1346 define i16 @uminv_v16i16(<16 x i16> %a) {
1347 ; CHECK-LABEL: uminv_v16i16:
1348 ; CHECK: // %bb.0: // %entry
1349 ; CHECK-NEXT: umin v0.8h, v0.8h, v1.8h
1350 ; CHECK-NEXT: uminv h0, v0.8h
1351 ; CHECK-NEXT: fmov w0, s0
1354 %arg1 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %a)
; Unsigned-min reduction of <2 x i32>. Shared expectation for both llc runs:
; a pairwise uminp of the vector with itself, followed by fmov w0, s0.
1358 define i32 @uminv_v2i32(<2 x i32> %a) {
1359 ; CHECK-LABEL: uminv_v2i32:
1360 ; CHECK: // %bb.0: // %entry
1361 ; CHECK-NEXT: uminp v0.2s, v0.2s, v0.2s
1362 ; CHECK-NEXT: fmov w0, s0
1365 %arg1 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %a)
; Unsigned-min reduction of <3 x i32>. Shared expectation for both llc runs:
; lane 3 is filled with 0xffffffff (the largest unsigned i32, so it can never
; win an unsigned min) and the reduction is done with a 4-lane uminv.
1369 define i32 @uminv_v3i32(<3 x i32> %a) {
1370 ; CHECK-LABEL: uminv_v3i32:
1371 ; CHECK: // %bb.0: // %entry
1372 ; CHECK-NEXT: mov w8, #-1 // =0xffffffff
1373 ; CHECK-NEXT: mov v0.s[3], w8
1374 ; CHECK-NEXT: uminv s0, v0.4s
1375 ; CHECK-NEXT: fmov w0, s0
1378 %arg1 = call i32 @llvm.vector.reduce.umin.v3i32(<3 x i32> %a)
; Unsigned-min reduction of <4 x i32>. Shared expectation for both llc runs:
; a single uminv across the four lanes, then fmov w0, s0.
1382 define i32 @uminv_v4i32(<4 x i32> %a) {
1383 ; CHECK-LABEL: uminv_v4i32:
1384 ; CHECK: // %bb.0: // %entry
1385 ; CHECK-NEXT: uminv s0, v0.4s
1386 ; CHECK-NEXT: fmov w0, s0
1389 %arg1 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a)
; Unsigned-min reduction of <8 x i32>. Shared expectation for both llc runs:
; v0 and v1 are first combined with a lane-wise umin, then the 4-lane result
; is reduced with uminv.
1393 define i32 @uminv_v8i32(<8 x i32> %a) {
1394 ; CHECK-LABEL: uminv_v8i32:
1395 ; CHECK: // %bb.0: // %entry
1396 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
1397 ; CHECK-NEXT: uminv s0, v0.4s
1398 ; CHECK-NEXT: fmov w0, s0
1401 %arg1 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %a)
; Unsigned-min reduction of <2 x i64>. The two llc runs are checked
; separately and both expect a compare-and-select sequence:
;  - SelectionDAG: ext brings the upper lane down, cmhi (unsigned compare)
;    builds the mask and bif selects between the two lanes.
;  - GlobalISel: the upper lane is copied to d1, both lanes are moved to GPRs
;    for a scalar cmp, and fcsel ... lo picks the result.
1405 define i64 @uminv_v2i64(<2 x i64> %a) {
1406 ; CHECK-SD-LABEL: uminv_v2i64:
1407 ; CHECK-SD: // %bb.0: // %entry
1408 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1409 ; CHECK-SD-NEXT: cmhi d2, d1, d0
1410 ; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
1411 ; CHECK-SD-NEXT: fmov x0, d0
1412 ; CHECK-SD-NEXT: ret
1414 ; CHECK-GI-LABEL: uminv_v2i64:
1415 ; CHECK-GI: // %bb.0: // %entry
1416 ; CHECK-GI-NEXT: mov d1, v0.d[1]
1417 ; CHECK-GI-NEXT: fmov x8, d0
1418 ; CHECK-GI-NEXT: fmov x9, d1
1419 ; CHECK-GI-NEXT: cmp x8, x9
1420 ; CHECK-GI-NEXT: fcsel d0, d0, d1, lo
1421 ; CHECK-GI-NEXT: fmov x0, d0
1422 ; CHECK-GI-NEXT: ret
1424 %arg1 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %a)
; Unsigned-min reduction of <3 x i64>; the expected code reads the elements
; from d0, d1 and d2. Both llc runs first build two 2-lane vectors,
; {d0, d1} and {d2, 0xffffffffffffffff}, and take their lane-wise unsigned
; min with cmhi/bif. The all-ones padding cannot win an unsigned min. The
; final 2-lane step differs per selector: ext/cmhi/bif for SelectionDAG,
; scalar cmp + fcsel ... lo for GlobalISel.
1428 define i64 @uminv_v3i64(<3 x i64> %a) {
1429 ; CHECK-SD-LABEL: uminv_v3i64:
1430 ; CHECK-SD: // %bb.0: // %entry
1431 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
1432 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1433 ; CHECK-SD-NEXT: mov x8, #-1 // =0xffffffffffffffff
1434 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
1435 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
1436 ; CHECK-SD-NEXT: mov v2.d[1], x8
1437 ; CHECK-SD-NEXT: cmhi v1.2d, v2.2d, v0.2d
1438 ; CHECK-SD-NEXT: bif v0.16b, v2.16b, v1.16b
1439 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1440 ; CHECK-SD-NEXT: cmhi d2, d1, d0
1441 ; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
1442 ; CHECK-SD-NEXT: fmov x0, d0
1443 ; CHECK-SD-NEXT: ret
1445 ; CHECK-GI-LABEL: uminv_v3i64:
1446 ; CHECK-GI: // %bb.0: // %entry
1447 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1448 ; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
1449 ; CHECK-GI-NEXT: mov x8, #-1 // =0xffffffffffffffff
1450 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
1451 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
1452 ; CHECK-GI-NEXT: mov v2.d[1], x8
1453 ; CHECK-GI-NEXT: cmhi v1.2d, v2.2d, v0.2d
1454 ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
1455 ; CHECK-GI-NEXT: mov d1, v0.d[1]
1456 ; CHECK-GI-NEXT: fmov x8, d0
1457 ; CHECK-GI-NEXT: fmov x9, d1
1458 ; CHECK-GI-NEXT: cmp x8, x9
1459 ; CHECK-GI-NEXT: fcsel d0, d0, d1, lo
1460 ; CHECK-GI-NEXT: fmov x0, d0
1461 ; CHECK-GI-NEXT: ret
1463 %arg1 = call i64 @llvm.vector.reduce.umin.v3i64(<3 x i64> %a)
; Unsigned-min reduction of <4 x i64>. Both llc runs first combine v0 and v1
; lane-wise with cmhi/bif. The remaining 2-lane step is ext/cmhi/bif for
; SelectionDAG and a scalar cmp + fcsel ... lo for GlobalISel.
1467 define i64 @uminv_v4i64(<4 x i64> %a) {
1468 ; CHECK-SD-LABEL: uminv_v4i64:
1469 ; CHECK-SD: // %bb.0: // %entry
1470 ; CHECK-SD-NEXT: cmhi v2.2d, v1.2d, v0.2d
1471 ; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
1472 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1473 ; CHECK-SD-NEXT: cmhi d2, d1, d0
1474 ; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
1475 ; CHECK-SD-NEXT: fmov x0, d0
1476 ; CHECK-SD-NEXT: ret
1478 ; CHECK-GI-LABEL: uminv_v4i64:
1479 ; CHECK-GI: // %bb.0: // %entry
1480 ; CHECK-GI-NEXT: cmhi v2.2d, v1.2d, v0.2d
1481 ; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
1482 ; CHECK-GI-NEXT: mov d1, v0.d[1]
1483 ; CHECK-GI-NEXT: fmov x8, d0
1484 ; CHECK-GI-NEXT: fmov x9, d1
1485 ; CHECK-GI-NEXT: cmp x8, x9
1486 ; CHECK-GI-NEXT: fcsel d0, d0, d1, lo
1487 ; CHECK-GI-NEXT: fmov x0, d0
1488 ; CHECK-GI-NEXT: ret
1490 %arg1 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %a)
; Unsigned-min reduction of <2 x i128>; the expected code works on the x0/x1
; and x2/x3 register pairs, with no vector instructions.
;  - SelectionDAG: one 128-bit compare via cmp + sbcs, then csel ... lo on
;    each half.
;  - GlobalISel: low and high halves are each compared unsigned (lo). The
;    low-half result is used only when the high halves are equal
;    (csel ... eq), and the combined bit drives csel ... ne.
1494 define i128 @uminv_v2i128(<2 x i128> %a) {
1495 ; CHECK-SD-LABEL: uminv_v2i128:
1496 ; CHECK-SD: // %bb.0: // %entry
1497 ; CHECK-SD-NEXT: cmp x0, x2
1498 ; CHECK-SD-NEXT: sbcs xzr, x1, x3
1499 ; CHECK-SD-NEXT: csel x0, x0, x2, lo
1500 ; CHECK-SD-NEXT: csel x1, x1, x3, lo
1501 ; CHECK-SD-NEXT: ret
1503 ; CHECK-GI-LABEL: uminv_v2i128:
1504 ; CHECK-GI: // %bb.0: // %entry
1505 ; CHECK-GI-NEXT: cmp x0, x2
1506 ; CHECK-GI-NEXT: cset w8, lo
1507 ; CHECK-GI-NEXT: cmp x1, x3
1508 ; CHECK-GI-NEXT: cset w9, lo
1509 ; CHECK-GI-NEXT: csel w8, w8, w9, eq
1510 ; CHECK-GI-NEXT: tst w8, #0x1
1511 ; CHECK-GI-NEXT: csel x0, x0, x2, ne
1512 ; CHECK-GI-NEXT: csel x1, x1, x3, ne
1513 ; CHECK-GI-NEXT: ret
1515 %arg1 = call i128 @llvm.vector.reduce.umin.v2i128(<2 x i128> %a)
; Unsigned-max reduction of <2 x i8>; the elements are handled in 32-bit
; lanes.
;  - SelectionDAG: each lane is masked to its low byte (movi + and), then a
;    pairwise umaxp produces the result.
;  - GlobalISel: the two lanes are extracted, compared as zero-extended bytes
;    (and #0xff / uxtb) and selected with fcsel ... hi.
1519 define i8 @umaxv_v2i8(<2 x i8> %a) {
1520 ; CHECK-SD-LABEL: umaxv_v2i8:
1521 ; CHECK-SD: // %bb.0: // %entry
1522 ; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff
1523 ; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
1524 ; CHECK-SD-NEXT: umaxp v0.2s, v0.2s, v0.2s
1525 ; CHECK-SD-NEXT: fmov w0, s0
1526 ; CHECK-SD-NEXT: ret
1528 ; CHECK-GI-LABEL: umaxv_v2i8:
1529 ; CHECK-GI: // %bb.0: // %entry
1530 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1531 ; CHECK-GI-NEXT: mov s1, v0.s[1]
1532 ; CHECK-GI-NEXT: fmov w8, s0
1533 ; CHECK-GI-NEXT: and w8, w8, #0xff
1534 ; CHECK-GI-NEXT: fmov w9, s1
1535 ; CHECK-GI-NEXT: cmp w8, w9, uxtb
1536 ; CHECK-GI-NEXT: fcsel s0, s0, s1, hi
1537 ; CHECK-GI-NEXT: fmov w0, s0
1538 ; CHECK-GI-NEXT: ret
1540 %arg1 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> %a)
; Unsigned-max reduction of <3 x i8>; the expected code reads the elements
; from w0, w1 and w2.
;  - SelectionDAG: starts from an all-zero vector, inserts w0-w2 into 16-bit
;    lanes 0-2, clears the high byte of each lane (bic) and reduces with a
;    4-lane umaxv. Lane 3 therefore stays at zero.
;  - GlobalISel: stays in GPRs, using two zero-extended byte compares, each
;    followed by csel ... hi.
1544 define i8 @umaxv_v3i8(<3 x i8> %a) {
1545 ; CHECK-SD-LABEL: umaxv_v3i8:
1546 ; CHECK-SD: // %bb.0: // %entry
1547 ; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
1548 ; CHECK-SD-NEXT: mov v0.h[0], w0
1549 ; CHECK-SD-NEXT: mov v0.h[1], w1
1550 ; CHECK-SD-NEXT: mov v0.h[2], w2
1551 ; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
1552 ; CHECK-SD-NEXT: umaxv h0, v0.4h
1553 ; CHECK-SD-NEXT: fmov w0, s0
1554 ; CHECK-SD-NEXT: ret
1556 ; CHECK-GI-LABEL: umaxv_v3i8:
1557 ; CHECK-GI: // %bb.0: // %entry
1558 ; CHECK-GI-NEXT: and w8, w0, #0xff
1559 ; CHECK-GI-NEXT: cmp w8, w1, uxtb
1560 ; CHECK-GI-NEXT: csel w8, w0, w1, hi
1561 ; CHECK-GI-NEXT: and w9, w8, #0xff
1562 ; CHECK-GI-NEXT: cmp w9, w2, uxtb
1563 ; CHECK-GI-NEXT: csel w0, w8, w2, hi
1564 ; CHECK-GI-NEXT: ret
1566 %arg1 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> %a)
; Unsigned-max reduction of <4 x i8>; the elements are handled in 16-bit
; lanes.
;  - SelectionDAG: clears the high byte of every lane (bic), then reduces
;    with a 4-lane umaxv.
;  - GlobalISel: extracts all four lanes with umov and reduces them with three
;    zero-extended byte compares, each followed by csel ... hi.
1570 define i8 @umaxv_v4i8(<4 x i8> %a) {
1571 ; CHECK-SD-LABEL: umaxv_v4i8:
1572 ; CHECK-SD: // %bb.0: // %entry
1573 ; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
1574 ; CHECK-SD-NEXT: umaxv h0, v0.4h
1575 ; CHECK-SD-NEXT: fmov w0, s0
1576 ; CHECK-SD-NEXT: ret
1578 ; CHECK-GI-LABEL: umaxv_v4i8:
1579 ; CHECK-GI: // %bb.0: // %entry
1580 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1581 ; CHECK-GI-NEXT: umov w8, v0.h[0]
1582 ; CHECK-GI-NEXT: umov w9, v0.h[1]
1583 ; CHECK-GI-NEXT: umov w10, v0.h[2]
1584 ; CHECK-GI-NEXT: umov w11, v0.h[3]
1585 ; CHECK-GI-NEXT: and w12, w8, #0xff
1586 ; CHECK-GI-NEXT: cmp w12, w9, uxtb
1587 ; CHECK-GI-NEXT: and w12, w10, #0xff
1588 ; CHECK-GI-NEXT: csel w8, w8, w9, hi
1589 ; CHECK-GI-NEXT: cmp w12, w11, uxtb
1590 ; CHECK-GI-NEXT: csel w9, w10, w11, hi
1591 ; CHECK-GI-NEXT: and w10, w8, #0xff
1592 ; CHECK-GI-NEXT: cmp w10, w9, uxtb
1593 ; CHECK-GI-NEXT: csel w0, w8, w9, hi
1594 ; CHECK-GI-NEXT: ret
1596 %arg1 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> %a)
; Unsigned-max reduction of <8 x i8>. Shared expectation for both llc runs:
; a single umaxv across the eight byte lanes, then fmov w0, s0.
1600 define i8 @umaxv_v8i8(<8 x i8> %a) {
1601 ; CHECK-LABEL: umaxv_v8i8:
1602 ; CHECK: // %bb.0: // %entry
1603 ; CHECK-NEXT: umaxv b0, v0.8b
1604 ; CHECK-NEXT: fmov w0, s0
1607 %arg1 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %a)
; Unsigned-max reduction of <16 x i8>. Shared expectation for both llc runs:
; a single umaxv across the sixteen byte lanes, then fmov w0, s0.
1611 define i8 @umaxv_v16i8(<16 x i8> %a) {
1612 ; CHECK-LABEL: umaxv_v16i8:
1613 ; CHECK: // %bb.0: // %entry
1614 ; CHECK-NEXT: umaxv b0, v0.16b
1615 ; CHECK-NEXT: fmov w0, s0
1618 %arg1 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %a)
; Unsigned-max reduction of <32 x i8>. Shared expectation for both llc runs:
; v0 and v1 are first combined with a lane-wise umax, then the 16-lane result
; is reduced with umaxv.
1622 define i8 @umaxv_v32i8(<32 x i8> %a) {
1623 ; CHECK-LABEL: umaxv_v32i8:
1624 ; CHECK: // %bb.0: // %entry
1625 ; CHECK-NEXT: umax v0.16b, v0.16b, v1.16b
1626 ; CHECK-NEXT: umaxv b0, v0.16b
1627 ; CHECK-NEXT: fmov w0, s0
1630 %arg1 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %a)
; Unsigned-max reduction of <2 x i16>; the elements are handled in 32-bit
; lanes.
;  - SelectionDAG: each lane is masked to its low 16 bits (movi + and), then
;    a pairwise umaxp produces the result.
;  - GlobalISel: the two lanes are extracted, compared as zero-extended
;    halfwords (and #0xffff / uxth) and selected with fcsel ... hi.
1634 define i16 @umaxv_v2i16(<2 x i16> %a) {
1635 ; CHECK-SD-LABEL: umaxv_v2i16:
1636 ; CHECK-SD: // %bb.0: // %entry
1637 ; CHECK-SD-NEXT: movi d1, #0x00ffff0000ffff
1638 ; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
1639 ; CHECK-SD-NEXT: umaxp v0.2s, v0.2s, v0.2s
1640 ; CHECK-SD-NEXT: fmov w0, s0
1641 ; CHECK-SD-NEXT: ret
1643 ; CHECK-GI-LABEL: umaxv_v2i16:
1644 ; CHECK-GI: // %bb.0: // %entry
1645 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1646 ; CHECK-GI-NEXT: mov s1, v0.s[1]
1647 ; CHECK-GI-NEXT: fmov w8, s0
1648 ; CHECK-GI-NEXT: and w8, w8, #0xffff
1649 ; CHECK-GI-NEXT: fmov w9, s1
1650 ; CHECK-GI-NEXT: cmp w8, w9, uxth
1651 ; CHECK-GI-NEXT: fcsel s0, s0, s1, hi
1652 ; CHECK-GI-NEXT: fmov w0, s0
1653 ; CHECK-GI-NEXT: ret
1655 %arg1 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> %a)
; Unsigned-max reduction of <3 x i16>. Both llc runs zero lane 3 (zero cannot
; win an unsigned max) and reduce with a 4-lane umaxv. They differ only in
; how the zero is inserted: directly from wzr for SelectionDAG, via
; mov w8, #0 for GlobalISel.
1659 define i16 @umaxv_v3i16(<3 x i16> %a) {
1660 ; CHECK-SD-LABEL: umaxv_v3i16:
1661 ; CHECK-SD: // %bb.0: // %entry
1662 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1663 ; CHECK-SD-NEXT: mov v0.h[3], wzr
1664 ; CHECK-SD-NEXT: umaxv h0, v0.4h
1665 ; CHECK-SD-NEXT: fmov w0, s0
1666 ; CHECK-SD-NEXT: ret
1668 ; CHECK-GI-LABEL: umaxv_v3i16:
1669 ; CHECK-GI: // %bb.0: // %entry
1670 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1671 ; CHECK-GI-NEXT: mov w8, #0 // =0x0
1672 ; CHECK-GI-NEXT: mov v0.h[3], w8
1673 ; CHECK-GI-NEXT: umaxv h0, v0.4h
1674 ; CHECK-GI-NEXT: fmov w0, s0
1675 ; CHECK-GI-NEXT: ret
1677 %arg1 = call i16 @llvm.vector.reduce.umax.v3i16(<3 x i16> %a)
; Unsigned-max reduction of <4 x i16>. Shared expectation for both llc runs:
; a single umaxv across the four halfword lanes, then fmov w0, s0.
1681 define i16 @umaxv_v4i16(<4 x i16> %a) {
1682 ; CHECK-LABEL: umaxv_v4i16:
1683 ; CHECK: // %bb.0: // %entry
1684 ; CHECK-NEXT: umaxv h0, v0.4h
1685 ; CHECK-NEXT: fmov w0, s0
1688 %arg1 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %a)
; Unsigned-max reduction of <8 x i16>. Shared expectation for both llc runs:
; a single umaxv across the eight halfword lanes, then fmov w0, s0.
1692 define i16 @umaxv_v8i16(<8 x i16> %a) {
1693 ; CHECK-LABEL: umaxv_v8i16:
1694 ; CHECK: // %bb.0: // %entry
1695 ; CHECK-NEXT: umaxv h0, v0.8h
1696 ; CHECK-NEXT: fmov w0, s0
1699 %arg1 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %a)
; Unsigned-max reduction of <16 x i16>. Shared expectation for both llc runs:
; v0 and v1 are first combined with a lane-wise umax, then the 8-lane result
; is reduced with umaxv.
1703 define i16 @umaxv_v16i16(<16 x i16> %a) {
1704 ; CHECK-LABEL: umaxv_v16i16:
1705 ; CHECK: // %bb.0: // %entry
1706 ; CHECK-NEXT: umax v0.8h, v0.8h, v1.8h
1707 ; CHECK-NEXT: umaxv h0, v0.8h
1708 ; CHECK-NEXT: fmov w0, s0
1711 %arg1 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %a)
; Unsigned-max reduction of <2 x i32>. Shared expectation for both llc runs:
; a pairwise umaxp of the vector with itself, followed by fmov w0, s0.
1715 define i32 @umaxv_v2i32(<2 x i32> %a) {
1716 ; CHECK-LABEL: umaxv_v2i32:
1717 ; CHECK: // %bb.0: // %entry
1718 ; CHECK-NEXT: umaxp v0.2s, v0.2s, v0.2s
1719 ; CHECK-NEXT: fmov w0, s0
1722 %arg1 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %a)
; Unsigned-max reduction of <3 x i32>. Shared expectation for both llc runs:
; lane 3 is zeroed straight from wzr (zero cannot win an unsigned max) and
; the reduction is done with a 4-lane umaxv.
1726 define i32 @umaxv_v3i32(<3 x i32> %a) {
1727 ; CHECK-LABEL: umaxv_v3i32:
1728 ; CHECK: // %bb.0: // %entry
1729 ; CHECK-NEXT: mov v0.s[3], wzr
1730 ; CHECK-NEXT: umaxv s0, v0.4s
1731 ; CHECK-NEXT: fmov w0, s0
1734 %arg1 = call i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a)
; Unsigned-max reduction of <4 x i32>. Shared expectation for both llc runs:
; a single umaxv across the four lanes, then fmov w0, s0.
1738 define i32 @umaxv_v4i32(<4 x i32> %a) {
1739 ; CHECK-LABEL: umaxv_v4i32:
1740 ; CHECK: // %bb.0: // %entry
1741 ; CHECK-NEXT: umaxv s0, v0.4s
1742 ; CHECK-NEXT: fmov w0, s0
1745 %arg1 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %a)
; Unsigned-max reduction of <8 x i32>. Shared expectation for both llc runs:
; v0 and v1 are first combined with a lane-wise umax, then the 4-lane result
; is reduced with umaxv.
1749 define i32 @umaxv_v8i32(<8 x i32> %a) {
1750 ; CHECK-LABEL: umaxv_v8i32:
1751 ; CHECK: // %bb.0: // %entry
1752 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
1753 ; CHECK-NEXT: umaxv s0, v0.4s
1754 ; CHECK-NEXT: fmov w0, s0
1757 %arg1 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %a)
; Unsigned-max reduction of <2 x i64>. The two llc runs are checked
; separately and both expect a compare-and-select sequence:
;  - SelectionDAG: ext brings the upper lane down, cmhi (unsigned compare)
;    builds the mask and bif selects between the two lanes.
;  - GlobalISel: the upper lane is copied to d1, both lanes are moved to GPRs
;    for a scalar cmp, and fcsel ... hi picks the result.
1761 define i64 @umaxv_v2i64(<2 x i64> %a) {
1762 ; CHECK-SD-LABEL: umaxv_v2i64:
1763 ; CHECK-SD: // %bb.0: // %entry
1764 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1765 ; CHECK-SD-NEXT: cmhi d2, d0, d1
1766 ; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
1767 ; CHECK-SD-NEXT: fmov x0, d0
1768 ; CHECK-SD-NEXT: ret
1770 ; CHECK-GI-LABEL: umaxv_v2i64:
1771 ; CHECK-GI: // %bb.0: // %entry
1772 ; CHECK-GI-NEXT: mov d1, v0.d[1]
1773 ; CHECK-GI-NEXT: fmov x8, d0
1774 ; CHECK-GI-NEXT: fmov x9, d1
1775 ; CHECK-GI-NEXT: cmp x8, x9
1776 ; CHECK-GI-NEXT: fcsel d0, d0, d1, hi
1777 ; CHECK-GI-NEXT: fmov x0, d0
1778 ; CHECK-GI-NEXT: ret
1780 %arg1 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a)
; Unsigned-max reduction of <3 x i64>; the expected code reads the elements
; from d0, d1 and d2, and the missing fourth lane is represented by zero
; (xzr).
;  - SelectionDAG: compares {d0, d1} against a copy of v2 whose upper lane is
;    zeroed (v3), then selects between v0 and v2 with bif. The second operand
;    of the final scalar cmhi is d1 and-ed with the upper half of the compare
;    mask (ext + and).
;  - GlobalISel: zeroes the upper lane of v2 in place, does the lane-wise
;    cmhi/bif, then finishes with a scalar cmp + fcsel ... hi.
1784 define i64 @umaxv_v3i64(<3 x i64> %a) {
1785 ; CHECK-SD-LABEL: umaxv_v3i64:
1786 ; CHECK-SD: // %bb.0: // %entry
1787 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
1788 ; CHECK-SD-NEXT: mov v3.16b, v2.16b
1789 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1790 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
1791 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
1792 ; CHECK-SD-NEXT: mov v3.d[1], xzr
1793 ; CHECK-SD-NEXT: cmhi v3.2d, v0.2d, v3.2d
1794 ; CHECK-SD-NEXT: ext v4.16b, v3.16b, v3.16b, #8
1795 ; CHECK-SD-NEXT: bif v0.16b, v2.16b, v3.16b
1796 ; CHECK-SD-NEXT: and v1.8b, v1.8b, v4.8b
1797 ; CHECK-SD-NEXT: cmhi d2, d0, d1
1798 ; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
1799 ; CHECK-SD-NEXT: fmov x0, d0
1800 ; CHECK-SD-NEXT: ret
1802 ; CHECK-GI-LABEL: umaxv_v3i64:
1803 ; CHECK-GI: // %bb.0: // %entry
1804 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1805 ; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
1806 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
1807 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
1808 ; CHECK-GI-NEXT: mov v2.d[1], xzr
1809 ; CHECK-GI-NEXT: cmhi v1.2d, v0.2d, v2.2d
1810 ; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
1811 ; CHECK-GI-NEXT: mov d1, v0.d[1]
1812 ; CHECK-GI-NEXT: fmov x8, d0
1813 ; CHECK-GI-NEXT: fmov x9, d1
1814 ; CHECK-GI-NEXT: cmp x8, x9
1815 ; CHECK-GI-NEXT: fcsel d0, d0, d1, hi
1816 ; CHECK-GI-NEXT: fmov x0, d0
1817 ; CHECK-GI-NEXT: ret
1819 %arg1 = call i64 @llvm.vector.reduce.umax.v3i64(<3 x i64> %a)
; Unsigned-max reduction of <4 x i64>. Both llc runs first combine v0 and v1
; lane-wise with cmhi/bif. The remaining 2-lane step is ext/cmhi/bif for
; SelectionDAG and a scalar cmp + fcsel ... hi for GlobalISel.
1823 define i64 @umaxv_v4i64(<4 x i64> %a) {
1824 ; CHECK-SD-LABEL: umaxv_v4i64:
1825 ; CHECK-SD: // %bb.0: // %entry
1826 ; CHECK-SD-NEXT: cmhi v2.2d, v0.2d, v1.2d
1827 ; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
1828 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1829 ; CHECK-SD-NEXT: cmhi d2, d0, d1
1830 ; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
1831 ; CHECK-SD-NEXT: fmov x0, d0
1832 ; CHECK-SD-NEXT: ret
1834 ; CHECK-GI-LABEL: umaxv_v4i64:
1835 ; CHECK-GI: // %bb.0: // %entry
1836 ; CHECK-GI-NEXT: cmhi v2.2d, v0.2d, v1.2d
1837 ; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
1838 ; CHECK-GI-NEXT: mov d1, v0.d[1]
1839 ; CHECK-GI-NEXT: fmov x8, d0
1840 ; CHECK-GI-NEXT: fmov x9, d1
1841 ; CHECK-GI-NEXT: cmp x8, x9
1842 ; CHECK-GI-NEXT: fcsel d0, d0, d1, hi
1843 ; CHECK-GI-NEXT: fmov x0, d0
1844 ; CHECK-GI-NEXT: ret
1846 %arg1 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %a)
1850 define i128 @umaxv_v2i128(<2 x i128> %a) {
1851 ; CHECK-SD-LABEL: umaxv_v2i128:
1852 ; CHECK-SD: // %bb.0: // %entry
1853 ; CHECK-SD-NEXT: cmp x2, x0
1854 ; CHECK-SD-NEXT: sbcs xzr, x3, x1
1855 ; CHECK-SD-NEXT: csel x0, x0, x2, lo
1856 ; CHECK-SD-NEXT: csel x1, x1, x3, lo
1857 ; CHECK-SD-NEXT: ret
1859 ; CHECK-GI-LABEL: umaxv_v2i128:
1860 ; CHECK-GI: // %bb.0: // %entry
1861 ; CHECK-GI-NEXT: cmp x0, x2
1862 ; CHECK-GI-NEXT: cset w8, hi
1863 ; CHECK-GI-NEXT: cmp x1, x3
1864 ; CHECK-GI-NEXT: cset w9, hi
1865 ; CHECK-GI-NEXT: csel w8, w8, w9, eq
1866 ; CHECK-GI-NEXT: tst w8, #0x1
1867 ; CHECK-GI-NEXT: csel x0, x0, x2, ne
1868 ; CHECK-GI-NEXT: csel x1, x1, x3, ne
1869 ; CHECK-GI-NEXT: ret
1871 %arg1 = call i128 @llvm.vector.reduce.umax.v2i128(<2 x i128> %a)