; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -force-vector-interleave=1 -o - %s | FileCheck %s

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-none-unknown-elf"

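; Tests lowering of the llvm.experimental.vector.partial.reduce.add intrinsic
; for fixed-width and scalable vector types.

; Input and accumulator have the same element count, so the partial reduction
; degenerates to a single vector add.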
define <4 x i32> @partial_reduce_add_fixed(<4 x i32> %accumulator, <4 x i32> %0) #0 {
; CHECK-LABEL: partial_reduce_add_fixed:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
entry:
  %partial.reduce = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v4i32.v4i32(<4 x i32> %accumulator, <4 x i32> %0)
  ret <4 x i32> %partial.reduce
}

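; The <8 x i32> input arrives in v1/v2 and is folded into the accumulator
; with two adds.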
define <4 x i32> @partial_reduce_add_fixed_half(<4 x i32> %accumulator, <8 x i32> %0) #0 {
; CHECK-LABEL: partial_reduce_add_fixed_half:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
; CHECK-NEXT:    ret
entry:
  %partial.reduce = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v4i32.v8i32(<4 x i32> %accumulator, <8 x i32> %0)
  ret <4 x i32> %partial.reduce
}

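; Scalable counterpart of the first test: matching element counts, so a
; single SVE add.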
define <vscale x 4 x i32> @partial_reduce_add(<vscale x 4 x i32> %accumulator, <vscale x 4 x i32> %0) #0 {
; CHECK-LABEL: partial_reduce_add:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
entry:
  %partial.reduce = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv4i32(<vscale x 4 x i32> %accumulator, <vscale x 4 x i32> %0)
  ret <vscale x 4 x i32> %partial.reduce
}

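; The two halves of the <vscale x 8 x i32> input (z1/z2) are chained into
; the accumulator.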
define <vscale x 4 x i32> @partial_reduce_add_half(<vscale x 4 x i32> %accumulator, <vscale x 8 x i32> %0) #0 {
; CHECK-LABEL: partial_reduce_add_half:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    add z0.s, z2.s, z0.s
; CHECK-NEXT:    ret
entry:
  %partial.reduce = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv8i32(<vscale x 4 x i32> %accumulator, <vscale x 8 x i32> %0)
  ret <vscale x 4 x i32> %partial.reduce
}

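; Four-to-one reduction: the input occupies z1-z4 and is combined into the
; accumulator with a tree of adds.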
define <vscale x 4 x i32> @partial_reduce_add_quart(<vscale x 4 x i32> %accumulator, <vscale x 16 x i32> %0) #0 {
; CHECK-LABEL: partial_reduce_add_quart:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    add z2.s, z2.s, z3.s
; CHECK-NEXT:    add z0.s, z4.s, z0.s
; CHECK-NEXT:    add z0.s, z2.s, z0.s
; CHECK-NEXT:    ret
entry:
  %partial.reduce = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv16i32(<vscale x 4 x i32> %accumulator, <vscale x 16 x i32> %0)
  ret <vscale x 4 x i32> %partial.reduce
}

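; The <vscale x 8 x i32> accumulator spans two Z registers, so each half is
; reduced independently.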
define <vscale x 8 x i32> @partial_reduce_add_half_8(<vscale x 8 x i32> %accumulator, <vscale x 16 x i32> %0) #0 {
; CHECK-LABEL: partial_reduce_add_half_8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    add z0.s, z0.s, z2.s
; CHECK-NEXT:    add z1.s, z1.s, z3.s
; CHECK-NEXT:    add z0.s, z4.s, z0.s
; CHECK-NEXT:    add z1.s, z5.s, z1.s
; CHECK-NEXT:    ret
entry:
  %partial.reduce = call <vscale x 8 x i32> @llvm.experimental.vector.partial.reduce.add.nxv8i32.nxv8i32.nxv16i32(<vscale x 8 x i32> %accumulator, <vscale x 16 x i32> %0)
  ret <vscale x 8 x i32> %partial.reduce
}

declare <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv8i32(<vscale x 4 x i32>, <vscale x 8 x i32>)
declare <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv16i32(<vscale x 4 x i32>, <vscale x 16 x i32>)
declare <vscale x 8 x i32> @llvm.experimental.vector.partial.reduce.add.nxv8i32.nxv8i32.nxv16i32(<vscale x 8 x i32>, <vscale x 16 x i32>)

declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
declare i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32>)

attributes #0 = { "target-features"="+sve2" }