1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+sve | FileCheck %s
; Fixed-width case: the or-reduction of a <2 x i1> compare both steers the
; loop exit and feeds a select once the loop has been left.
; The expected code forms the reduced value once inside the loop
; (cmeq + umaxv + fmov into w9, tested with tbnz) and reuses it in
; %middle.split by masking bit 0 of x9 into x0.
4 define i64 @select_or_reduce_v2i1(ptr nocapture noundef readonly %src) {
5 ; CHECK-LABEL: select_or_reduce_v2i1:
6 ; CHECK: // %bb.0: // %entry
7 ; CHECK-NEXT: mov x8, xzr
8 ; CHECK-NEXT: .LBB0_1: // %vector.body
9 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
10 ; CHECK-NEXT: ldr q0, [x0, x8]
11 ; CHECK-NEXT: cmeq v0.2d, v0.2d, #0
12 ; CHECK-NEXT: umaxv s0, v0.4s
13 ; CHECK-NEXT: fmov w9, s0
14 ; CHECK-NEXT: tbnz w9, #0, .LBB0_3
15 ; CHECK-NEXT: // %bb.2: // %vector.body
16 ; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
17 ; CHECK-NEXT: cmp x8, #16
18 ; CHECK-NEXT: add x8, x8, #16
19 ; CHECK-NEXT: b.ne .LBB0_1
20 ; CHECK-NEXT: .LBB0_3: // %middle.split
21 ; CHECK-NEXT: and x0, x9, #0x1
; Loop body: load two pointers from %src at element %index, test each lane
; for null, and advance the index by 2.
27 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
28 %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index
29 %wide.load = load <2 x ptr>, ptr %arrayidx, align 8
30 %cond = icmp eq <2 x ptr> %wide.load, splat(ptr zeroinitializer)
31 %index.next = add nuw i64 %index, 2
; %or.reduc is true when at least one loaded lane is null.
32 %or.reduc = tail call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %cond)
; Leave the loop on a null lane or once %index.next reaches 4.
33 %iv.cmp = icmp eq i64 %index.next, 4
34 %exit.cond = or i1 %or.reduc, %iv.cmp
35 br i1 %exit.cond, label %middle.split, label %vector.body
; Second use of the reduction: turn it into i64 1 (null seen) or 0.
38 %sel = select i1 %or.reduc, i64 1, i64 0
; Fixed-width case where the or-reduction of a <2 x i1> compare is used twice
; as a branch condition: once for the loop exit and once after the loop to
; pick between %found and %notfound.
; The expected code keeps the reduced value in w9 and re-tests bit 0 with tbz
; in %middle.split; the %found path stores 56 through x1 and sets w0 to 1,
; the other path (.LBB1_5) zeroes x0.
42 define i64 @br_or_reduce_v2i1(ptr nocapture noundef readonly %src, ptr noundef readnone %p) {
43 ; CHECK-LABEL: br_or_reduce_v2i1:
44 ; CHECK: // %bb.0: // %entry
45 ; CHECK-NEXT: mov x8, xzr
46 ; CHECK-NEXT: .LBB1_1: // %vector.body
47 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
48 ; CHECK-NEXT: ldr q0, [x0, x8]
49 ; CHECK-NEXT: cmeq v0.2d, v0.2d, #0
50 ; CHECK-NEXT: umaxv s0, v0.4s
51 ; CHECK-NEXT: fmov w9, s0
52 ; CHECK-NEXT: tbnz w9, #0, .LBB1_3
53 ; CHECK-NEXT: // %bb.2: // %vector.body
54 ; CHECK-NEXT: // in Loop: Header=BB1_1 Depth=1
55 ; CHECK-NEXT: cmp x8, #16
56 ; CHECK-NEXT: add x8, x8, #16
57 ; CHECK-NEXT: b.ne .LBB1_1
58 ; CHECK-NEXT: .LBB1_3: // %middle.split
59 ; CHECK-NEXT: tbz w9, #0, .LBB1_5
60 ; CHECK-NEXT: // %bb.4: // %found
61 ; CHECK-NEXT: mov w8, #56 // =0x38
62 ; CHECK-NEXT: mov w0, #1 // =0x1
63 ; CHECK-NEXT: str x8, [x1]
65 ; CHECK-NEXT: .LBB1_5:
66 ; CHECK-NEXT: mov x0, xzr
; Loop body: load two pointers from %src at element %index, test each lane
; for null, and advance the index by 2.
72 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
73 %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index
74 %wide.load = load <2 x ptr>, ptr %arrayidx, align 8
75 %cond = icmp eq <2 x ptr> %wide.load, splat(ptr zeroinitializer)
76 %index.next = add nuw i64 %index, 2
; %or.reduc is true when at least one loaded lane is null.
77 %or.reduc = tail call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %cond)
; Leave the loop on a null lane or once %index.next reaches 4.
78 %iv.cmp = icmp eq i64 %index.next, 4
79 %exit.cond = or i1 %or.reduc, %iv.cmp
80 br i1 %exit.cond, label %middle.split, label %vector.body
; Second use of the reduction: branch on it after the loop.
83 br i1 %or.reduc, label %found, label %notfound
; Side effect that the expected code places on the %found path.
86 store i64 56, ptr %p, align 8
; Scalable-vector (SVE) counterpart of select_or_reduce_v2i1: the or-reduction
; of a <vscale x 2 x i1> compare steers the loop exit and feeds a select after
; the loop.
; In the expected code the loop branches with b.ne directly after cmpeq, with
; no ptest in between; %middle.split then re-derives the result from the
; predicate with ptest + cset.
93 define i64 @select_or_reduce_nxv2i1(ptr nocapture noundef readonly %src) {
94 ; CHECK-LABEL: select_or_reduce_nxv2i1:
95 ; CHECK: // %bb.0: // %entry
97 ; CHECK-NEXT: ptrue p0.d
98 ; CHECK-NEXT: mov x9, xzr
99 ; CHECK-NEXT: neg x10, x8
100 ; CHECK-NEXT: add x10, x10, #4
101 ; CHECK-NEXT: .LBB2_1: // %vector.body
102 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
103 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x9, lsl #3]
104 ; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, #0
105 ; CHECK-NEXT: b.ne .LBB2_3
106 ; CHECK-NEXT: // %bb.2: // %vector.body
107 ; CHECK-NEXT: // in Loop: Header=BB2_1 Depth=1
108 ; CHECK-NEXT: cmp x10, x9
109 ; CHECK-NEXT: add x9, x9, x8
110 ; CHECK-NEXT: b.ne .LBB2_1
111 ; CHECK-NEXT: .LBB2_3: // %middle.split
112 ; CHECK-NEXT: ptest p0, p1.b
113 ; CHECK-NEXT: cset w0, ne
; %vf = vscale * 2, the per-iteration step of the loop index.
116 %vscale = tail call i64 @llvm.vscale.i64()
117 %vf = shl nuw nsw i64 %vscale, 1
118 br label %vector.body
; Loop body: load a scalable vector of pointers from %src at element %index,
; test each lane for null, and advance the index by %vf.
121 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
122 %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index
123 %wide.load = load <vscale x 2 x ptr>, ptr %arrayidx, align 8
124 %cond = icmp eq <vscale x 2 x ptr> %wide.load, splat(ptr zeroinitializer)
125 %index.next = add nuw i64 %index, %vf
; %or.reduc is true when at least one loaded lane is null.
126 %or.reduc = tail call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> %cond)
; Leave the loop on a null lane or once %index.next equals 4.
127 %iv.cmp = icmp eq i64 %index.next, 4
128 %exit.cond = or i1 %or.reduc, %iv.cmp
129 br i1 %exit.cond, label %middle.split, label %vector.body
; Second use of the reduction: turn it into i64 1 (null seen) or 0.
132 %sel = select i1 %or.reduc, i64 1, i64 0
; Scalable-vector (SVE) counterpart of br_or_reduce_v2i1: the or-reduction of
; a <vscale x 2 x i1> compare is used as a branch condition both for the loop
; exit and, after the loop, to pick between %found and %notfound.
; In the expected code the loop branches with b.ne directly after cmpeq;
; %middle.split repeats the test with ptest + b.eq. The %found path stores 56
; through x1 and sets w0 to 1, the other path (.LBB3_5) zeroes x0.
136 define i64 @br_or_reduce_nxv2i1(ptr nocapture noundef readonly %src, ptr noundef readnone %p) {
137 ; CHECK-LABEL: br_or_reduce_nxv2i1:
138 ; CHECK: // %bb.0: // %entry
139 ; CHECK-NEXT: cntd x8
140 ; CHECK-NEXT: ptrue p0.d
141 ; CHECK-NEXT: mov x9, xzr
142 ; CHECK-NEXT: neg x10, x8
143 ; CHECK-NEXT: add x10, x10, #4
144 ; CHECK-NEXT: .LBB3_1: // %vector.body
145 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
146 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x9, lsl #3]
147 ; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, #0
148 ; CHECK-NEXT: b.ne .LBB3_3
149 ; CHECK-NEXT: // %bb.2: // %vector.body
150 ; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1
151 ; CHECK-NEXT: cmp x10, x9
152 ; CHECK-NEXT: add x9, x9, x8
153 ; CHECK-NEXT: b.ne .LBB3_1
154 ; CHECK-NEXT: .LBB3_3: // %middle.split
155 ; CHECK-NEXT: ptest p0, p1.b
156 ; CHECK-NEXT: b.eq .LBB3_5
157 ; CHECK-NEXT: // %bb.4: // %found
158 ; CHECK-NEXT: mov w8, #56 // =0x38
159 ; CHECK-NEXT: mov w0, #1 // =0x1
160 ; CHECK-NEXT: str x8, [x1]
162 ; CHECK-NEXT: .LBB3_5:
163 ; CHECK-NEXT: mov x0, xzr
; %vf = vscale * 2, the per-iteration step of the loop index.
166 %vscale = tail call i64 @llvm.vscale.i64()
167 %vf = shl nuw nsw i64 %vscale, 1
168 br label %vector.body
; Loop body: load a scalable vector of pointers from %src at element %index,
; test each lane for null, and advance the index by %vf.
171 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
172 %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index
173 %wide.load = load <vscale x 2 x ptr>, ptr %arrayidx, align 8
174 %cond = icmp eq <vscale x 2 x ptr> %wide.load, splat(ptr zeroinitializer)
175 %index.next = add nuw i64 %index, %vf
; %or.reduc is true when at least one loaded lane is null.
176 %or.reduc = tail call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> %cond)
; Leave the loop on a null lane or once %index.next equals 4.
177 %iv.cmp = icmp eq i64 %index.next, 4
178 %exit.cond = or i1 %or.reduc, %iv.cmp
179 br i1 %exit.cond, label %middle.split, label %vector.body
; Second use of the reduction: branch on it after the loop.
182 br i1 %or.reduc, label %found, label %notfound
; Side effect that the expected code places on the %found path.
185 store i64 56, ptr %p, align 8
; Or-reduction intrinsics called by the tests above: the fixed-width
; <2 x i1> form and the scalable <vscale x 2 x i1> form.
192 declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1>)
193 declare i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1>)