; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v \
; RUN:   -riscv-v-slp-max-vf=0 -S \
; RUN:   | FileCheck %s --check-prefixes=CHECK,ZVL128
; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v,+zvl256b \
; RUN:   -riscv-v-slp-max-vf=0 -S \
; RUN:   | FileCheck %s --check-prefixes=CHECK,ZVL256
; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v,+zvl512b \
; RUN:   -riscv-v-slp-max-vf=0 -S \
; RUN:   | FileCheck %s --check-prefixes=CHECK,ZVL512
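
; The three RUN lines differ only in the guaranteed minimum vector register
; width: +v alone already implies zvl128b, and +zvl256b/+zvl512b raise that
; minimum. -riscv-v-slp-max-vf=0 is understood here to lift the target's
; default cap on the SLP vectorization factor, leaving the chosen width to
; the cost model alone (this reading of the flag is an assumption).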

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"

; The first batch of tests covers simple reductions of various widths.
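; Per the CHECK lines, the 2xi64 case is expected to stay scalar (a
; two-element reduction is not profitable), while the 4x/8x/16x cases
; collapse into a single wide load feeding @llvm.vector.reduce.add.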

define i64 @red_ld_2xi64(ptr %ptr) {
; CHECK-LABEL: @red_ld_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LD0:%.*]] = load i64, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 1
; CHECK-NEXT:    [[LD1:%.*]] = load i64, ptr [[GEP]], align 8
; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i64 [[LD0]], [[LD1]]
; CHECK-NEXT:    ret i64 [[ADD_1]]
;
entry:
  %ld0 = load i64, ptr %ptr
  %gep = getelementptr inbounds i64, ptr %ptr, i64 1
  %ld1 = load i64, ptr %gep
  %add.1 = add nuw nsw i64 %ld0, %ld1
  ret i64 %add.1
}

define i64 @red_ld_4xi64(ptr %ptr) {
; CHECK-LABEL: @red_ld_4xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP0]])
; CHECK-NEXT:    ret i64 [[TMP1]]
;
entry:
  %ld0 = load i64, ptr %ptr
  %gep = getelementptr inbounds i64, ptr %ptr, i64 1
  %ld1 = load i64, ptr %gep
  %add.1 = add nuw nsw i64 %ld0, %ld1
  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 2
  %ld2 = load i64, ptr %gep.1
  %add.2 = add nuw nsw i64 %add.1, %ld2
  %gep.2 = getelementptr inbounds i64, ptr %ptr, i64 3
  %ld3 = load i64, ptr %gep.2
  %add.3 = add nuw nsw i64 %add.2, %ld3
  ret i64 %add.3
}

define i64 @red_ld_8xi64(ptr %ptr) {
; CHECK-LABEL: @red_ld_8xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP0]])
; CHECK-NEXT:    ret i64 [[TMP1]]
;
entry:
  %ld0 = load i64, ptr %ptr
  %gep = getelementptr inbounds i64, ptr %ptr, i64 1
  %ld1 = load i64, ptr %gep
  %add.1 = add nuw nsw i64 %ld0, %ld1
  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 2
  %ld2 = load i64, ptr %gep.1
  %add.2 = add nuw nsw i64 %add.1, %ld2
  %gep.2 = getelementptr inbounds i64, ptr %ptr, i64 3
  %ld3 = load i64, ptr %gep.2
  %add.3 = add nuw nsw i64 %add.2, %ld3
  %gep.3 = getelementptr inbounds i64, ptr %ptr, i64 4
  %ld4 = load i64, ptr %gep.3
  %add.4 = add nuw nsw i64 %add.3, %ld4
  %gep.4 = getelementptr inbounds i64, ptr %ptr, i64 5
  %ld5 = load i64, ptr %gep.4
  %add.5 = add nuw nsw i64 %add.4, %ld5
  %gep.5 = getelementptr inbounds i64, ptr %ptr, i64 6
  %ld6 = load i64, ptr %gep.5
  %add.6 = add nuw nsw i64 %add.5, %ld6
  %gep.6 = getelementptr inbounds i64, ptr %ptr, i64 7
  %ld7 = load i64, ptr %gep.6
  %add.7 = add nuw nsw i64 %add.6, %ld7
  ret i64 %add.7
}

define i64 @red_ld_16xi64(ptr %ptr) {
; CHECK-LABEL: @red_ld_16xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> [[TMP0]])
; CHECK-NEXT:    ret i64 [[TMP1]]
;
entry:
  %ld0 = load i64, ptr %ptr
  %gep = getelementptr inbounds i64, ptr %ptr, i64 1
  %ld1 = load i64, ptr %gep
  %add.1 = add nuw nsw i64 %ld0, %ld1
  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 2
  %ld2 = load i64, ptr %gep.1
  %add.2 = add nuw nsw i64 %add.1, %ld2
  %gep.2 = getelementptr inbounds i64, ptr %ptr, i64 3
  %ld3 = load i64, ptr %gep.2
  %add.3 = add nuw nsw i64 %add.2, %ld3
  %gep.3 = getelementptr inbounds i64, ptr %ptr, i64 4
  %ld4 = load i64, ptr %gep.3
  %add.4 = add nuw nsw i64 %add.3, %ld4
  %gep.4 = getelementptr inbounds i64, ptr %ptr, i64 5
  %ld5 = load i64, ptr %gep.4
  %add.5 = add nuw nsw i64 %add.4, %ld5
  %gep.5 = getelementptr inbounds i64, ptr %ptr, i64 6
  %ld6 = load i64, ptr %gep.5
  %add.6 = add nuw nsw i64 %add.5, %ld6
  %gep.6 = getelementptr inbounds i64, ptr %ptr, i64 7
  %ld7 = load i64, ptr %gep.6
  %add.7 = add nuw nsw i64 %add.6, %ld7
  %gep.7 = getelementptr inbounds i64, ptr %ptr, i64 8
  %ld8 = load i64, ptr %gep.7
  %add.8 = add nuw nsw i64 %add.7, %ld8
  %gep.8 = getelementptr inbounds i64, ptr %ptr, i64 9
  %ld9 = load i64, ptr %gep.8
  %add.9 = add nuw nsw i64 %add.8, %ld9
  %gep.9 = getelementptr inbounds i64, ptr %ptr, i64 10
  %ld10 = load i64, ptr %gep.9
  %add.10 = add nuw nsw i64 %add.9, %ld10
  %gep.10 = getelementptr inbounds i64, ptr %ptr, i64 11
  %ld11 = load i64, ptr %gep.10
  %add.11 = add nuw nsw i64 %add.10, %ld11
  %gep.11 = getelementptr inbounds i64, ptr %ptr, i64 12
  %ld12 = load i64, ptr %gep.11
  %add.12 = add nuw nsw i64 %add.11, %ld12
  %gep.12 = getelementptr inbounds i64, ptr %ptr, i64 13
  %ld13 = load i64, ptr %gep.12
  %add.13 = add nuw nsw i64 %add.12, %ld13
  %gep.13 = getelementptr inbounds i64, ptr %ptr, i64 14
  %ld14 = load i64, ptr %gep.13
  %add.14 = add nuw nsw i64 %add.13, %ld14
  %gep.14 = getelementptr inbounds i64, ptr %ptr, i64 15
  %ld15 = load i64, ptr %gep.14
  %add.15 = add nuw nsw i64 %add.14, %ld15
  ret i64 %add.15
}

; The next batch tests different reduction kinds.
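; Each of these follows the same shape: the two 8-byte buffers are combined
; element-wise, then folded with the reduction operator under test.
; Roughly, in C terms:
;
;   uint8_t acc = init;              // any init is folded into the tail
;   for (int i = 0; i < 8; i++)
;     acc = RDX(acc, a[i] OP b[i]);  // OP is xor or and, RDX the operator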

%struct.buf = type { [8 x i8] }

define i8 @reduce_and(ptr %a, ptr %b) {
; CHECK-LABEL: @reduce_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = xor <8 x i8> [[TMP1]], [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> [[TMP2]])
; CHECK-NEXT:    [[OP_RDX:%.*]] = and i8 [[TMP3]], 1
; CHECK-NEXT:    ret i8 [[OP_RDX]]
;
entry:
  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
  %0 = load i8, ptr %arrayidx, align 1
  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
  %1 = load i8, ptr %arrayidx3, align 1
  %xor12 = xor i8 %1, %0
  %and13 = and i8 %xor12, 1
  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
  %2 = load i8, ptr %arrayidx.1, align 1
  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
  %3 = load i8, ptr %arrayidx3.1, align 1
  %xor12.1 = xor i8 %3, %2
  %and13.1 = and i8 %xor12.1, %and13
  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
  %4 = load i8, ptr %arrayidx.2, align 1
  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
  %5 = load i8, ptr %arrayidx3.2, align 1
  %xor12.2 = xor i8 %5, %4
  %and13.2 = and i8 %xor12.2, %and13.1
  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
  %6 = load i8, ptr %arrayidx.3, align 1
  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
  %7 = load i8, ptr %arrayidx3.3, align 1
  %xor12.3 = xor i8 %7, %6
  %and13.3 = and i8 %xor12.3, %and13.2
  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
  %8 = load i8, ptr %arrayidx.4, align 1
  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
  %9 = load i8, ptr %arrayidx3.4, align 1
  %xor12.4 = xor i8 %9, %8
  %and13.4 = and i8 %xor12.4, %and13.3
  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
  %10 = load i8, ptr %arrayidx.5, align 1
  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
  %11 = load i8, ptr %arrayidx3.5, align 1
  %xor12.5 = xor i8 %11, %10
  %and13.5 = and i8 %xor12.5, %and13.4
  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
  %12 = load i8, ptr %arrayidx.6, align 1
  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
  %13 = load i8, ptr %arrayidx3.6, align 1
  %xor12.6 = xor i8 %13, %12
  %and13.6 = and i8 %xor12.6, %and13.5
  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
  %14 = load i8, ptr %arrayidx.7, align 1
  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
  %15 = load i8, ptr %arrayidx3.7, align 1
  %xor12.7 = xor i8 %15, %14
  %and13.7 = and i8 %xor12.7, %and13.6
  ret i8 %and13.7
}

define i8 @reduce_or_1(ptr %a, ptr %b) {
; CHECK-LABEL: @reduce_or_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = xor <8 x i8> [[TMP1]], [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP2]])
; CHECK-NEXT:    ret i8 [[TMP3]]
;
entry:
  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
  %0 = load i8, ptr %arrayidx, align 1
  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
  %1 = load i8, ptr %arrayidx3, align 1
  %xor12 = xor i8 %1, %0
  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
  %2 = load i8, ptr %arrayidx.1, align 1
  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
  %3 = load i8, ptr %arrayidx3.1, align 1
  %xor12.1 = xor i8 %3, %2
  %or13.1 = or i8 %xor12.1, %xor12
  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
  %4 = load i8, ptr %arrayidx.2, align 1
  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
  %5 = load i8, ptr %arrayidx3.2, align 1
  %xor12.2 = xor i8 %5, %4
  %or13.2 = or i8 %xor12.2, %or13.1
  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
  %6 = load i8, ptr %arrayidx.3, align 1
  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
  %7 = load i8, ptr %arrayidx3.3, align 1
  %xor12.3 = xor i8 %7, %6
  %or13.3 = or i8 %xor12.3, %or13.2
  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
  %8 = load i8, ptr %arrayidx.4, align 1
  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
  %9 = load i8, ptr %arrayidx3.4, align 1
  %xor12.4 = xor i8 %9, %8
  %or13.4 = or i8 %xor12.4, %or13.3
  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
  %10 = load i8, ptr %arrayidx.5, align 1
  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
  %11 = load i8, ptr %arrayidx3.5, align 1
  %xor12.5 = xor i8 %11, %10
  %or13.5 = or i8 %xor12.5, %or13.4
  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
  %12 = load i8, ptr %arrayidx.6, align 1
  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
  %13 = load i8, ptr %arrayidx3.6, align 1
  %xor12.6 = xor i8 %13, %12
  %or13.6 = or i8 %xor12.6, %or13.5
  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
  %14 = load i8, ptr %arrayidx.7, align 1
  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
  %15 = load i8, ptr %arrayidx3.7, align 1
  %xor12.7 = xor i8 %15, %14
  %or13.7 = or i8 %xor12.7, %or13.6
  ret i8 %or13.7
}

define void @reduce_or_2() {
; ZVL128-LABEL: @reduce_or_2(
; ZVL128-NEXT:    [[TMP1:%.*]] = shl i64 0, 0
; ZVL128-NEXT:    [[TMP2:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison>, i64 [[TMP1]], i32 15
; ZVL128-NEXT:    [[TMP3:%.*]] = icmp ult <16 x i64> [[TMP2]], zeroinitializer
; ZVL128-NEXT:    [[TMP4:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[TMP1]], i32 6
; ZVL128-NEXT:    [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer
; ZVL128-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
; ZVL128-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP5]])
; ZVL128-NEXT:    [[OP_RDX:%.*]] = or i1 [[TMP6]], [[TMP7]]
; ZVL128-NEXT:    br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]]
; ZVL128:       8:
; ZVL128-NEXT:    ret void
; ZVL128:       9:
; ZVL128-NEXT:    ret void
;
; ZVL256-LABEL: @reduce_or_2(
; ZVL256-NEXT:    [[TMP1:%.*]] = shl i64 0, 0
; ZVL256-NEXT:    [[TMP2:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison>, i64 [[TMP1]], i32 15
; ZVL256-NEXT:    [[TMP3:%.*]] = icmp ult <16 x i64> [[TMP2]], zeroinitializer
; ZVL256-NEXT:    [[TMP4:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[TMP1]], i32 6
; ZVL256-NEXT:    [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer
; ZVL256-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
; ZVL256-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP5]])
; ZVL256-NEXT:    [[OP_RDX:%.*]] = or i1 [[TMP6]], [[TMP7]]
; ZVL256-NEXT:    br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]]
; ZVL256:       8:
; ZVL256-NEXT:    ret void
; ZVL256:       9:
; ZVL256-NEXT:    ret void
;
; ZVL512-LABEL: @reduce_or_2(
; ZVL512-NEXT:    [[TMP1:%.*]] = shl i64 0, 0
; ZVL512-NEXT:    [[TMP2:%.*]] = insertelement <32 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[TMP1]], i32 15
; ZVL512-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i64> [[TMP2]], <32 x i64> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 15, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; ZVL512-NEXT:    [[TMP4:%.*]] = icmp ult <32 x i64> [[TMP3]], zeroinitializer
; ZVL512-NEXT:    [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP4]])
; ZVL512-NEXT:    br i1 [[TMP5]], label [[TMP7:%.*]], label [[TMP6:%.*]]
; ZVL512:       6:
; ZVL512-NEXT:    ret void
; ZVL512:       7:
; ZVL512-NEXT:    ret void
;
  %1 = shl i64 0, 0
  %2 = icmp ult i64 0, 0
  %3 = icmp ult i64 0, 0
  %4 = or i1 %2, %3
  %5 = icmp ult i64 0, 0
  %6 = or i1 %4, %5
  %7 = icmp ult i64 0, 0
  %8 = or i1 %6, %7
  %9 = icmp ult i64 0, 0
  %10 = or i1 %8, %9
  %11 = icmp ult i64 0, 0
  %12 = or i1 %10, %11
  %13 = icmp ult i64 0, 0
  %14 = or i1 %12, %13
  %15 = icmp ult i64 0, 0
  %16 = or i1 %14, %15
  %17 = icmp ult i64 0, 0
  %18 = or i1 %16, %17
  %19 = icmp ult i64 0, 0
  %20 = or i1 %18, %19
  %21 = icmp ult i64 0, 0
  %22 = or i1 %20, %21
  %23 = icmp ult i64 0, 0
  %24 = or i1 %22, %23
  %25 = icmp ult i64 0, 0
  %26 = or i1 %24, %25
  %27 = icmp ult i64 0, 0
  %28 = or i1 %26, %27
  %29 = icmp ult i64 0, 0
  %30 = or i1 %28, %29
  %31 = icmp ult i64 %1, 0
  %32 = or i1 %30, %31
  %33 = icmp ult i64 0, 0
  %34 = or i1 %32, %33
  %35 = icmp ult i64 0, 0
  %36 = or i1 %34, %35
  %37 = icmp ult i64 0, 0
  %38 = or i1 %36, %37
  %39 = icmp ult i64 0, 0
  %40 = or i1 %38, %39
  %41 = icmp ult i64 0, 0
  %42 = or i1 %40, %41
  %43 = icmp ult i64 0, 0
  %44 = or i1 %42, %43
  %45 = icmp ult i64 %1, 0
  %46 = or i1 %44, %45
  %47 = icmp ult i64 0, 0
  %48 = or i1 %46, %47
  %49 = icmp ult i64 0, 0
  %50 = or i1 %48, %49
  %51 = icmp ult i64 0, 0
  %52 = or i1 %50, %51
  %53 = icmp ult i64 0, 0
  %54 = or i1 %52, %53
  %55 = icmp ult i64 0, 0
  %56 = or i1 %54, %55
  %57 = icmp ult i64 0, 0
  %58 = or i1 %56, %57
  %59 = icmp ult i64 0, 0
  %60 = or i1 %58, %59
  %61 = icmp ult i64 0, 0
  %62 = or i1 %60, %61
  %63 = icmp ult i64 0, 0
  %64 = or i1 %62, %63
  br i1 %64, label %66, label %65

65:                                               ; preds = %0
  ret void

66:                                               ; preds = %0
  ret void
}
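
; reduce_or_2 shows the effect of the minimum VLEN: at zvl128b/zvl256b the
; 32 compares are reduced as two v16i1 or-reductions combined with a scalar
; or, while zvl512b is wide enough for a single v32i1 reduction.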

define i8 @reduce_xor(ptr %a, ptr %b) {
; CHECK-LABEL: @reduce_xor(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> [[TMP2]])
; CHECK-NEXT:    [[OP_RDX:%.*]] = xor i8 [[TMP3]], 1
; CHECK-NEXT:    ret i8 [[OP_RDX]]
;
entry:
  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
  %0 = load i8, ptr %arrayidx, align 1
  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
  %1 = load i8, ptr %arrayidx3, align 1
  %and12 = and i8 %1, %0
  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
  %2 = load i8, ptr %arrayidx.1, align 1
  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
  %3 = load i8, ptr %arrayidx3.1, align 1
  %and12.1 = and i8 %3, %2
  %4 = xor i8 %and12, %and12.1
  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
  %5 = load i8, ptr %arrayidx.2, align 1
  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
  %6 = load i8, ptr %arrayidx3.2, align 1
  %and12.2 = and i8 %6, %5
  %7 = xor i8 %4, %and12.2
  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
  %8 = load i8, ptr %arrayidx.3, align 1
  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
  %9 = load i8, ptr %arrayidx3.3, align 1
  %and12.3 = and i8 %9, %8
  %10 = xor i8 %7, %and12.3
  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
  %11 = load i8, ptr %arrayidx.4, align 1
  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
  %12 = load i8, ptr %arrayidx3.4, align 1
  %and12.4 = and i8 %12, %11
  %13 = xor i8 %10, %and12.4
  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
  %14 = load i8, ptr %arrayidx.5, align 1
  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
  %15 = load i8, ptr %arrayidx3.5, align 1
  %and12.5 = and i8 %15, %14
  %16 = xor i8 %13, %and12.5
  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
  %17 = load i8, ptr %arrayidx.6, align 1
  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
  %18 = load i8, ptr %arrayidx3.6, align 1
  %and12.6 = and i8 %18, %17
  %19 = xor i8 %16, %and12.6
  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
  %20 = load i8, ptr %arrayidx.7, align 1
  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
  %21 = load i8, ptr %arrayidx3.7, align 1
  %and12.7 = and i8 %21, %20
  %22 = xor i8 %19, %and12.7
  %xor13.7 = xor i8 %22, 1
  ret i8 %xor13.7
}

define i8 @reduce_add(ptr %a, ptr %b) {
; CHECK-LABEL: @reduce_add(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> [[TMP2]])
; CHECK-NEXT:    [[OP_RDX:%.*]] = add i8 [[TMP3]], 1
; CHECK-NEXT:    ret i8 [[OP_RDX]]
;
entry:
  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
  %0 = load i8, ptr %arrayidx, align 1
  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
  %1 = load i8, ptr %arrayidx3, align 1
  %and12 = and i8 %1, %0
  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
  %2 = load i8, ptr %arrayidx.1, align 1
  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
  %3 = load i8, ptr %arrayidx3.1, align 1
  %and12.1 = and i8 %3, %2
  %4 = add i8 %and12, %and12.1
  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
  %5 = load i8, ptr %arrayidx.2, align 1
  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
  %6 = load i8, ptr %arrayidx3.2, align 1
  %and12.2 = and i8 %6, %5
  %7 = add i8 %4, %and12.2
  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
  %8 = load i8, ptr %arrayidx.3, align 1
  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
  %9 = load i8, ptr %arrayidx3.3, align 1
  %and12.3 = and i8 %9, %8
  %10 = add i8 %7, %and12.3
  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
  %11 = load i8, ptr %arrayidx.4, align 1
  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
  %12 = load i8, ptr %arrayidx3.4, align 1
  %and12.4 = and i8 %12, %11
  %13 = add i8 %10, %and12.4
  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
  %14 = load i8, ptr %arrayidx.5, align 1
  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
  %15 = load i8, ptr %arrayidx3.5, align 1
  %and12.5 = and i8 %15, %14
  %16 = add i8 %13, %and12.5
  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
  %17 = load i8, ptr %arrayidx.6, align 1
  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
  %18 = load i8, ptr %arrayidx3.6, align 1
  %and12.6 = and i8 %18, %17
  %19 = add i8 %16, %and12.6
  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
  %20 = load i8, ptr %arrayidx.7, align 1
  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
  %21 = load i8, ptr %arrayidx3.7, align 1
  %and12.7 = and i8 %21, %20
  %22 = add i8 %19, %and12.7
  %add13.7 = add i8 %22, 1
  ret i8 %add13.7
}

declare i8 @llvm.smin.i8(i8, i8)

define i8 @reduce_smin(ptr %a, ptr %b) {
; CHECK-LABEL: @reduce_smin(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> [[TMP2]])
; CHECK-NEXT:    ret i8 [[TMP3]]
;
entry:
  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
  %0 = load i8, ptr %arrayidx, align 1
  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
  %1 = load i8, ptr %arrayidx3, align 1
  %and12 = and i8 %1, %0
  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
  %2 = load i8, ptr %arrayidx.1, align 1
  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
  %3 = load i8, ptr %arrayidx3.1, align 1
  %and12.1 = and i8 %3, %2
  %4 = tail call i8 @llvm.smin.i8(i8 %and12, i8 %and12.1)
  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
  %5 = load i8, ptr %arrayidx.2, align 1
  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
  %6 = load i8, ptr %arrayidx3.2, align 1
  %and12.2 = and i8 %6, %5
  %7 = tail call i8 @llvm.smin.i8(i8 %4, i8 %and12.2)
  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
  %8 = load i8, ptr %arrayidx.3, align 1
  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
  %9 = load i8, ptr %arrayidx3.3, align 1
  %and12.3 = and i8 %9, %8
  %10 = tail call i8 @llvm.smin.i8(i8 %7, i8 %and12.3)
  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
  %11 = load i8, ptr %arrayidx.4, align 1
  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
  %12 = load i8, ptr %arrayidx3.4, align 1
  %and12.4 = and i8 %12, %11
  %13 = tail call i8 @llvm.smin.i8(i8 %10, i8 %and12.4)
  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
  %14 = load i8, ptr %arrayidx.5, align 1
  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
  %15 = load i8, ptr %arrayidx3.5, align 1
  %and12.5 = and i8 %15, %14
  %16 = tail call i8 @llvm.smin.i8(i8 %13, i8 %and12.5)
  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
  %17 = load i8, ptr %arrayidx.6, align 1
  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
  %18 = load i8, ptr %arrayidx3.6, align 1
  %and12.6 = and i8 %18, %17
  %19 = tail call i8 @llvm.smin.i8(i8 %16, i8 %and12.6)
  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
  %20 = load i8, ptr %arrayidx.7, align 1
  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
  %21 = load i8, ptr %arrayidx3.7, align 1
  %and12.7 = and i8 %21, %20
  %22 = tail call i8 @llvm.smin.i8(i8 %19, i8 %and12.7)
  ret i8 %22
}

declare i8 @llvm.smax.i8(i8, i8)

define i8 @reduce_smax(ptr %a, ptr %b) {
; CHECK-LABEL: @reduce_smax(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> [[TMP2]])
; CHECK-NEXT:    ret i8 [[TMP3]]
;
entry:
  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
  %0 = load i8, ptr %arrayidx, align 1
  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
  %1 = load i8, ptr %arrayidx3, align 1
  %and12 = and i8 %1, %0
  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
  %2 = load i8, ptr %arrayidx.1, align 1
  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
  %3 = load i8, ptr %arrayidx3.1, align 1
  %and12.1 = and i8 %3, %2
  %4 = tail call i8 @llvm.smax.i8(i8 %and12, i8 %and12.1)
  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
  %5 = load i8, ptr %arrayidx.2, align 1
  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
  %6 = load i8, ptr %arrayidx3.2, align 1
  %and12.2 = and i8 %6, %5
  %7 = tail call i8 @llvm.smax.i8(i8 %4, i8 %and12.2)
  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
  %8 = load i8, ptr %arrayidx.3, align 1
  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
  %9 = load i8, ptr %arrayidx3.3, align 1
  %and12.3 = and i8 %9, %8
  %10 = tail call i8 @llvm.smax.i8(i8 %7, i8 %and12.3)
  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
  %11 = load i8, ptr %arrayidx.4, align 1
  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
  %12 = load i8, ptr %arrayidx3.4, align 1
  %and12.4 = and i8 %12, %11
  %13 = tail call i8 @llvm.smax.i8(i8 %10, i8 %and12.4)
  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
  %14 = load i8, ptr %arrayidx.5, align 1
  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
  %15 = load i8, ptr %arrayidx3.5, align 1
  %and12.5 = and i8 %15, %14
  %16 = tail call i8 @llvm.smax.i8(i8 %13, i8 %and12.5)
  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
  %17 = load i8, ptr %arrayidx.6, align 1
  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
  %18 = load i8, ptr %arrayidx3.6, align 1
  %and12.6 = and i8 %18, %17
  %19 = tail call i8 @llvm.smax.i8(i8 %16, i8 %and12.6)
  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
  %20 = load i8, ptr %arrayidx.7, align 1
  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
  %21 = load i8, ptr %arrayidx3.7, align 1
  %and12.7 = and i8 %21, %20
  %22 = tail call i8 @llvm.smax.i8(i8 %19, i8 %and12.7)
  ret i8 %22
}

declare i8 @llvm.umax.i8(i8, i8)

define i8 @reduce_umax(ptr %a, ptr %b) {
; CHECK-LABEL: @reduce_umax(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> [[TMP2]])
; CHECK-NEXT:    ret i8 [[TMP3]]
;
entry:
  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
  %0 = load i8, ptr %arrayidx, align 1
  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
  %1 = load i8, ptr %arrayidx3, align 1
  %and12 = and i8 %1, %0
  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
  %2 = load i8, ptr %arrayidx.1, align 1
  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
  %3 = load i8, ptr %arrayidx3.1, align 1
  %and12.1 = and i8 %3, %2
  %4 = tail call i8 @llvm.umax.i8(i8 %and12, i8 %and12.1)
  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
  %5 = load i8, ptr %arrayidx.2, align 1
  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
  %6 = load i8, ptr %arrayidx3.2, align 1
  %and12.2 = and i8 %6, %5
  %7 = tail call i8 @llvm.umax.i8(i8 %4, i8 %and12.2)
  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
  %8 = load i8, ptr %arrayidx.3, align 1
  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
  %9 = load i8, ptr %arrayidx3.3, align 1
  %and12.3 = and i8 %9, %8
  %10 = tail call i8 @llvm.umax.i8(i8 %7, i8 %and12.3)
  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
  %11 = load i8, ptr %arrayidx.4, align 1
  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
  %12 = load i8, ptr %arrayidx3.4, align 1
  %and12.4 = and i8 %12, %11
  %13 = tail call i8 @llvm.umax.i8(i8 %10, i8 %and12.4)
  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
  %14 = load i8, ptr %arrayidx.5, align 1
  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
  %15 = load i8, ptr %arrayidx3.5, align 1
  %and12.5 = and i8 %15, %14
  %16 = tail call i8 @llvm.umax.i8(i8 %13, i8 %and12.5)
  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
  %17 = load i8, ptr %arrayidx.6, align 1
  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
  %18 = load i8, ptr %arrayidx3.6, align 1
  %and12.6 = and i8 %18, %17
  %19 = tail call i8 @llvm.umax.i8(i8 %16, i8 %and12.6)
  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
  %20 = load i8, ptr %arrayidx.7, align 1
  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
  %21 = load i8, ptr %arrayidx3.7, align 1
  %and12.7 = and i8 %21, %20
  %22 = tail call i8 @llvm.umax.i8(i8 %19, i8 %and12.7)
  ret i8 %22
}

declare i8 @llvm.umin.i8(i8, i8)

define i8 @reduce_umin(ptr %a, ptr %b) {
; CHECK-LABEL: @reduce_umin(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> [[TMP2]])
; CHECK-NEXT:    ret i8 [[TMP3]]
;
entry:
  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
  %0 = load i8, ptr %arrayidx, align 1
  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
  %1 = load i8, ptr %arrayidx3, align 1
  %and12 = and i8 %1, %0
  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
  %2 = load i8, ptr %arrayidx.1, align 1
  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
  %3 = load i8, ptr %arrayidx3.1, align 1
  %and12.1 = and i8 %3, %2
  %4 = tail call i8 @llvm.umin.i8(i8 %and12, i8 %and12.1)
  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
  %5 = load i8, ptr %arrayidx.2, align 1
  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
  %6 = load i8, ptr %arrayidx3.2, align 1
  %and12.2 = and i8 %6, %5
  %7 = tail call i8 @llvm.umin.i8(i8 %4, i8 %and12.2)
  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
  %8 = load i8, ptr %arrayidx.3, align 1
  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
  %9 = load i8, ptr %arrayidx3.3, align 1
  %and12.3 = and i8 %9, %8
  %10 = tail call i8 @llvm.umin.i8(i8 %7, i8 %and12.3)
  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
  %11 = load i8, ptr %arrayidx.4, align 1
  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
  %12 = load i8, ptr %arrayidx3.4, align 1
  %and12.4 = and i8 %12, %11
  %13 = tail call i8 @llvm.umin.i8(i8 %10, i8 %and12.4)
  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
  %14 = load i8, ptr %arrayidx.5, align 1
  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
  %15 = load i8, ptr %arrayidx3.5, align 1
  %and12.5 = and i8 %15, %14
  %16 = tail call i8 @llvm.umin.i8(i8 %13, i8 %and12.5)
  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
  %17 = load i8, ptr %arrayidx.6, align 1
  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
  %18 = load i8, ptr %arrayidx3.6, align 1
  %and12.6 = and i8 %18, %17
  %19 = tail call i8 @llvm.umin.i8(i8 %16, i8 %and12.6)
  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
  %20 = load i8, ptr %arrayidx.7, align 1
  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
  %21 = load i8, ptr %arrayidx3.7, align 1
  %and12.7 = and i8 %21, %20
  %22 = tail call i8 @llvm.umin.i8(i8 %19, i8 %and12.7)
  ret i8 %22
}

; The next batch exercises reductions involving zext of narrower loads.
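; As with the plain i64 loads, the 2x and 4x cases are expected to stay
; scalar; from 8x up, the loads become a single <N x i8> load, a zext to
; <N x i64>, and one @llvm.vector.reduce.add call.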

define i64 @red_zext_ld_2xi64(ptr %ptr) {
; CHECK-LABEL: @red_zext_ld_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LD0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[LD0]] to i64
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[GEP]], align 1
; CHECK-NEXT:    [[ZEXT_1:%.*]] = zext i8 [[LD1]] to i64
; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i64 [[ZEXT]], [[ZEXT_1]]
; CHECK-NEXT:    ret i64 [[ADD_1]]
;
entry:
  %ld0 = load i8, ptr %ptr
  %zext = zext i8 %ld0 to i64
  %gep = getelementptr inbounds i8, ptr %ptr, i64 1
  %ld1 = load i8, ptr %gep
  %zext.1 = zext i8 %ld1 to i64
  %add.1 = add nuw nsw i64 %zext, %zext.1
  ret i64 %add.1
}

define i64 @red_zext_ld_4xi64(ptr %ptr) {
; CHECK-LABEL: @red_zext_ld_4xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LD0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[LD0]] to i64
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[GEP]], align 1
; CHECK-NEXT:    [[ZEXT_1:%.*]] = zext i8 [[LD1]] to i64
; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i64 [[ZEXT]], [[ZEXT_1]]
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
; CHECK-NEXT:    [[LD2:%.*]] = load i8, ptr [[GEP_1]], align 1
; CHECK-NEXT:    [[ZEXT_2:%.*]] = zext i8 [[LD2]] to i64
; CHECK-NEXT:    [[ADD_2:%.*]] = add nuw nsw i64 [[ADD_1]], [[ZEXT_2]]
; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
; CHECK-NEXT:    [[LD3:%.*]] = load i8, ptr [[GEP_2]], align 1
; CHECK-NEXT:    [[ZEXT_3:%.*]] = zext i8 [[LD3]] to i64
; CHECK-NEXT:    [[ADD_3:%.*]] = add nuw nsw i64 [[ADD_2]], [[ZEXT_3]]
; CHECK-NEXT:    ret i64 [[ADD_3]]
;
entry:
  %ld0 = load i8, ptr %ptr
  %zext = zext i8 %ld0 to i64
  %gep = getelementptr inbounds i8, ptr %ptr, i64 1
  %ld1 = load i8, ptr %gep
  %zext.1 = zext i8 %ld1 to i64
  %add.1 = add nuw nsw i64 %zext, %zext.1
  %gep.1 = getelementptr inbounds i8, ptr %ptr, i64 2
  %ld2 = load i8, ptr %gep.1
  %zext.2 = zext i8 %ld2 to i64
  %add.2 = add nuw nsw i64 %add.1, %zext.2
  %gep.2 = getelementptr inbounds i8, ptr %ptr, i64 3
  %ld3 = load i8, ptr %gep.2
  %zext.3 = zext i8 %ld3 to i64
  %add.3 = add nuw nsw i64 %add.2, %zext.3
  ret i64 %add.3
}

define i64 @red_zext_ld_8xi64(ptr %ptr) {
; CHECK-LABEL: @red_zext_ld_8xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP1]])
; CHECK-NEXT:    ret i64 [[TMP2]]
;
entry:
  %ld0 = load i8, ptr %ptr
  %zext = zext i8 %ld0 to i64
  %gep = getelementptr inbounds i8, ptr %ptr, i64 1
  %ld1 = load i8, ptr %gep
  %zext.1 = zext i8 %ld1 to i64
  %add.1 = add nuw nsw i64 %zext, %zext.1
  %gep.1 = getelementptr inbounds i8, ptr %ptr, i64 2
  %ld2 = load i8, ptr %gep.1
  %zext.2 = zext i8 %ld2 to i64
  %add.2 = add nuw nsw i64 %add.1, %zext.2
  %gep.2 = getelementptr inbounds i8, ptr %ptr, i64 3
  %ld3 = load i8, ptr %gep.2
  %zext.3 = zext i8 %ld3 to i64
  %add.3 = add nuw nsw i64 %add.2, %zext.3
  %gep.3 = getelementptr inbounds i8, ptr %ptr, i64 4
  %ld4 = load i8, ptr %gep.3
  %zext.4 = zext i8 %ld4 to i64
  %add.4 = add nuw nsw i64 %add.3, %zext.4
  %gep.4 = getelementptr inbounds i8, ptr %ptr, i64 5
  %ld5 = load i8, ptr %gep.4
  %zext.5 = zext i8 %ld5 to i64
  %add.5 = add nuw nsw i64 %add.4, %zext.5
  %gep.5 = getelementptr inbounds i8, ptr %ptr, i64 6
  %ld6 = load i8, ptr %gep.5
  %zext.6 = zext i8 %ld6 to i64
  %add.6 = add nuw nsw i64 %add.5, %zext.6
  %gep.6 = getelementptr inbounds i8, ptr %ptr, i64 7
  %ld7 = load i8, ptr %gep.6
  %zext.7 = zext i8 %ld7 to i64
  %add.7 = add nuw nsw i64 %add.6, %zext.7
  ret i64 %add.7
}

define i64 @red_zext_ld_16xi64(ptr %ptr) {
; CHECK-LABEL: @red_zext_ld_16xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = zext <16 x i8> [[TMP0]] to <16 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> [[TMP1]])
; CHECK-NEXT:    ret i64 [[TMP2]]
;
entry:
  %ld0 = load i8, ptr %ptr
  %zext = zext i8 %ld0 to i64
  %gep = getelementptr inbounds i8, ptr %ptr, i64 1
  %ld1 = load i8, ptr %gep
  %zext.1 = zext i8 %ld1 to i64
  %add.1 = add nuw nsw i64 %zext, %zext.1
  %gep.1 = getelementptr inbounds i8, ptr %ptr, i64 2
  %ld2 = load i8, ptr %gep.1
  %zext.2 = zext i8 %ld2 to i64
  %add.2 = add nuw nsw i64 %add.1, %zext.2
  %gep.2 = getelementptr inbounds i8, ptr %ptr, i64 3
  %ld3 = load i8, ptr %gep.2
  %zext.3 = zext i8 %ld3 to i64
  %add.3 = add nuw nsw i64 %add.2, %zext.3
  %gep.3 = getelementptr inbounds i8, ptr %ptr, i64 4
  %ld4 = load i8, ptr %gep.3
  %zext.4 = zext i8 %ld4 to i64
  %add.4 = add nuw nsw i64 %add.3, %zext.4
  %gep.4 = getelementptr inbounds i8, ptr %ptr, i64 5
  %ld5 = load i8, ptr %gep.4
  %zext.5 = zext i8 %ld5 to i64
  %add.5 = add nuw nsw i64 %add.4, %zext.5
  %gep.5 = getelementptr inbounds i8, ptr %ptr, i64 6
  %ld6 = load i8, ptr %gep.5
  %zext.6 = zext i8 %ld6 to i64
  %add.6 = add nuw nsw i64 %add.5, %zext.6
  %gep.6 = getelementptr inbounds i8, ptr %ptr, i64 7
  %ld7 = load i8, ptr %gep.6
  %zext.7 = zext i8 %ld7 to i64
  %add.7 = add nuw nsw i64 %add.6, %zext.7
  %gep.7 = getelementptr inbounds i8, ptr %ptr, i64 8
  %ld8 = load i8, ptr %gep.7
  %zext.8 = zext i8 %ld8 to i64
  %add.8 = add nuw nsw i64 %add.7, %zext.8
  %gep.8 = getelementptr inbounds i8, ptr %ptr, i64 9
  %ld9 = load i8, ptr %gep.8
  %zext.9 = zext i8 %ld9 to i64
  %add.9 = add nuw nsw i64 %add.8, %zext.9
  %gep.9 = getelementptr inbounds i8, ptr %ptr, i64 10
  %ld10 = load i8, ptr %gep.9
  %zext.10 = zext i8 %ld10 to i64
  %add.10 = add nuw nsw i64 %add.9, %zext.10
  %gep.10 = getelementptr inbounds i8, ptr %ptr, i64 11
  %ld11 = load i8, ptr %gep.10
  %zext.11 = zext i8 %ld11 to i64
  %add.11 = add nuw nsw i64 %add.10, %zext.11
  %gep.11 = getelementptr inbounds i8, ptr %ptr, i64 12
  %ld12 = load i8, ptr %gep.11
  %zext.12 = zext i8 %ld12 to i64
  %add.12 = add nuw nsw i64 %add.11, %zext.12
  %gep.12 = getelementptr inbounds i8, ptr %ptr, i64 13
  %ld13 = load i8, ptr %gep.12
  %zext.13 = zext i8 %ld13 to i64
  %add.13 = add nuw nsw i64 %add.12, %zext.13
  %gep.13 = getelementptr inbounds i8, ptr %ptr, i64 14
  %ld14 = load i8, ptr %gep.13
  %zext.14 = zext i8 %ld14 to i64
  %add.14 = add nuw nsw i64 %add.13, %zext.14
  %gep.14 = getelementptr inbounds i8, ptr %ptr, i64 15
  %ld15 = load i8, ptr %gep.14
  %zext.15 = zext i8 %ld15 to i64
  %add.15 = add nuw nsw i64 %add.14, %zext.15
  ret i64 %add.15
}
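
; stride_sum_abs_diff is a small sum-of-absolute-differences kernel: two
; i32 pairs per array, with the second pair at a runtime %stride offset.
; The CHECK lines show the four 2-wide loads being shuffled together into
; 4-wide vectors before the sub/abs/reduce.add chain.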

declare i32 @llvm.abs.i32(i32, i1)

define i32 @stride_sum_abs_diff(ptr %p, ptr %q, i64 %stride) {
; CHECK-LABEL: @stride_sum_abs_diff(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[Q:%.*]], align 4
; CHECK-NEXT:    [[P_2:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[STRIDE:%.*]]
; CHECK-NEXT:    [[Q_2:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[STRIDE]]
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[P_2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[Q_2]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP11:%.*]] = sub <4 x i32> [[TMP7]], [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP11]], i1 true)
; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP12]])
; CHECK-NEXT:    ret i32 [[TMP13]]
;
  %x.0 = load i32, ptr %p
  %y.0 = load i32, ptr %q
  %sub.0 = sub i32 %x.0, %y.0
  %abs.0 = tail call i32 @llvm.abs.i32(i32 %sub.0, i1 true)

  %p.1 = getelementptr inbounds i32, ptr %p, i64 1
  %x.1 = load i32, ptr %p.1
  %q.1 = getelementptr inbounds i32, ptr %q, i64 1
  %y.1 = load i32, ptr %q.1
  %sub.1 = sub i32 %x.1, %y.1
  %abs.1 = tail call i32 @llvm.abs.i32(i32 %sub.1, i1 true)
  %sum.0 = add i32 %abs.0, %abs.1

  %p.2 = getelementptr inbounds i32, ptr %p, i64 %stride
  %q.2 = getelementptr inbounds i32, ptr %q, i64 %stride

  %x.2 = load i32, ptr %p.2
  %y.2 = load i32, ptr %q.2
  %sub.2 = sub i32 %x.2, %y.2
  %abs.2 = tail call i32 @llvm.abs.i32(i32 %sub.2, i1 true)
  %sum.1 = add i32 %sum.0, %abs.2

  %p.3 = getelementptr inbounds i32, ptr %p.2, i64 1
  %x.3 = load i32, ptr %p.3
  %q.3 = getelementptr inbounds i32, ptr %q.2, i64 1
  %y.3 = load i32, ptr %q.3
  %sub.3 = sub i32 %x.3, %y.3
  %abs.3 = tail call i32 @llvm.abs.i32(i32 %sub.3, i1 true)
  %sum.2 = add i32 %sum.1, %abs.3
  ret i32 %sum.2
}

define i32 @reduce_sum_2arrays_a(ptr noalias %p, ptr noalias %q) {
; CHECK-LABEL: @reduce_sum_2arrays_a(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[P:%.*]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[Q:%.*]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT:    [[TMP5:%.*]] = zext <8 x i8> [[TMP4]] to <8 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP5]])
; CHECK-NEXT:    ret i32 [[TMP6]]
;
entry:
  %x.0 = load i8, ptr %p, align 1
  %conv = zext i8 %x.0 to i32
  %y.0 = load i8, ptr %q, align 1
  %conv3 = zext i8 %y.0 to i32
  %add4 = add nuw nsw i32 %conv, %conv3

  %arrayidx.1 = getelementptr inbounds i8, ptr %p, i64 1
  %x.1 = load i8, ptr %arrayidx.1, align 1
  %conv.1 = zext i8 %x.1 to i32
  %arrayidx2.1 = getelementptr inbounds i8, ptr %q, i64 1
  %y.1 = load i8, ptr %arrayidx2.1, align 1
  %conv3.1 = zext i8 %y.1 to i32
  %add.1 = add nuw nsw i32 %add4, %conv.1
  %add4.1 = add nuw nsw i32 %add.1, %conv3.1

  %arrayidx.2 = getelementptr inbounds i8, ptr %p, i64 2
  %x.2 = load i8, ptr %arrayidx.2, align 1
  %conv.2 = zext i8 %x.2 to i32
  %arrayidx2.2 = getelementptr inbounds i8, ptr %q, i64 2
  %y.2 = load i8, ptr %arrayidx2.2, align 1
  %conv3.2 = zext i8 %y.2 to i32
  %add.2 = add nuw nsw i32 %add4.1, %conv.2
  %add4.2 = add nuw nsw i32 %add.2, %conv3.2

  %arrayidx.3 = getelementptr inbounds i8, ptr %p, i64 3
  %x.3 = load i8, ptr %arrayidx.3, align 1
  %conv.3 = zext i8 %x.3 to i32
  %arrayidx2.3 = getelementptr inbounds i8, ptr %q, i64 3
  %y.3 = load i8, ptr %arrayidx2.3, align 1
  %conv3.3 = zext i8 %y.3 to i32
  %add.3 = add nuw nsw i32 %add4.2, %conv.3
  %add4.3 = add nuw nsw i32 %add.3, %conv3.3
  ret i32 %add4.3
}
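
; reduce_sum_2arrays_b sums all of %x before all of %y, rather than
; interleaving the two arrays as reduce_sum_2arrays_a does; both orderings
; should converge to the same shuffle + zext + v8i32 reduction.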

define i32 @reduce_sum_2arrays_b(ptr noalias noundef %x, ptr noalias %y) {
; CHECK-LABEL: @reduce_sum_2arrays_b(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[X:%.*]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[Y:%.*]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT:    [[TMP5:%.*]] = zext <8 x i8> [[TMP4]] to <8 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP5]])
; CHECK-NEXT:    ret i32 [[TMP6]]
;
entry:
  %0 = load i8, ptr %x, align 1
  %conv = zext i8 %0 to i32
  %arrayidx.1 = getelementptr inbounds i8, ptr %x, i64 1
  %1 = load i8, ptr %arrayidx.1, align 1
  %conv.1 = zext i8 %1 to i32
  %add.1 = add nuw nsw i32 %conv, %conv.1
  %arrayidx.2 = getelementptr inbounds i8, ptr %x, i64 2
  %2 = load i8, ptr %arrayidx.2, align 1
  %conv.2 = zext i8 %2 to i32
  %add.2 = add nuw nsw i32 %add.1, %conv.2
  %arrayidx.3 = getelementptr inbounds i8, ptr %x, i64 3
  %3 = load i8, ptr %arrayidx.3, align 1
  %conv.3 = zext i8 %3 to i32
  %add.3 = add nuw nsw i32 %add.2, %conv.3
  %4 = load i8, ptr %y, align 1
  %conv9 = zext i8 %4 to i32
  %add10 = add nuw nsw i32 %add.3, %conv9
  %arrayidx8.1 = getelementptr inbounds i8, ptr %y, i64 1
  %5 = load i8, ptr %arrayidx8.1, align 1
  %conv9.1 = zext i8 %5 to i32
  %add10.1 = add nuw nsw i32 %add10, %conv9.1
  %arrayidx8.2 = getelementptr inbounds i8, ptr %y, i64 2
  %6 = load i8, ptr %arrayidx8.2, align 1
  %conv9.2 = zext i8 %6 to i32
  %add10.2 = add nuw nsw i32 %add10.1, %conv9.2
  %arrayidx8.3 = getelementptr inbounds i8, ptr %y, i64 3
  %7 = load i8, ptr %arrayidx8.3, align 1
  %conv9.3 = zext i8 %7 to i32
  %add10.3 = add nuw nsw i32 %add10.2, %conv9.3
  ret i32 %add10.3
}