1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
3 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4
4 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX
5 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512
8 ; unsigned long bitmask_16xi8(const char *src) {
9 ; unsigned long mask = 0;
10 ; for (unsigned i = 0; i != 16; ++i) {
12 ; mask |= (1ull << i);
17 define i64 @bitmask_16xi8(ptr nocapture noundef readonly %src) {
18 ; CHECK-LABEL: @bitmask_16xi8(
20 ; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[SRC:%.*]], align 1
21 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp ne i8 [[TMP0]], 0
22 ; CHECK-NEXT: [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
23 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
24 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX_1]], align 1
25 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <8 x i8> [[TMP1]], zeroinitializer
26 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> zeroinitializer, <8 x i64> <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
27 ; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 9
28 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ARRAYIDX_9]], align 1
29 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[TMP4]], zeroinitializer
30 ; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> zeroinitializer, <4 x i64> <i64 512, i64 1024, i64 2048, i64 4096>
31 ; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 13
32 ; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX_13]], align 1
33 ; CHECK-NEXT: [[TOBOOL_NOT_13:%.*]] = icmp eq i8 [[TMP7]], 0
34 ; CHECK-NEXT: [[OR_13:%.*]] = select i1 [[TOBOOL_NOT_13]], i64 0, i64 8192
35 ; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 14
36 ; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX_14]], align 1
37 ; CHECK-NEXT: [[TOBOOL_NOT_14:%.*]] = icmp eq i8 [[TMP8]], 0
38 ; CHECK-NEXT: [[OR_14:%.*]] = select i1 [[TOBOOL_NOT_14]], i64 0, i64 16384
39 ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 15
40 ; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX_15]], align 1
41 ; CHECK-NEXT: [[TOBOOL_NOT_15:%.*]] = icmp eq i8 [[TMP9]], 0
42 ; CHECK-NEXT: [[OR_15:%.*]] = select i1 [[TOBOOL_NOT_15]], i64 0, i64 32768
43 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP3]])
44 ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP6]])
45 ; CHECK-NEXT: [[OP_RDX:%.*]] = or i64 [[TMP10]], [[TMP11]]
46 ; CHECK-NEXT: [[OP_RDX1:%.*]] = or i64 [[OP_RDX]], [[OR_13]]
47 ; CHECK-NEXT: [[OP_RDX2:%.*]] = or i64 [[OR_14]], [[OR_15]]
48 ; CHECK-NEXT: [[OP_RDX3:%.*]] = or i64 [[OP_RDX1]], [[OP_RDX2]]
49 ; CHECK-NEXT: [[OP_RDX4:%.*]] = or i64 [[OP_RDX3]], [[OR]]
50 ; CHECK-NEXT: ret i64 [[OP_RDX4]]
53 %0 = load i8, ptr %src, align 1
54 %tobool.not = icmp ne i8 %0, 0
55 %or = zext i1 %tobool.not to i64
56 %arrayidx.1 = getelementptr inbounds i8, ptr %src, i64 1
57 %1 = load i8, ptr %arrayidx.1, align 1
58 %tobool.not.1 = icmp eq i8 %1, 0
59 %or.1 = select i1 %tobool.not.1, i64 0, i64 2
60 %mask.1.1 = or i64 %or.1, %or
61 %arrayidx.2 = getelementptr inbounds i8, ptr %src, i64 2
62 %2 = load i8, ptr %arrayidx.2, align 1
63 %tobool.not.2 = icmp eq i8 %2, 0
64 %or.2 = select i1 %tobool.not.2, i64 0, i64 4
65 %mask.1.2 = or i64 %or.2, %mask.1.1
66 %arrayidx.3 = getelementptr inbounds i8, ptr %src, i64 3
67 %3 = load i8, ptr %arrayidx.3, align 1
68 %tobool.not.3 = icmp eq i8 %3, 0
69 %or.3 = select i1 %tobool.not.3, i64 0, i64 8
70 %mask.1.3 = or i64 %or.3, %mask.1.2
71 %arrayidx.4 = getelementptr inbounds i8, ptr %src, i64 4
72 %4 = load i8, ptr %arrayidx.4, align 1
73 %tobool.not.4 = icmp eq i8 %4, 0
74 %or.4 = select i1 %tobool.not.4, i64 0, i64 16
75 %mask.1.4 = or i64 %or.4, %mask.1.3
76 %arrayidx.5 = getelementptr inbounds i8, ptr %src, i64 5
77 %5 = load i8, ptr %arrayidx.5, align 1
78 %tobool.not.5 = icmp eq i8 %5, 0
79 %or.5 = select i1 %tobool.not.5, i64 0, i64 32
80 %mask.1.5 = or i64 %or.5, %mask.1.4
81 %arrayidx.6 = getelementptr inbounds i8, ptr %src, i64 6
82 %6 = load i8, ptr %arrayidx.6, align 1
83 %tobool.not.6 = icmp eq i8 %6, 0
84 %or.6 = select i1 %tobool.not.6, i64 0, i64 64
85 %mask.1.6 = or i64 %or.6, %mask.1.5
86 %arrayidx.7 = getelementptr inbounds i8, ptr %src, i64 7
87 %7 = load i8, ptr %arrayidx.7, align 1
88 %tobool.not.7 = icmp eq i8 %7, 0
89 %or.7 = select i1 %tobool.not.7, i64 0, i64 128
90 %mask.1.7 = or i64 %or.7, %mask.1.6
91 %arrayidx.8 = getelementptr inbounds i8, ptr %src, i64 8
92 %8 = load i8, ptr %arrayidx.8, align 1
93 %tobool.not.8 = icmp eq i8 %8, 0
94 %or.8 = select i1 %tobool.not.8, i64 0, i64 256
95 %mask.1.8 = or i64 %or.8, %mask.1.7
96 %arrayidx.9 = getelementptr inbounds i8, ptr %src, i64 9
97 %9 = load i8, ptr %arrayidx.9, align 1
98 %tobool.not.9 = icmp eq i8 %9, 0
99 %or.9 = select i1 %tobool.not.9, i64 0, i64 512
100 %mask.1.9 = or i64 %or.9, %mask.1.8
101 %arrayidx.10 = getelementptr inbounds i8, ptr %src, i64 10
102 %10 = load i8, ptr %arrayidx.10, align 1
103 %tobool.not.10 = icmp eq i8 %10, 0
104 %or.10 = select i1 %tobool.not.10, i64 0, i64 1024
105 %mask.1.10 = or i64 %or.10, %mask.1.9
106 %arrayidx.11 = getelementptr inbounds i8, ptr %src, i64 11
107 %11 = load i8, ptr %arrayidx.11, align 1
108 %tobool.not.11 = icmp eq i8 %11, 0
109 %or.11 = select i1 %tobool.not.11, i64 0, i64 2048
110 %mask.1.11 = or i64 %or.11, %mask.1.10
111 %arrayidx.12 = getelementptr inbounds i8, ptr %src, i64 12
112 %12 = load i8, ptr %arrayidx.12, align 1
113 %tobool.not.12 = icmp eq i8 %12, 0
114 %or.12 = select i1 %tobool.not.12, i64 0, i64 4096
115 %mask.1.12 = or i64 %or.12, %mask.1.11
116 %arrayidx.13 = getelementptr inbounds i8, ptr %src, i64 13
117 %13 = load i8, ptr %arrayidx.13, align 1
118 %tobool.not.13 = icmp eq i8 %13, 0
119 %or.13 = select i1 %tobool.not.13, i64 0, i64 8192
120 %mask.1.13 = or i64 %or.13, %mask.1.12
121 %arrayidx.14 = getelementptr inbounds i8, ptr %src, i64 14
122 %14 = load i8, ptr %arrayidx.14, align 1
123 %tobool.not.14 = icmp eq i8 %14, 0
124 %or.14 = select i1 %tobool.not.14, i64 0, i64 16384
125 %mask.1.14 = or i64 %or.14, %mask.1.13
126 %arrayidx.15 = getelementptr inbounds i8, ptr %src, i64 15
127 %15 = load i8, ptr %arrayidx.15, align 1
128 %tobool.not.15 = icmp eq i8 %15, 0
129 %or.15 = select i1 %tobool.not.15, i64 0, i64 32768
130 %mask.1.15 = or i64 %or.15, %mask.1.14
134 define i64 @bitmask_4xi16(ptr nocapture noundef readonly %src) {
135 ; SSE-LABEL: @bitmask_4xi16(
137 ; SSE-NEXT: [[TMP0:%.*]] = load i16, ptr [[SRC:%.*]], align 2
138 ; SSE-NEXT: [[TOBOOL_NOT:%.*]] = icmp ne i16 [[TMP0]], 0
139 ; SSE-NEXT: [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
140 ; SSE-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 1
141 ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[ARRAYIDX_1]], align 2
142 ; SSE-NEXT: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
143 ; SSE-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> zeroinitializer, <4 x i64> <i64 2, i64 4, i64 8, i64 16>
144 ; SSE-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 5
145 ; SSE-NEXT: [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX_5]], align 2
146 ; SSE-NEXT: [[TOBOOL_NOT_5:%.*]] = icmp eq i16 [[TMP4]], 0
147 ; SSE-NEXT: [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
148 ; SSE-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 6
149 ; SSE-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX_6]], align 2
150 ; SSE-NEXT: [[TOBOOL_NOT_6:%.*]] = icmp eq i16 [[TMP5]], 0
151 ; SSE-NEXT: [[OR_6:%.*]] = select i1 [[TOBOOL_NOT_6]], i64 0, i64 64
152 ; SSE-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 7
153 ; SSE-NEXT: [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX_7]], align 2
154 ; SSE-NEXT: [[TOBOOL_NOT_7:%.*]] = icmp eq i16 [[TMP6]], 0
155 ; SSE-NEXT: [[OR_7:%.*]] = select i1 [[TOBOOL_NOT_7]], i64 0, i64 128
156 ; SSE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP3]])
157 ; SSE-NEXT: [[OP_RDX:%.*]] = or i64 [[TMP7]], [[OR_5]]
158 ; SSE-NEXT: [[OP_RDX1:%.*]] = or i64 [[OR_6]], [[OR_7]]
159 ; SSE-NEXT: [[OP_RDX2:%.*]] = or i64 [[OP_RDX]], [[OP_RDX1]]
160 ; SSE-NEXT: [[OP_RDX3:%.*]] = or i64 [[OP_RDX2]], [[OR]]
161 ; SSE-NEXT: ret i64 [[OP_RDX3]]
163 ; AVX-LABEL: @bitmask_4xi16(
165 ; AVX-NEXT: [[TMP0:%.*]] = load i16, ptr [[SRC:%.*]], align 2
166 ; AVX-NEXT: [[TOBOOL_NOT:%.*]] = icmp ne i16 [[TMP0]], 0
167 ; AVX-NEXT: [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
168 ; AVX-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 1
169 ; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[ARRAYIDX_1]], align 2
170 ; AVX-NEXT: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
171 ; AVX-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> zeroinitializer, <4 x i64> <i64 2, i64 4, i64 8, i64 16>
172 ; AVX-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 5
173 ; AVX-NEXT: [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX_5]], align 2
174 ; AVX-NEXT: [[TOBOOL_NOT_5:%.*]] = icmp eq i16 [[TMP4]], 0
175 ; AVX-NEXT: [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
176 ; AVX-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 6
177 ; AVX-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX_6]], align 2
178 ; AVX-NEXT: [[TOBOOL_NOT_6:%.*]] = icmp eq i16 [[TMP5]], 0
179 ; AVX-NEXT: [[OR_6:%.*]] = select i1 [[TOBOOL_NOT_6]], i64 0, i64 64
180 ; AVX-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 7
181 ; AVX-NEXT: [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX_7]], align 2
182 ; AVX-NEXT: [[TOBOOL_NOT_7:%.*]] = icmp eq i16 [[TMP6]], 0
183 ; AVX-NEXT: [[OR_7:%.*]] = select i1 [[TOBOOL_NOT_7]], i64 0, i64 128
184 ; AVX-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP3]])
185 ; AVX-NEXT: [[OP_RDX:%.*]] = or i64 [[TMP7]], [[OR_5]]
186 ; AVX-NEXT: [[OP_RDX1:%.*]] = or i64 [[OR_6]], [[OR_7]]
187 ; AVX-NEXT: [[OP_RDX2:%.*]] = or i64 [[OP_RDX]], [[OP_RDX1]]
188 ; AVX-NEXT: [[OP_RDX3:%.*]] = or i64 [[OP_RDX2]], [[OR]]
189 ; AVX-NEXT: ret i64 [[OP_RDX3]]
191 ; AVX512-LABEL: @bitmask_4xi16(
192 ; AVX512-NEXT: entry:
193 ; AVX512-NEXT: [[TMP0:%.*]] = load i16, ptr [[SRC:%.*]], align 2
194 ; AVX512-NEXT: [[TOBOOL_NOT:%.*]] = icmp ne i16 [[TMP0]], 0
195 ; AVX512-NEXT: [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
196 ; AVX512-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 1
197 ; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[ARRAYIDX_1]], align 2
198 ; AVX512-NEXT: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
199 ; AVX512-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> zeroinitializer, <4 x i64> <i64 2, i64 4, i64 8, i64 16>
200 ; AVX512-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 5
201 ; AVX512-NEXT: [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX_5]], align 2
202 ; AVX512-NEXT: [[TOBOOL_NOT_5:%.*]] = icmp eq i16 [[TMP4]], 0
203 ; AVX512-NEXT: [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
204 ; AVX512-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 6
205 ; AVX512-NEXT: [[TMP5:%.*]] = load <2 x i16>, ptr [[ARRAYIDX_6]], align 2
206 ; AVX512-NEXT: [[TMP6:%.*]] = icmp eq <2 x i16> [[TMP5]], zeroinitializer
207 ; AVX512-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> zeroinitializer, <2 x i64> <i64 64, i64 128>
208 ; AVX512-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP3]])
209 ; AVX512-NEXT: [[OP_RDX:%.*]] = or i64 [[TMP8]], [[OR_5]]
210 ; AVX512-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP7]], i32 0
211 ; AVX512-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1
212 ; AVX512-NEXT: [[OP_RDX1:%.*]] = or i64 [[TMP9]], [[TMP10]]
213 ; AVX512-NEXT: [[OP_RDX2:%.*]] = or i64 [[OP_RDX]], [[OP_RDX1]]
214 ; AVX512-NEXT: [[OP_RDX3:%.*]] = or i64 [[OP_RDX2]], [[OR]]
215 ; AVX512-NEXT: ret i64 [[OP_RDX3]]
218 %0 = load i16, ptr %src, align 2
219 %tobool.not = icmp ne i16 %0, 0
220 %or = zext i1 %tobool.not to i64
221 %arrayidx.1 = getelementptr inbounds i16, ptr %src, i64 1
222 %1 = load i16, ptr %arrayidx.1, align 2
223 %tobool.not.1 = icmp eq i16 %1, 0
224 %or.1 = select i1 %tobool.not.1, i64 0, i64 2
225 %mask.1.1 = or i64 %or.1, %or
226 %arrayidx.2 = getelementptr inbounds i16, ptr %src, i64 2
227 %2 = load i16, ptr %arrayidx.2, align 2
228 %tobool.not.2 = icmp eq i16 %2, 0
229 %or.2 = select i1 %tobool.not.2, i64 0, i64 4
230 %mask.1.2 = or i64 %or.2, %mask.1.1
231 %arrayidx.3 = getelementptr inbounds i16, ptr %src, i64 3
232 %3 = load i16, ptr %arrayidx.3, align 2
233 %tobool.not.3 = icmp eq i16 %3, 0
234 %or.3 = select i1 %tobool.not.3, i64 0, i64 8
235 %mask.1.3 = or i64 %or.3, %mask.1.2
236 %arrayidx.4 = getelementptr inbounds i16, ptr %src, i64 4
237 %4 = load i16, ptr %arrayidx.4, align 2
238 %tobool.not.4 = icmp eq i16 %4, 0
239 %or.4 = select i1 %tobool.not.4, i64 0, i64 16
240 %mask.1.4 = or i64 %or.4, %mask.1.3
241 %arrayidx.5 = getelementptr inbounds i16, ptr %src, i64 5
242 %5 = load i16, ptr %arrayidx.5, align 2
243 %tobool.not.5 = icmp eq i16 %5, 0
244 %or.5 = select i1 %tobool.not.5, i64 0, i64 32
245 %mask.1.5 = or i64 %or.5, %mask.1.4
246 %arrayidx.6 = getelementptr inbounds i16, ptr %src, i64 6
247 %6 = load i16, ptr %arrayidx.6, align 2
248 %tobool.not.6 = icmp eq i16 %6, 0
249 %or.6 = select i1 %tobool.not.6, i64 0, i64 64
250 %mask.1.6 = or i64 %or.6, %mask.1.5
251 %arrayidx.7 = getelementptr inbounds i16, ptr %src, i64 7
252 %7 = load i16, ptr %arrayidx.7, align 2
253 %tobool.not.7 = icmp eq i16 %7, 0
254 %or.7 = select i1 %tobool.not.7, i64 0, i64 128
255 %mask.1.7 = or i64 %or.7, %mask.1.6
259 define i64 @bitmask_8xi32(ptr nocapture noundef readonly %src) {
260 ; SSE-LABEL: @bitmask_8xi32(
262 ; SSE-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
263 ; SSE-NEXT: [[TOBOOL_NOT:%.*]] = icmp ne i32 [[TMP0]], 0
264 ; SSE-NEXT: [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
265 ; SSE-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 1
266 ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_1]], align 4
267 ; SSE-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
268 ; SSE-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> zeroinitializer, <4 x i64> <i64 2, i64 4, i64 8, i64 16>
269 ; SSE-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 5
270 ; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
271 ; SSE-NEXT: [[TOBOOL_NOT_5:%.*]] = icmp eq i32 [[TMP4]], 0
272 ; SSE-NEXT: [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
273 ; SSE-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 6
274 ; SSE-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
275 ; SSE-NEXT: [[TOBOOL_NOT_6:%.*]] = icmp eq i32 [[TMP5]], 0
276 ; SSE-NEXT: [[OR_6:%.*]] = select i1 [[TOBOOL_NOT_6]], i64 0, i64 64
277 ; SSE-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 7
278 ; SSE-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
279 ; SSE-NEXT: [[TOBOOL_NOT_7:%.*]] = icmp eq i32 [[TMP6]], 0
280 ; SSE-NEXT: [[OR_7:%.*]] = select i1 [[TOBOOL_NOT_7]], i64 0, i64 128
281 ; SSE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP3]])
282 ; SSE-NEXT: [[OP_RDX:%.*]] = or i64 [[TMP7]], [[OR_5]]
283 ; SSE-NEXT: [[OP_RDX1:%.*]] = or i64 [[OR_6]], [[OR_7]]
284 ; SSE-NEXT: [[OP_RDX2:%.*]] = or i64 [[OP_RDX]], [[OP_RDX1]]
285 ; SSE-NEXT: [[OP_RDX3:%.*]] = or i64 [[OP_RDX2]], [[OR]]
286 ; SSE-NEXT: ret i64 [[OP_RDX3]]
288 ; AVX-LABEL: @bitmask_8xi32(
290 ; AVX-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
291 ; AVX-NEXT: [[TOBOOL_NOT:%.*]] = icmp ne i32 [[TMP0]], 0
292 ; AVX-NEXT: [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
293 ; AVX-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 1
294 ; AVX-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_1]], align 4
295 ; AVX-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
296 ; AVX-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> zeroinitializer, <4 x i64> <i64 2, i64 4, i64 8, i64 16>
297 ; AVX-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 5
298 ; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
299 ; AVX-NEXT: [[TOBOOL_NOT_5:%.*]] = icmp eq i32 [[TMP4]], 0
300 ; AVX-NEXT: [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
301 ; AVX-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 6
302 ; AVX-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
303 ; AVX-NEXT: [[TOBOOL_NOT_6:%.*]] = icmp eq i32 [[TMP5]], 0
304 ; AVX-NEXT: [[OR_6:%.*]] = select i1 [[TOBOOL_NOT_6]], i64 0, i64 64
305 ; AVX-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 7
306 ; AVX-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
307 ; AVX-NEXT: [[TOBOOL_NOT_7:%.*]] = icmp eq i32 [[TMP6]], 0
308 ; AVX-NEXT: [[OR_7:%.*]] = select i1 [[TOBOOL_NOT_7]], i64 0, i64 128
309 ; AVX-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP3]])
310 ; AVX-NEXT: [[OP_RDX:%.*]] = or i64 [[TMP7]], [[OR_5]]
311 ; AVX-NEXT: [[OP_RDX1:%.*]] = or i64 [[OR_6]], [[OR_7]]
312 ; AVX-NEXT: [[OP_RDX2:%.*]] = or i64 [[OP_RDX]], [[OP_RDX1]]
313 ; AVX-NEXT: [[OP_RDX3:%.*]] = or i64 [[OP_RDX2]], [[OR]]
314 ; AVX-NEXT: ret i64 [[OP_RDX3]]
316 ; AVX512-LABEL: @bitmask_8xi32(
317 ; AVX512-NEXT: entry:
318 ; AVX512-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
319 ; AVX512-NEXT: [[TOBOOL_NOT:%.*]] = icmp ne i32 [[TMP0]], 0
320 ; AVX512-NEXT: [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
321 ; AVX512-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 1
322 ; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_1]], align 4
323 ; AVX512-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
324 ; AVX512-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> zeroinitializer, <4 x i64> <i64 2, i64 4, i64 8, i64 16>
325 ; AVX512-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 5
326 ; AVX512-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
327 ; AVX512-NEXT: [[TOBOOL_NOT_5:%.*]] = icmp eq i32 [[TMP4]], 0
328 ; AVX512-NEXT: [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
329 ; AVX512-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 6
330 ; AVX512-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX_6]], align 4
331 ; AVX512-NEXT: [[TMP6:%.*]] = icmp eq <2 x i32> [[TMP5]], zeroinitializer
332 ; AVX512-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> zeroinitializer, <2 x i64> <i64 64, i64 128>
333 ; AVX512-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP3]])
334 ; AVX512-NEXT: [[OP_RDX:%.*]] = or i64 [[TMP8]], [[OR_5]]
335 ; AVX512-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP7]], i32 0
336 ; AVX512-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1
337 ; AVX512-NEXT: [[OP_RDX1:%.*]] = or i64 [[TMP9]], [[TMP10]]
338 ; AVX512-NEXT: [[OP_RDX2:%.*]] = or i64 [[OP_RDX]], [[OP_RDX1]]
339 ; AVX512-NEXT: [[OP_RDX3:%.*]] = or i64 [[OP_RDX2]], [[OR]]
340 ; AVX512-NEXT: ret i64 [[OP_RDX3]]
343 %0 = load i32, ptr %src, align 4
344 %tobool.not = icmp ne i32 %0, 0
345 %or = zext i1 %tobool.not to i64
346 %arrayidx.1 = getelementptr inbounds i32, ptr %src, i64 1
347 %1 = load i32, ptr %arrayidx.1, align 4
348 %tobool.not.1 = icmp eq i32 %1, 0
349 %or.1 = select i1 %tobool.not.1, i64 0, i64 2
350 %mask.1.1 = or i64 %or.1, %or
351 %arrayidx.2 = getelementptr inbounds i32, ptr %src, i64 2
352 %2 = load i32, ptr %arrayidx.2, align 4
353 %tobool.not.2 = icmp eq i32 %2, 0
354 %or.2 = select i1 %tobool.not.2, i64 0, i64 4
355 %mask.1.2 = or i64 %or.2, %mask.1.1
356 %arrayidx.3 = getelementptr inbounds i32, ptr %src, i64 3
357 %3 = load i32, ptr %arrayidx.3, align 4
358 %tobool.not.3 = icmp eq i32 %3, 0
359 %or.3 = select i1 %tobool.not.3, i64 0, i64 8
360 %mask.1.3 = or i64 %or.3, %mask.1.2
361 %arrayidx.4 = getelementptr inbounds i32, ptr %src, i64 4
362 %4 = load i32, ptr %arrayidx.4, align 4
363 %tobool.not.4 = icmp eq i32 %4, 0
364 %or.4 = select i1 %tobool.not.4, i64 0, i64 16
365 %mask.1.4 = or i64 %or.4, %mask.1.3
366 %arrayidx.5 = getelementptr inbounds i32, ptr %src, i64 5
367 %5 = load i32, ptr %arrayidx.5, align 4
368 %tobool.not.5 = icmp eq i32 %5, 0
369 %or.5 = select i1 %tobool.not.5, i64 0, i64 32
370 %mask.1.5 = or i64 %or.5, %mask.1.4
371 %arrayidx.6 = getelementptr inbounds i32, ptr %src, i64 6
372 %6 = load i32, ptr %arrayidx.6, align 4
373 %tobool.not.6 = icmp eq i32 %6, 0
374 %or.6 = select i1 %tobool.not.6, i64 0, i64 64
375 %mask.1.6 = or i64 %or.6, %mask.1.5
376 %arrayidx.7 = getelementptr inbounds i32, ptr %src, i64 7
377 %7 = load i32, ptr %arrayidx.7, align 4
378 %tobool.not.7 = icmp eq i32 %7, 0
379 %or.7 = select i1 %tobool.not.7, i64 0, i64 128
380 %mask.1.7 = or i64 %or.7, %mask.1.6
384 define i64 @bitmask_8xi64(ptr nocapture noundef readonly %src) {
385 ; SSE2-LABEL: @bitmask_8xi64(
387 ; SSE2-NEXT: [[TMP0:%.*]] = load i64, ptr [[SRC:%.*]], align 8
388 ; SSE2-NEXT: [[TOBOOL_NOT:%.*]] = icmp ne i64 [[TMP0]], 0
389 ; SSE2-NEXT: [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
390 ; SSE2-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 1
391 ; SSE2-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX_1]], align 8
392 ; SSE2-NEXT: [[TOBOOL_NOT_1:%.*]] = icmp eq i64 [[TMP1]], 0
393 ; SSE2-NEXT: [[OR_1:%.*]] = select i1 [[TOBOOL_NOT_1]], i64 0, i64 2
394 ; SSE2-NEXT: [[MASK_1_1:%.*]] = or i64 [[OR_1]], [[OR]]
395 ; SSE2-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 2
396 ; SSE2-NEXT: [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX_2]], align 8
397 ; SSE2-NEXT: [[TOBOOL_NOT_2:%.*]] = icmp eq i64 [[TMP2]], 0
398 ; SSE2-NEXT: [[OR_2:%.*]] = select i1 [[TOBOOL_NOT_2]], i64 0, i64 4
399 ; SSE2-NEXT: [[MASK_1_2:%.*]] = or i64 [[OR_2]], [[MASK_1_1]]
400 ; SSE2-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 3
401 ; SSE2-NEXT: [[TMP3:%.*]] = load i64, ptr [[ARRAYIDX_3]], align 8
402 ; SSE2-NEXT: [[TOBOOL_NOT_3:%.*]] = icmp eq i64 [[TMP3]], 0
403 ; SSE2-NEXT: [[OR_3:%.*]] = select i1 [[TOBOOL_NOT_3]], i64 0, i64 8
404 ; SSE2-NEXT: [[MASK_1_3:%.*]] = or i64 [[OR_3]], [[MASK_1_2]]
405 ; SSE2-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 4
406 ; SSE2-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARRAYIDX_4]], align 8
407 ; SSE2-NEXT: [[TOBOOL_NOT_4:%.*]] = icmp eq i64 [[TMP4]], 0
408 ; SSE2-NEXT: [[OR_4:%.*]] = select i1 [[TOBOOL_NOT_4]], i64 0, i64 16
409 ; SSE2-NEXT: [[MASK_1_4:%.*]] = or i64 [[OR_4]], [[MASK_1_3]]
410 ; SSE2-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 5
411 ; SSE2-NEXT: [[TMP5:%.*]] = load i64, ptr [[ARRAYIDX_5]], align 8
412 ; SSE2-NEXT: [[TOBOOL_NOT_5:%.*]] = icmp eq i64 [[TMP5]], 0
413 ; SSE2-NEXT: [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
414 ; SSE2-NEXT: [[MASK_1_5:%.*]] = or i64 [[OR_5]], [[MASK_1_4]]
415 ; SSE2-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 6
416 ; SSE2-NEXT: [[TMP6:%.*]] = load i64, ptr [[ARRAYIDX_6]], align 8
417 ; SSE2-NEXT: [[TOBOOL_NOT_6:%.*]] = icmp eq i64 [[TMP6]], 0
418 ; SSE2-NEXT: [[OR_6:%.*]] = select i1 [[TOBOOL_NOT_6]], i64 0, i64 64
419 ; SSE2-NEXT: [[MASK_1_6:%.*]] = or i64 [[OR_6]], [[MASK_1_5]]
420 ; SSE2-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 7
421 ; SSE2-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARRAYIDX_7]], align 8
422 ; SSE2-NEXT: [[TOBOOL_NOT_7:%.*]] = icmp eq i64 [[TMP7]], 0
423 ; SSE2-NEXT: [[OR_7:%.*]] = select i1 [[TOBOOL_NOT_7]], i64 0, i64 128
424 ; SSE2-NEXT: [[MASK_1_7:%.*]] = or i64 [[OR_7]], [[MASK_1_6]]
425 ; SSE2-NEXT: ret i64 [[MASK_1_7]]
427 ; SSE4-LABEL: @bitmask_8xi64(
429 ; SSE4-NEXT: [[TMP0:%.*]] = load i64, ptr [[SRC:%.*]], align 8
430 ; SSE4-NEXT: [[TOBOOL_NOT:%.*]] = icmp ne i64 [[TMP0]], 0
431 ; SSE4-NEXT: [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
432 ; SSE4-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 1
433 ; SSE4-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr [[ARRAYIDX_1]], align 8
434 ; SSE4-NEXT: [[TMP2:%.*]] = icmp eq <4 x i64> [[TMP1]], zeroinitializer
435 ; SSE4-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> zeroinitializer, <4 x i64> <i64 2, i64 4, i64 8, i64 16>
436 ; SSE4-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 5
437 ; SSE4-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARRAYIDX_5]], align 8
438 ; SSE4-NEXT: [[TOBOOL_NOT_5:%.*]] = icmp eq i64 [[TMP4]], 0
439 ; SSE4-NEXT: [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
440 ; SSE4-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 6
441 ; SSE4-NEXT: [[TMP5:%.*]] = load i64, ptr [[ARRAYIDX_6]], align 8
442 ; SSE4-NEXT: [[TOBOOL_NOT_6:%.*]] = icmp eq i64 [[TMP5]], 0
443 ; SSE4-NEXT: [[OR_6:%.*]] = select i1 [[TOBOOL_NOT_6]], i64 0, i64 64
444 ; SSE4-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 7
445 ; SSE4-NEXT: [[TMP6:%.*]] = load i64, ptr [[ARRAYIDX_7]], align 8
446 ; SSE4-NEXT: [[TOBOOL_NOT_7:%.*]] = icmp eq i64 [[TMP6]], 0
447 ; SSE4-NEXT: [[OR_7:%.*]] = select i1 [[TOBOOL_NOT_7]], i64 0, i64 128
448 ; SSE4-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP3]])
449 ; SSE4-NEXT: [[OP_RDX:%.*]] = or i64 [[TMP7]], [[OR_5]]
450 ; SSE4-NEXT: [[OP_RDX1:%.*]] = or i64 [[OR_6]], [[OR_7]]
451 ; SSE4-NEXT: [[OP_RDX2:%.*]] = or i64 [[OP_RDX]], [[OP_RDX1]]
452 ; SSE4-NEXT: [[OP_RDX3:%.*]] = or i64 [[OP_RDX2]], [[OR]]
453 ; SSE4-NEXT: ret i64 [[OP_RDX3]]
455 ; AVX-LABEL: @bitmask_8xi64(
457 ; AVX-NEXT: [[TMP0:%.*]] = load i64, ptr [[SRC:%.*]], align 8
458 ; AVX-NEXT: [[TOBOOL_NOT:%.*]] = icmp ne i64 [[TMP0]], 0
459 ; AVX-NEXT: [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
460 ; AVX-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 1
461 ; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr [[ARRAYIDX_1]], align 8
462 ; AVX-NEXT: [[TMP2:%.*]] = icmp eq <4 x i64> [[TMP1]], zeroinitializer
463 ; AVX-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> zeroinitializer, <4 x i64> <i64 2, i64 4, i64 8, i64 16>
464 ; AVX-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 5
465 ; AVX-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARRAYIDX_5]], align 8
466 ; AVX-NEXT: [[TOBOOL_NOT_5:%.*]] = icmp eq i64 [[TMP4]], 0
467 ; AVX-NEXT: [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
468 ; AVX-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 6
469 ; AVX-NEXT: [[TMP5:%.*]] = load i64, ptr [[ARRAYIDX_6]], align 8
470 ; AVX-NEXT: [[TOBOOL_NOT_6:%.*]] = icmp eq i64 [[TMP5]], 0
471 ; AVX-NEXT: [[OR_6:%.*]] = select i1 [[TOBOOL_NOT_6]], i64 0, i64 64
472 ; AVX-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 7
473 ; AVX-NEXT: [[TMP6:%.*]] = load i64, ptr [[ARRAYIDX_7]], align 8
474 ; AVX-NEXT: [[TOBOOL_NOT_7:%.*]] = icmp eq i64 [[TMP6]], 0
475 ; AVX-NEXT: [[OR_7:%.*]] = select i1 [[TOBOOL_NOT_7]], i64 0, i64 128
476 ; AVX-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP3]])
477 ; AVX-NEXT: [[OP_RDX:%.*]] = or i64 [[TMP7]], [[OR_5]]
478 ; AVX-NEXT: [[OP_RDX1:%.*]] = or i64 [[OR_6]], [[OR_7]]
479 ; AVX-NEXT: [[OP_RDX2:%.*]] = or i64 [[OP_RDX]], [[OP_RDX1]]
480 ; AVX-NEXT: [[OP_RDX3:%.*]] = or i64 [[OP_RDX2]], [[OR]]
481 ; AVX-NEXT: ret i64 [[OP_RDX3]]
483 ; AVX512-LABEL: @bitmask_8xi64(
484 ; AVX512-NEXT: entry:
485 ; AVX512-NEXT: [[TMP0:%.*]] = load i64, ptr [[SRC:%.*]], align 8
486 ; AVX512-NEXT: [[TOBOOL_NOT:%.*]] = icmp ne i64 [[TMP0]], 0
487 ; AVX512-NEXT: [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
488 ; AVX512-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 1
489 ; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr [[ARRAYIDX_1]], align 8
490 ; AVX512-NEXT: [[TMP2:%.*]] = icmp eq <4 x i64> [[TMP1]], zeroinitializer
491 ; AVX512-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> zeroinitializer, <4 x i64> <i64 2, i64 4, i64 8, i64 16>
492 ; AVX512-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 5
493 ; AVX512-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARRAYIDX_5]], align 8
494 ; AVX512-NEXT: [[TOBOOL_NOT_5:%.*]] = icmp eq i64 [[TMP4]], 0
495 ; AVX512-NEXT: [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
496 ; AVX512-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 6
497 ; AVX512-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX_6]], align 8
498 ; AVX512-NEXT: [[TMP6:%.*]] = icmp eq <2 x i64> [[TMP5]], zeroinitializer
499 ; AVX512-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> zeroinitializer, <2 x i64> <i64 64, i64 128>
500 ; AVX512-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP3]])
501 ; AVX512-NEXT: [[OP_RDX:%.*]] = or i64 [[TMP8]], [[OR_5]]
502 ; AVX512-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP7]], i32 0
503 ; AVX512-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1
504 ; AVX512-NEXT: [[OP_RDX1:%.*]] = or i64 [[TMP9]], [[TMP10]]
505 ; AVX512-NEXT: [[OP_RDX2:%.*]] = or i64 [[OP_RDX]], [[OP_RDX1]]
506 ; AVX512-NEXT: [[OP_RDX3:%.*]] = or i64 [[OP_RDX2]], [[OR]]
507 ; AVX512-NEXT: ret i64 [[OP_RDX3]]
510 %0 = load i64, ptr %src, align 8
511 %tobool.not = icmp ne i64 %0, 0
512 %or = zext i1 %tobool.not to i64
513 %arrayidx.1 = getelementptr inbounds i64, ptr %src, i64 1
514 %1 = load i64, ptr %arrayidx.1, align 8
515 %tobool.not.1 = icmp eq i64 %1, 0
516 %or.1 = select i1 %tobool.not.1, i64 0, i64 2
517 %mask.1.1 = or i64 %or.1, %or
518 %arrayidx.2 = getelementptr inbounds i64, ptr %src, i64 2
519 %2 = load i64, ptr %arrayidx.2, align 8
520 %tobool.not.2 = icmp eq i64 %2, 0
521 %or.2 = select i1 %tobool.not.2, i64 0, i64 4
522 %mask.1.2 = or i64 %or.2, %mask.1.1
523 %arrayidx.3 = getelementptr inbounds i64, ptr %src, i64 3
524 %3 = load i64, ptr %arrayidx.3, align 8
525 %tobool.not.3 = icmp eq i64 %3, 0
526 %or.3 = select i1 %tobool.not.3, i64 0, i64 8
527 %mask.1.3 = or i64 %or.3, %mask.1.2
528 %arrayidx.4 = getelementptr inbounds i64, ptr %src, i64 4
529 %4 = load i64, ptr %arrayidx.4, align 8
530 %tobool.not.4 = icmp eq i64 %4, 0
531 %or.4 = select i1 %tobool.not.4, i64 0, i64 16
532 %mask.1.4 = or i64 %or.4, %mask.1.3
533 %arrayidx.5 = getelementptr inbounds i64, ptr %src, i64 5
534 %5 = load i64, ptr %arrayidx.5, align 8
535 %tobool.not.5 = icmp eq i64 %5, 0
536 %or.5 = select i1 %tobool.not.5, i64 0, i64 32
537 %mask.1.5 = or i64 %or.5, %mask.1.4
538 %arrayidx.6 = getelementptr inbounds i64, ptr %src, i64 6
539 %6 = load i64, ptr %arrayidx.6, align 8
540 %tobool.not.6 = icmp eq i64 %6, 0
541 %or.6 = select i1 %tobool.not.6, i64 0, i64 64
542 %mask.1.6 = or i64 %or.6, %mask.1.5
543 %arrayidx.7 = getelementptr inbounds i64, ptr %src, i64 7
544 %7 = load i64, ptr %arrayidx.7, align 8
545 %tobool.not.7 = icmp eq i64 %7, 0
546 %or.7 = select i1 %tobool.not.7, i64 0, i64 128
547 %mask.1.7 = or i64 %or.7, %mask.1.6