1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=aarch64 | FileCheck %s
4 ; This test is two versions of the IR extracted from a DSP routine performing
5 ; boolean distance. The first involves a mixture of vector intrinsics and
6 ; scalar code, the second is just scalar. They should both ideally not be
7 ; vectorized any more than the input (unless the codegen has improved).
; @dist_vec - boolean-distance kernel mixing vector intrinsics with scalar code.
; Reads i32 words from %pA and %pB and counts, over %numberOfBools bits, the
; positions where (A,B) is (1,1), (0,0), (1,0) and (0,1); the four i32 totals
; are stored to %cTT, %cFF, %cTF and %cFT respectively.
;   1. while.body: 128 bits per iteration via ctpop + uaddlp on <4 x i32> loads.
;   2. while.body88 / while.body93: one i32 word per outer iteration, one bit
;      per inner iteration (32 inner iterations).
;   3. while.body132: the remaining (< 32) bits of one last word.
; The autogenerated assertions that follow record the vectorizer's current
; output, in which the four scalar counters are carried as one <4 x i32>.
10 define void @dist_vec(ptr nocapture noundef readonly %pA, ptr nocapture noundef readonly %pB, i32 noundef %numberOfBools, ptr nocapture noundef writeonly %cTT, ptr nocapture noundef writeonly %cFF, ptr nocapture noundef writeonly %cTF, ptr nocapture noundef writeonly %cFT) {
11 ; CHECK-LABEL: @dist_vec(
13 ; CHECK-NEXT: [[CMP_NOT264:%.*]] = icmp ult i32 [[NUMBEROFBOOLS:%.*]], 128
14 ; CHECK-NEXT: br i1 [[CMP_NOT264]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
15 ; CHECK: while.body.preheader:
16 ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[NUMBEROFBOOLS]], 7
17 ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[SHR]], -1
18 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
19 ; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
20 ; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 16
21 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PA:%.*]], i64 [[TMP3]]
22 ; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
24 ; CHECK-NEXT: [[PA_ADDR_0271:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[WHILE_BODY]] ], [ [[PA]], [[WHILE_BODY_PREHEADER]] ]
25 ; CHECK-NEXT: [[PB_ADDR_0270:%.*]] = phi ptr [ [[ADD_PTR8:%.*]], [[WHILE_BODY]] ], [ [[PB:%.*]], [[WHILE_BODY_PREHEADER]] ]
26 ; CHECK-NEXT: [[NBBOOLBLOCK_0269:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[SHR]], [[WHILE_BODY_PREHEADER]] ]
27 ; CHECK-NEXT: [[TMP4TT_0268:%.*]] = phi <2 x i64> [ [[ADD_I:%.*]], [[WHILE_BODY]] ], [ zeroinitializer, [[WHILE_BODY_PREHEADER]] ]
28 ; CHECK-NEXT: [[TMP4FF_0267:%.*]] = phi <2 x i64> [ [[ADD_I253:%.*]], [[WHILE_BODY]] ], [ zeroinitializer, [[WHILE_BODY_PREHEADER]] ]
29 ; CHECK-NEXT: [[TMP4TF_0266:%.*]] = phi <2 x i64> [ [[ADD_I258:%.*]], [[WHILE_BODY]] ], [ zeroinitializer, [[WHILE_BODY_PREHEADER]] ]
30 ; CHECK-NEXT: [[TMP4FT_0265:%.*]] = phi <2 x i64> [ [[ADD_I263:%.*]], [[WHILE_BODY]] ], [ zeroinitializer, [[WHILE_BODY_PREHEADER]] ]
31 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[PA_ADDR_0271]], align 4
32 ; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[PB_ADDR_0270]], align 4
33 ; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, ptr [[PA_ADDR_0271]], i64 4
34 ; CHECK-NEXT: [[ADD_PTR8]] = getelementptr inbounds i32, ptr [[PB_ADDR_0270]], i64 4
35 ; CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[TMP5]], [[TMP4]]
36 ; CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
37 ; CHECK-NEXT: [[NOT_I242:%.*]] = xor <4 x i32> [[TMP5]], <i32 -1, i32 -1, i32 -1, i32 -1>
38 ; CHECK-NEXT: [[AND_I243:%.*]] = and <4 x i32> [[NOT_I242]], [[NOT_I]]
39 ; CHECK-NEXT: [[AND_I245:%.*]] = and <4 x i32> [[TMP4]], [[NOT_I242]]
40 ; CHECK-NEXT: [[AND_I247:%.*]] = and <4 x i32> [[TMP5]], [[NOT_I]]
41 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[AND_I]] to <16 x i8>
42 ; CHECK-NEXT: [[VCNTQ_V_I:%.*]] = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP6]])
43 ; CHECK-NEXT: [[VPADDL_I:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> [[VCNTQ_V_I]])
44 ; CHECK-NEXT: [[VPADDL1_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[VPADDL_I]])
45 ; CHECK-NEXT: [[VPADDL1_I248:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[VPADDL1_I]])
46 ; CHECK-NEXT: [[ADD_I]] = add <2 x i64> [[VPADDL1_I248]], [[TMP4TT_0268]]
47 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[AND_I243]] to <16 x i8>
48 ; CHECK-NEXT: [[VCNTQ_V_I249:%.*]] = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP7]])
49 ; CHECK-NEXT: [[VPADDL_I250:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> [[VCNTQ_V_I249]])
50 ; CHECK-NEXT: [[VPADDL1_I251:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[VPADDL_I250]])
51 ; CHECK-NEXT: [[VPADDL1_I252:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[VPADDL1_I251]])
52 ; CHECK-NEXT: [[ADD_I253]] = add <2 x i64> [[VPADDL1_I252]], [[TMP4FF_0267]]
53 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[AND_I245]] to <16 x i8>
54 ; CHECK-NEXT: [[VCNTQ_V_I254:%.*]] = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP8]])
55 ; CHECK-NEXT: [[VPADDL_I255:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> [[VCNTQ_V_I254]])
56 ; CHECK-NEXT: [[VPADDL1_I256:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[VPADDL_I255]])
57 ; CHECK-NEXT: [[VPADDL1_I257:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[VPADDL1_I256]])
58 ; CHECK-NEXT: [[ADD_I258]] = add <2 x i64> [[VPADDL1_I257]], [[TMP4TF_0266]]
59 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[AND_I247]] to <16 x i8>
60 ; CHECK-NEXT: [[VCNTQ_V_I259:%.*]] = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP9]])
61 ; CHECK-NEXT: [[VPADDL_I260:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> [[VCNTQ_V_I259]])
62 ; CHECK-NEXT: [[VPADDL1_I261:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[VPADDL_I260]])
63 ; CHECK-NEXT: [[VPADDL1_I262:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[VPADDL1_I261]])
64 ; CHECK-NEXT: [[ADD_I263]] = add <2 x i64> [[VPADDL1_I262]], [[TMP4FT_0265]]
65 ; CHECK-NEXT: [[DEC]] = add nsw i32 [[NBBOOLBLOCK_0269]], -1
66 ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0
67 ; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
68 ; CHECK: while.end.loopexit:
69 ; CHECK-NEXT: [[SCEVGEP311:%.*]] = getelementptr i8, ptr [[PB]], i64 [[TMP3]]
70 ; CHECK-NEXT: br label [[WHILE_END]]
72 ; CHECK-NEXT: [[TMP4FT_0_LCSSA:%.*]] = phi <2 x i64> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[ADD_I263]], [[WHILE_END_LOOPEXIT]] ]
73 ; CHECK-NEXT: [[TMP4TF_0_LCSSA:%.*]] = phi <2 x i64> [ zeroinitializer, [[ENTRY]] ], [ [[ADD_I258]], [[WHILE_END_LOOPEXIT]] ]
74 ; CHECK-NEXT: [[TMP4FF_0_LCSSA:%.*]] = phi <2 x i64> [ zeroinitializer, [[ENTRY]] ], [ [[ADD_I253]], [[WHILE_END_LOOPEXIT]] ]
75 ; CHECK-NEXT: [[TMP4TT_0_LCSSA:%.*]] = phi <2 x i64> [ zeroinitializer, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_END_LOOPEXIT]] ]
76 ; CHECK-NEXT: [[PB_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PB]], [[ENTRY]] ], [ [[SCEVGEP311]], [[WHILE_END_LOOPEXIT]] ]
77 ; CHECK-NEXT: [[PA_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PA]], [[ENTRY]] ], [ [[SCEVGEP]], [[WHILE_END_LOOPEXIT]] ]
78 ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP4FT_0_LCSSA]], <2 x i64> [[TMP4TF_0_LCSSA]], <2 x i32> <i32 0, i32 2>
79 ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i64> [[TMP4TT_0_LCSSA]], <2 x i64> [[TMP4FF_0_LCSSA]], <2 x i32> <i32 0, i32 2>
80 ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
81 ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP4FT_0_LCSSA]], <2 x i64> [[TMP4TF_0_LCSSA]], <2 x i32> <i32 1, i32 3>
82 ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[TMP4TT_0_LCSSA]], <2 x i64> [[TMP4FF_0_LCSSA]], <2 x i32> <i32 1, i32 3>
83 ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
84 ; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i64> [[TMP12]], [[TMP15]]
85 ; CHECK-NEXT: [[TMP17:%.*]] = trunc <4 x i64> [[TMP16]] to <4 x i32>
86 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[NUMBEROFBOOLS]], 127
87 ; CHECK-NEXT: [[CMP86284:%.*]] = icmp ugt i32 [[AND]], 31
88 ; CHECK-NEXT: br i1 [[CMP86284]], label [[WHILE_BODY88:%.*]], label [[WHILE_END122:%.*]]
89 ; CHECK: while.body88:
90 ; CHECK-NEXT: [[PA_ADDR_1291:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_END121:%.*]] ], [ [[PA_ADDR_0_LCSSA]], [[WHILE_END]] ]
91 ; CHECK-NEXT: [[PB_ADDR_1290:%.*]] = phi ptr [ [[INCDEC_PTR89:%.*]], [[WHILE_END121]] ], [ [[PB_ADDR_0_LCSSA]], [[WHILE_END]] ]
92 ; CHECK-NEXT: [[NBBOOLBLOCK_1285:%.*]] = phi i32 [ [[SUB:%.*]], [[WHILE_END121]] ], [ [[AND]], [[WHILE_END]] ]
93 ; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP34:%.*]], [[WHILE_END121]] ], [ [[TMP17]], [[WHILE_END]] ]
94 ; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[PA_ADDR_1291]], align 4
95 ; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[PB_ADDR_1290]], align 4
96 ; CHECK-NEXT: br label [[WHILE_BODY93:%.*]]
97 ; CHECK: while.body93:
98 ; CHECK-NEXT: [[A_0279:%.*]] = phi i32 [ [[TMP19]], [[WHILE_BODY88]] ], [ [[SHR96:%.*]], [[WHILE_BODY93]] ]
99 ; CHECK-NEXT: [[B_0278:%.*]] = phi i32 [ [[TMP20]], [[WHILE_BODY88]] ], [ [[SHR97:%.*]], [[WHILE_BODY93]] ]
100 ; CHECK-NEXT: [[SHIFT_0277:%.*]] = phi i32 [ 0, [[WHILE_BODY88]] ], [ [[INC:%.*]], [[WHILE_BODY93]] ]
101 ; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i32> [ [[TMP18]], [[WHILE_BODY88]] ], [ [[TMP34]], [[WHILE_BODY93]] ]
102 ; CHECK-NEXT: [[AND94:%.*]] = and i32 [[A_0279]], 1
103 ; CHECK-NEXT: [[AND95:%.*]] = and i32 [[B_0278]], 1
104 ; CHECK-NEXT: [[SHR96]] = lshr i32 [[A_0279]], 1
105 ; CHECK-NEXT: [[SHR97]] = lshr i32 [[B_0278]], 1
106 ; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> poison, i32 [[AND94]], i32 0
107 ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x i32> [[TMP22]], <2 x i32> poison, <2 x i32> zeroinitializer
108 ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq <2 x i32> [[TMP23]], zeroinitializer
109 ; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <2 x i32> [[TMP23]], zeroinitializer
110 ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <2 x i1> [[TMP24]], <2 x i1> [[TMP25]], <4 x i32> <i32 0, i32 3, i32 3, i32 0>
111 ; CHECK-NEXT: [[TMP27:%.*]] = insertelement <2 x i32> poison, i32 [[AND95]], i32 0
112 ; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x i32> [[TMP27]], <2 x i32> poison, <2 x i32> zeroinitializer
113 ; CHECK-NEXT: [[TMP29:%.*]] = icmp ne <2 x i32> [[TMP28]], zeroinitializer
114 ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq <2 x i32> [[TMP28]], zeroinitializer
115 ; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <2 x i1> [[TMP29]], <2 x i1> [[TMP30]], <4 x i32> <i32 0, i32 3, i32 0, i32 3>
116 ; CHECK-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP26]], <4 x i1> [[TMP31]], <4 x i1> zeroinitializer
117 ; CHECK-NEXT: [[TMP33:%.*]] = zext <4 x i1> [[TMP32]] to <4 x i32>
118 ; CHECK-NEXT: [[TMP34]] = add <4 x i32> [[TMP21]], [[TMP33]]
119 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[SHIFT_0277]], 1
120 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], 32
121 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[WHILE_END121]], label [[WHILE_BODY93]]
122 ; CHECK: while.end121:
123 ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[PA_ADDR_1291]], i64 1
124 ; CHECK-NEXT: [[INCDEC_PTR89]] = getelementptr inbounds i32, ptr [[PB_ADDR_1290]], i64 1
125 ; CHECK-NEXT: [[SUB]] = add nsw i32 [[NBBOOLBLOCK_1285]], -32
126 ; CHECK-NEXT: [[CMP86:%.*]] = icmp ugt i32 [[SUB]], 31
127 ; CHECK-NEXT: br i1 [[CMP86]], label [[WHILE_BODY88]], label [[WHILE_END122]]
128 ; CHECK: while.end122:
129 ; CHECK-NEXT: [[NBBOOLBLOCK_1_LCSSA:%.*]] = phi i32 [ [[AND]], [[WHILE_END]] ], [ [[SUB]], [[WHILE_END121]] ]
130 ; CHECK-NEXT: [[PB_ADDR_1_LCSSA:%.*]] = phi ptr [ [[PB_ADDR_0_LCSSA]], [[WHILE_END]] ], [ [[INCDEC_PTR89]], [[WHILE_END121]] ]
131 ; CHECK-NEXT: [[PA_ADDR_1_LCSSA:%.*]] = phi ptr [ [[PA_ADDR_0_LCSSA]], [[WHILE_END]] ], [ [[INCDEC_PTR]], [[WHILE_END121]] ]
132 ; CHECK-NEXT: [[TMP35:%.*]] = phi <4 x i32> [ [[TMP17]], [[WHILE_END]] ], [ [[TMP34]], [[WHILE_END121]] ]
133 ; CHECK-NEXT: [[CMP130_NOT299:%.*]] = icmp eq i32 [[NBBOOLBLOCK_1_LCSSA]], 0
134 ; CHECK-NEXT: br i1 [[CMP130_NOT299]], label [[WHILE_END166:%.*]], label [[WHILE_BODY132_PREHEADER:%.*]]
135 ; CHECK: while.body132.preheader:
136 ; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[PB_ADDR_1_LCSSA]], align 4
137 ; CHECK-NEXT: [[SUB125:%.*]] = sub nuw nsw i32 32, [[NBBOOLBLOCK_1_LCSSA]]
138 ; CHECK-NEXT: [[SHR128:%.*]] = lshr i32 [[TMP36]], [[SUB125]]
139 ; CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[PA_ADDR_1_LCSSA]], align 4
140 ; CHECK-NEXT: [[SHR126:%.*]] = lshr i32 [[TMP37]], [[SUB125]]
141 ; CHECK-NEXT: br label [[WHILE_BODY132:%.*]]
142 ; CHECK: while.body132:
143 ; CHECK-NEXT: [[NBBOOLBLOCK_2302:%.*]] = phi i32 [ [[DEC165:%.*]], [[WHILE_BODY132]] ], [ [[NBBOOLBLOCK_1_LCSSA]], [[WHILE_BODY132_PREHEADER]] ]
144 ; CHECK-NEXT: [[A_1301:%.*]] = phi i32 [ [[SHR135:%.*]], [[WHILE_BODY132]] ], [ [[SHR126]], [[WHILE_BODY132_PREHEADER]] ]
145 ; CHECK-NEXT: [[B_1300:%.*]] = phi i32 [ [[SHR136:%.*]], [[WHILE_BODY132]] ], [ [[SHR128]], [[WHILE_BODY132_PREHEADER]] ]
146 ; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP51:%.*]], [[WHILE_BODY132]] ], [ [[TMP35]], [[WHILE_BODY132_PREHEADER]] ]
147 ; CHECK-NEXT: [[AND133:%.*]] = and i32 [[A_1301]], 1
148 ; CHECK-NEXT: [[AND134:%.*]] = and i32 [[B_1300]], 1
149 ; CHECK-NEXT: [[SHR135]] = lshr i32 [[A_1301]], 1
150 ; CHECK-NEXT: [[SHR136]] = lshr i32 [[B_1300]], 1
151 ; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x i32> poison, i32 [[AND133]], i32 0
152 ; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <2 x i32> [[TMP39]], <2 x i32> poison, <2 x i32> zeroinitializer
153 ; CHECK-NEXT: [[TMP41:%.*]] = icmp eq <2 x i32> [[TMP40]], zeroinitializer
154 ; CHECK-NEXT: [[TMP42:%.*]] = icmp ne <2 x i32> [[TMP40]], zeroinitializer
155 ; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <2 x i1> [[TMP41]], <2 x i1> [[TMP42]], <4 x i32> <i32 0, i32 3, i32 3, i32 0>
156 ; CHECK-NEXT: [[TMP44:%.*]] = insertelement <2 x i32> poison, i32 [[AND134]], i32 0
157 ; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <2 x i32> [[TMP44]], <2 x i32> poison, <2 x i32> zeroinitializer
158 ; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <2 x i32> [[TMP45]], zeroinitializer
159 ; CHECK-NEXT: [[TMP47:%.*]] = icmp eq <2 x i32> [[TMP45]], zeroinitializer
160 ; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <2 x i1> [[TMP46]], <2 x i1> [[TMP47]], <4 x i32> <i32 0, i32 3, i32 0, i32 3>
161 ; CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP43]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer
162 ; CHECK-NEXT: [[TMP50:%.*]] = zext <4 x i1> [[TMP49]] to <4 x i32>
163 ; CHECK-NEXT: [[TMP51]] = add <4 x i32> [[TMP38]], [[TMP50]]
164 ; CHECK-NEXT: [[DEC165]] = add nsw i32 [[NBBOOLBLOCK_2302]], -1
165 ; CHECK-NEXT: [[CMP130_NOT:%.*]] = icmp eq i32 [[DEC165]], 0
166 ; CHECK-NEXT: br i1 [[CMP130_NOT]], label [[WHILE_END166]], label [[WHILE_BODY132]]
167 ; CHECK: while.end166:
168 ; CHECK-NEXT: [[TMP52:%.*]] = phi <4 x i32> [ [[TMP35]], [[WHILE_END122]] ], [ [[TMP51]], [[WHILE_BODY132]] ]
169 ; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i32> [[TMP52]], i32 2
170 ; CHECK-NEXT: store i32 [[TMP53]], ptr [[CTT:%.*]], align 4
171 ; CHECK-NEXT: [[TMP54:%.*]] = extractelement <4 x i32> [[TMP52]], i32 3
172 ; CHECK-NEXT: store i32 [[TMP54]], ptr [[CFF:%.*]], align 4
173 ; CHECK-NEXT: [[TMP55:%.*]] = extractelement <4 x i32> [[TMP52]], i32 1
174 ; CHECK-NEXT: store i32 [[TMP55]], ptr [[CTF:%.*]], align 4
175 ; CHECK-NEXT: [[TMP56:%.*]] = extractelement <4 x i32> [[TMP52]], i32 0
176 ; CHECK-NEXT: store i32 [[TMP56]], ptr [[CFT:%.*]], align 4
177 ; CHECK-NEXT: ret void
; Entry: with fewer than 128 bools, bypass the 128-bit loop.
180 %cmp.not264 = icmp ult i32 %numberOfBools, 128
181 br i1 %cmp.not264, label %while.end, label %while.body.preheader
; %shr = numberOfBools / 128 is the trip count of the 128-bit loop;
; %3 = 16 * %shr is the number of bytes that loop consumes from each input.
183 while.body.preheader: ; preds = %entry
184 %shr = lshr i32 %numberOfBools, 7
185 %0 = add nsw i32 %shr, -1
186 %1 = zext i32 %0 to i64
187 %2 = shl nuw nsw i64 %1, 4
188 %3 = add nuw nsw i64 %2, 16
189 %scevgep = getelementptr i8, ptr %pA, i64 %3
; 128-bit loop: load four i32 words from each input and advance both pointers
; by four words; four <2 x i64> accumulators start at zero.
192 while.body: ; preds = %while.body.preheader, %while.body
193 %pA.addr.0271 = phi ptr [ %add.ptr, %while.body ], [ %pA, %while.body.preheader ]
194 %pB.addr.0270 = phi ptr [ %add.ptr8, %while.body ], [ %pB, %while.body.preheader ]
195 %nbBoolBlock.0269 = phi i32 [ %dec, %while.body ], [ %shr, %while.body.preheader ]
196 %tmp4tt.0268 = phi <2 x i64> [ %add.i, %while.body ], [ zeroinitializer, %while.body.preheader ]
197 %tmp4ff.0267 = phi <2 x i64> [ %add.i253, %while.body ], [ zeroinitializer, %while.body.preheader ]
198 %tmp4tf.0266 = phi <2 x i64> [ %add.i258, %while.body ], [ zeroinitializer, %while.body.preheader ]
199 %tmp4ft.0265 = phi <2 x i64> [ %add.i263, %while.body ], [ zeroinitializer, %while.body.preheader ]
200 %4 = load <4 x i32>, ptr %pA.addr.0271, align 4
201 %5 = load <4 x i32>, ptr %pB.addr.0270, align 4
202 %add.ptr = getelementptr inbounds i32, ptr %pA.addr.0271, i64 4
203 %add.ptr8 = getelementptr inbounds i32, ptr %pB.addr.0270, i64 4
; Bit masks for the four cases: A&B, ~A&~B, A&~B and ~A&B.
204 %and.i = and <4 x i32> %5, %4
205 %not.i = xor <4 x i32> %4, <i32 -1, i32 -1, i32 -1, i32 -1>
206 %not.i242 = xor <4 x i32> %5, <i32 -1, i32 -1, i32 -1, i32 -1>
207 %and.i243 = and <4 x i32> %not.i242, %not.i
208 %and.i245 = and <4 x i32> %4, %not.i242
209 %and.i247 = and <4 x i32> %5, %not.i
; For each mask: per-byte ctpop, then three uaddlp steps taking <16 x i8> to
; <2 x i64>, added into that case's accumulator.
210 %6 = bitcast <4 x i32> %and.i to <16 x i8>
211 %vcntq_v.i = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %6)
212 %vpaddl.i = tail call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %vcntq_v.i)
213 %vpaddl1.i = tail call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %vpaddl.i)
214 %vpaddl1.i248 = tail call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %vpaddl1.i)
215 %add.i = add <2 x i64> %vpaddl1.i248, %tmp4tt.0268
216 %7 = bitcast <4 x i32> %and.i243 to <16 x i8>
217 %vcntq_v.i249 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %7)
218 %vpaddl.i250 = tail call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %vcntq_v.i249)
219 %vpaddl1.i251 = tail call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %vpaddl.i250)
220 %vpaddl1.i252 = tail call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %vpaddl1.i251)
221 %add.i253 = add <2 x i64> %vpaddl1.i252, %tmp4ff.0267
222 %8 = bitcast <4 x i32> %and.i245 to <16 x i8>
223 %vcntq_v.i254 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %8)
224 %vpaddl.i255 = tail call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %vcntq_v.i254)
225 %vpaddl1.i256 = tail call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %vpaddl.i255)
226 %vpaddl1.i257 = tail call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %vpaddl1.i256)
227 %add.i258 = add <2 x i64> %vpaddl1.i257, %tmp4tf.0266
228 %9 = bitcast <4 x i32> %and.i247 to <16 x i8>
229 %vcntq_v.i259 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %9)
230 %vpaddl.i260 = tail call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %vcntq_v.i259)
231 %vpaddl1.i261 = tail call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %vpaddl.i260)
232 %vpaddl1.i262 = tail call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %vpaddl1.i261)
233 %add.i263 = add <2 x i64> %vpaddl1.i262, %tmp4ft.0265
234 %dec = add nsw i32 %nbBoolBlock.0269, -1
235 %cmp.not = icmp eq i32 %dec, 0
236 br i1 %cmp.not, label %while.end.loopexit, label %while.body
238 while.end.loopexit: ; preds = %while.body
239 %scevgep311 = getelementptr i8, ptr %pB, i64 %3
; Collapse each <2 x i64> accumulator to one i32 by adding its two lanes and
; truncating the sum.
242 while.end: ; preds = %while.end.loopexit, %entry
243 %tmp4ft.0.lcssa = phi <2 x i64> [ zeroinitializer, %entry ], [ %add.i263, %while.end.loopexit ]
244 %tmp4tf.0.lcssa = phi <2 x i64> [ zeroinitializer, %entry ], [ %add.i258, %while.end.loopexit ]
245 %tmp4ff.0.lcssa = phi <2 x i64> [ zeroinitializer, %entry ], [ %add.i253, %while.end.loopexit ]
246 %tmp4tt.0.lcssa = phi <2 x i64> [ zeroinitializer, %entry ], [ %add.i, %while.end.loopexit ]
247 %pB.addr.0.lcssa = phi ptr [ %pB, %entry ], [ %scevgep311, %while.end.loopexit ]
248 %pA.addr.0.lcssa = phi ptr [ %pA, %entry ], [ %scevgep, %while.end.loopexit ]
249 %vgetq_lane = extractelement <2 x i64> %tmp4tt.0.lcssa, i64 0
250 %vgetq_lane45 = extractelement <2 x i64> %tmp4tt.0.lcssa, i64 1
251 %add = add i64 %vgetq_lane, %vgetq_lane45
252 %conv48 = trunc i64 %add to i32
253 %vgetq_lane51 = extractelement <2 x i64> %tmp4ff.0.lcssa, i64 0
254 %vgetq_lane55 = extractelement <2 x i64> %tmp4ff.0.lcssa, i64 1
255 %add57 = add i64 %vgetq_lane51, %vgetq_lane55
256 %conv60 = trunc i64 %add57 to i32
257 %vgetq_lane63 = extractelement <2 x i64> %tmp4tf.0.lcssa, i64 0
258 %vgetq_lane67 = extractelement <2 x i64> %tmp4tf.0.lcssa, i64 1
259 %add69 = add i64 %vgetq_lane63, %vgetq_lane67
260 %conv72 = trunc i64 %add69 to i32
261 %vgetq_lane75 = extractelement <2 x i64> %tmp4ft.0.lcssa, i64 0
262 %vgetq_lane79 = extractelement <2 x i64> %tmp4ft.0.lcssa, i64 1
263 %add81 = add i64 %vgetq_lane75, %vgetq_lane79
264 %conv84 = trunc i64 %add81 to i32
; Bools left over after the 128-bit loop; whole 32-bit words are handled while
; more than 31 remain.
265 %and = and i32 %numberOfBools, 127
266 %cmp86284 = icmp ugt i32 %and, 31
267 br i1 %cmp86284, label %while.body88, label %while.end122
; Word loop: the counter phis carry the running totals; one i32 is loaded from
; each input per iteration.
269 while.body88: ; preds = %while.end, %while.end121
270 %pA.addr.1291 = phi ptr [ %incdec.ptr, %while.end121 ], [ %pA.addr.0.lcssa, %while.end ]
271 %pB.addr.1290 = phi ptr [ %incdec.ptr89, %while.end121 ], [ %pB.addr.0.lcssa, %while.end ]
272 %_ctt.0289 = phi i32 [ %add99, %while.end121 ], [ %conv48, %while.end ]
273 %_cff.0288 = phi i32 [ %add106, %while.end121 ], [ %conv60, %while.end ]
274 %_ctf.0287 = phi i32 [ %add113, %while.end121 ], [ %conv72, %while.end ]
275 %_cft.0286 = phi i32 [ %add120, %while.end121 ], [ %conv84, %while.end ]
276 %nbBoolBlock.1285 = phi i32 [ %sub, %while.end121 ], [ %and, %while.end ]
277 %10 = load i32, ptr %pA.addr.1291, align 4
278 %11 = load i32, ptr %pB.addr.1290, align 4
279 br label %while.body93
; Bit loop: examine the low bit of each word, then shift both words right by
; one; exits after 32 iterations.
281 while.body93: ; preds = %while.body88, %while.body93
282 %_ctt.1283 = phi i32 [ %_ctt.0289, %while.body88 ], [ %add99, %while.body93 ]
283 %_cff.1282 = phi i32 [ %_cff.0288, %while.body88 ], [ %add106, %while.body93 ]
284 %_ctf.1281 = phi i32 [ %_ctf.0287, %while.body88 ], [ %add113, %while.body93 ]
285 %_cft.1280 = phi i32 [ %_cft.0286, %while.body88 ], [ %add120, %while.body93 ]
286 %a.0279 = phi i32 [ %10, %while.body88 ], [ %shr96, %while.body93 ]
287 %b.0278 = phi i32 [ %11, %while.body88 ], [ %shr97, %while.body93 ]
288 %shift.0277 = phi i32 [ 0, %while.body88 ], [ %inc, %while.body93 ]
289 %and94 = and i32 %a.0279, 1
290 %and95 = and i32 %b.0278, 1
291 %shr96 = lshr i32 %a.0279, 1
292 %shr97 = lshr i32 %b.0278, 1
; Add 1 to the counter whose (A bit, B bit) combination matches, in the order
; tt (1,1), ff (0,0), tf (1,0), ft (0,1).
293 %tobool = icmp ne i32 %and94, 0
294 %tobool98 = icmp ne i32 %and95, 0
295 %12 = select i1 %tobool, i1 %tobool98, i1 false
296 %land.ext = zext i1 %12 to i32
297 %add99 = add i32 %_ctt.1283, %land.ext
298 %tobool100 = icmp eq i32 %and94, 0
299 %tobool103 = icmp eq i32 %and95, 0
300 %13 = select i1 %tobool100, i1 %tobool103, i1 false
301 %land.ext105 = zext i1 %13 to i32
302 %add106 = add i32 %_cff.1282, %land.ext105
303 %14 = select i1 %tobool, i1 %tobool103, i1 false
304 %land.ext112 = zext i1 %14 to i32
305 %add113 = add i32 %_ctf.1281, %land.ext112
306 %15 = select i1 %tobool100, i1 %tobool98, i1 false
307 %land.ext119 = zext i1 %15 to i32
308 %add120 = add i32 %_cft.1280, %land.ext119
309 %inc = add nuw nsw i32 %shift.0277, 1
310 %exitcond.not = icmp eq i32 %inc, 32
311 br i1 %exitcond.not, label %while.end121, label %while.body93
; Advance both pointers by one word and drop 32 from the remaining count.
313 while.end121: ; preds = %while.body93
314 %incdec.ptr = getelementptr inbounds i32, ptr %pA.addr.1291, i64 1
315 %incdec.ptr89 = getelementptr inbounds i32, ptr %pB.addr.1290, i64 1
316 %sub = add nsw i32 %nbBoolBlock.1285, -32
317 %cmp86 = icmp ugt i32 %sub, 31
318 br i1 %cmp86, label %while.body88, label %while.end122
320 while.end122: ; preds = %while.end121, %while.end
321 %nbBoolBlock.1.lcssa = phi i32 [ %and, %while.end ], [ %sub, %while.end121 ]
322 %_cft.0.lcssa = phi i32 [ %conv84, %while.end ], [ %add120, %while.end121 ]
323 %_ctf.0.lcssa = phi i32 [ %conv72, %while.end ], [ %add113, %while.end121 ]
324 %_cff.0.lcssa = phi i32 [ %conv60, %while.end ], [ %add106, %while.end121 ]
325 %_ctt.0.lcssa = phi i32 [ %conv48, %while.end ], [ %add99, %while.end121 ]
326 %pB.addr.1.lcssa = phi ptr [ %pB.addr.0.lcssa, %while.end ], [ %incdec.ptr89, %while.end121 ]
327 %pA.addr.1.lcssa = phi ptr [ %pA.addr.0.lcssa, %while.end ], [ %incdec.ptr, %while.end121 ]
328 %cmp130.not299 = icmp eq i32 %nbBoolBlock.1.lcssa, 0
329 br i1 %cmp130.not299, label %while.end166, label %while.body132.preheader
; Tail: load one more word from each input and shift it right by
; (32 - remaining) before the per-bit loop.
331 while.body132.preheader: ; preds = %while.end122
332 %16 = load i32, ptr %pB.addr.1.lcssa, align 4
333 %sub125 = sub nuw nsw i32 32, %nbBoolBlock.1.lcssa
334 %shr128 = lshr i32 %16, %sub125
335 %17 = load i32, ptr %pA.addr.1.lcssa, align 4
336 %shr126 = lshr i32 %17, %sub125
337 br label %while.body132
; Per-bit tail loop: same counting as the bit loop above, run once per
; remaining bool.
339 while.body132: ; preds = %while.body132.preheader, %while.body132
340 %_ctt.2306 = phi i32 [ %add142, %while.body132 ], [ %_ctt.0.lcssa, %while.body132.preheader ]
341 %_cff.2305 = phi i32 [ %add150, %while.body132 ], [ %_cff.0.lcssa, %while.body132.preheader ]
342 %_ctf.2304 = phi i32 [ %add157, %while.body132 ], [ %_ctf.0.lcssa, %while.body132.preheader ]
343 %_cft.2303 = phi i32 [ %add164, %while.body132 ], [ %_cft.0.lcssa, %while.body132.preheader ]
344 %nbBoolBlock.2302 = phi i32 [ %dec165, %while.body132 ], [ %nbBoolBlock.1.lcssa, %while.body132.preheader ]
345 %a.1301 = phi i32 [ %shr135, %while.body132 ], [ %shr126, %while.body132.preheader ]
346 %b.1300 = phi i32 [ %shr136, %while.body132 ], [ %shr128, %while.body132.preheader ]
347 %and133 = and i32 %a.1301, 1
348 %and134 = and i32 %b.1300, 1
349 %shr135 = lshr i32 %a.1301, 1
350 %shr136 = lshr i32 %b.1300, 1
351 %tobool137 = icmp ne i32 %and133, 0
352 %tobool139 = icmp ne i32 %and134, 0
353 %18 = select i1 %tobool137, i1 %tobool139, i1 false
354 %land.ext141 = zext i1 %18 to i32
355 %add142 = add i32 %_ctt.2306, %land.ext141
356 %tobool144 = icmp eq i32 %and133, 0
357 %tobool147 = icmp eq i32 %and134, 0
358 %19 = select i1 %tobool144, i1 %tobool147, i1 false
359 %land.ext149 = zext i1 %19 to i32
360 %add150 = add i32 %_cff.2305, %land.ext149
361 %20 = select i1 %tobool137, i1 %tobool147, i1 false
362 %land.ext156 = zext i1 %20 to i32
363 %add157 = add i32 %_ctf.2304, %land.ext156
364 %21 = select i1 %tobool144, i1 %tobool139, i1 false
365 %land.ext163 = zext i1 %21 to i32
366 %add164 = add i32 %_cft.2303, %land.ext163
367 %dec165 = add nsw i32 %nbBoolBlock.2302, -1
368 %cmp130.not = icmp eq i32 %dec165, 0
369 br i1 %cmp130.not, label %while.end166, label %while.body132
; Select the final totals and write them to the four output pointers.
371 while.end166: ; preds = %while.body132, %while.end122
372 %_cft.2.lcssa = phi i32 [ %_cft.0.lcssa, %while.end122 ], [ %add164, %while.body132 ]
373 %_ctf.2.lcssa = phi i32 [ %_ctf.0.lcssa, %while.end122 ], [ %add157, %while.body132 ]
374 %_cff.2.lcssa = phi i32 [ %_cff.0.lcssa, %while.end122 ], [ %add150, %while.body132 ]
375 %_ctt.2.lcssa = phi i32 [ %_ctt.0.lcssa, %while.end122 ], [ %add142, %while.body132 ]
376 store i32 %_ctt.2.lcssa, ptr %cTT, align 4
377 store i32 %_cff.2.lcssa, ptr %cFF, align 4
378 store i32 %_ctf.2.lcssa, ptr %cTF, align 4
379 store i32 %_cft.2.lcssa, ptr %cFT, align 4
383 define void @scalar(ptr nocapture noundef readonly %pA, ptr nocapture noundef readonly %pB, i32 noundef %numberOfBools, ptr nocapture noundef writeonly %cTT, ptr nocapture noundef writeonly %cFF, ptr nocapture noundef writeonly %cTF, ptr nocapture noundef writeonly %cFT) {
384 ; CHECK-LABEL: @scalar(
386 ; CHECK-NEXT: [[CMP117:%.*]] = icmp ugt i32 [[NUMBEROFBOOLS:%.*]], 31
387 ; CHECK-NEXT: br i1 [[CMP117]], label [[WHILE_BODY:%.*]], label [[WHILE_END29:%.*]]
389 ; CHECK-NEXT: [[_CFT_0124:%.*]] = phi i32 [ [[ADD28:%.*]], [[WHILE_END:%.*]] ], [ 0, [[ENTRY:%.*]] ]
390 ; CHECK-NEXT: [[_CTF_0123:%.*]] = phi i32 [ [[ADD21:%.*]], [[WHILE_END]] ], [ 0, [[ENTRY]] ]
391 ; CHECK-NEXT: [[_CFF_0122:%.*]] = phi i32 [ [[ADD14:%.*]], [[WHILE_END]] ], [ 0, [[ENTRY]] ]
392 ; CHECK-NEXT: [[_CTT_0121:%.*]] = phi i32 [ [[ADD:%.*]], [[WHILE_END]] ], [ 0, [[ENTRY]] ]
393 ; CHECK-NEXT: [[PA_ADDR_0120:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_END]] ], [ [[PA:%.*]], [[ENTRY]] ]
394 ; CHECK-NEXT: [[PB_ADDR_0119:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], [[WHILE_END]] ], [ [[PB:%.*]], [[ENTRY]] ]
395 ; CHECK-NEXT: [[NUMBEROFBOOLS_ADDR_0118:%.*]] = phi i32 [ [[SUB:%.*]], [[WHILE_END]] ], [ [[NUMBEROFBOOLS]], [[ENTRY]] ]
396 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[PA_ADDR_0120]], align 4
397 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[PB_ADDR_0119]], align 4
398 ; CHECK-NEXT: br label [[WHILE_BODY4:%.*]]
399 ; CHECK: while.body4:
400 ; CHECK-NEXT: [[SHIFT_0116:%.*]] = phi i32 [ 0, [[WHILE_BODY]] ], [ [[INC:%.*]], [[WHILE_BODY4]] ]
401 ; CHECK-NEXT: [[B_0115:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY]] ], [ [[SHR6:%.*]], [[WHILE_BODY4]] ]
402 ; CHECK-NEXT: [[A_0114:%.*]] = phi i32 [ [[TMP0]], [[WHILE_BODY]] ], [ [[SHR:%.*]], [[WHILE_BODY4]] ]
403 ; CHECK-NEXT: [[_CFT_1113:%.*]] = phi i32 [ [[_CFT_0124]], [[WHILE_BODY]] ], [ [[ADD28]], [[WHILE_BODY4]] ]
404 ; CHECK-NEXT: [[_CTF_1112:%.*]] = phi i32 [ [[_CTF_0123]], [[WHILE_BODY]] ], [ [[ADD21]], [[WHILE_BODY4]] ]
405 ; CHECK-NEXT: [[_CFF_1111:%.*]] = phi i32 [ [[_CFF_0122]], [[WHILE_BODY]] ], [ [[ADD14]], [[WHILE_BODY4]] ]
406 ; CHECK-NEXT: [[_CTT_1110:%.*]] = phi i32 [ [[_CTT_0121]], [[WHILE_BODY]] ], [ [[ADD]], [[WHILE_BODY4]] ]
407 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[A_0114]], 1
408 ; CHECK-NEXT: [[AND5:%.*]] = and i32 [[B_0115]], 1
409 ; CHECK-NEXT: [[SHR]] = lshr i32 [[A_0114]], 1
410 ; CHECK-NEXT: [[SHR6]] = lshr i32 [[B_0115]], 1
411 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND]], 0
412 ; CHECK-NEXT: [[TOBOOL7:%.*]] = icmp ne i32 [[AND5]], 0
413 ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TOBOOL]], i1 [[TOBOOL7]], i1 false
414 ; CHECK-NEXT: [[LAND_EXT:%.*]] = zext i1 [[TMP2]] to i32
415 ; CHECK-NEXT: [[ADD]] = add i32 [[_CTT_1110]], [[LAND_EXT]]
416 ; CHECK-NEXT: [[TOBOOL8:%.*]] = icmp eq i32 [[AND]], 0
417 ; CHECK-NEXT: [[TOBOOL11:%.*]] = icmp eq i32 [[AND5]], 0
418 ; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TOBOOL8]], i1 [[TOBOOL11]], i1 false
419 ; CHECK-NEXT: [[LAND_EXT13:%.*]] = zext i1 [[TMP3]] to i32
420 ; CHECK-NEXT: [[ADD14]] = add i32 [[_CFF_1111]], [[LAND_EXT13]]
421 ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TOBOOL]], i1 [[TOBOOL11]], i1 false
422 ; CHECK-NEXT: [[LAND_EXT20:%.*]] = zext i1 [[TMP4]] to i32
423 ; CHECK-NEXT: [[ADD21]] = add i32 [[_CTF_1112]], [[LAND_EXT20]]
424 ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TOBOOL8]], i1 [[TOBOOL7]], i1 false
425 ; CHECK-NEXT: [[LAND_EXT27:%.*]] = zext i1 [[TMP5]] to i32
426 ; CHECK-NEXT: [[ADD28]] = add i32 [[_CFT_1113]], [[LAND_EXT27]]
427 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[SHIFT_0116]], 1
428 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], 32
429 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[WHILE_END]], label [[WHILE_BODY4]]
431 ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[PA_ADDR_0120]], i64 1
432 ; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i32, ptr [[PB_ADDR_0119]], i64 1
433 ; CHECK-NEXT: [[SUB]] = add i32 [[NUMBEROFBOOLS_ADDR_0118]], -32
434 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SUB]], 31
435 ; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_END29]]
436 ; CHECK: while.end29:
437 ; CHECK-NEXT: [[NUMBEROFBOOLS_ADDR_0_LCSSA:%.*]] = phi i32 [ [[NUMBEROFBOOLS]], [[ENTRY]] ], [ [[SUB]], [[WHILE_END]] ]
438 ; CHECK-NEXT: [[PB_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PB]], [[ENTRY]] ], [ [[INCDEC_PTR1]], [[WHILE_END]] ]
439 ; CHECK-NEXT: [[PA_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PA]], [[ENTRY]] ], [ [[INCDEC_PTR]], [[WHILE_END]] ]
440 ; CHECK-NEXT: [[_CTT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD]], [[WHILE_END]] ]
441 ; CHECK-NEXT: [[_CFF_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD14]], [[WHILE_END]] ]
442 ; CHECK-NEXT: [[_CTF_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD21]], [[WHILE_END]] ]
443 ; CHECK-NEXT: [[_CFT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD28]], [[WHILE_END]] ]
444 ; CHECK-NEXT: [[CMP37_NOT131:%.*]] = icmp eq i32 [[NUMBEROFBOOLS_ADDR_0_LCSSA]], 0
445 ; CHECK-NEXT: br i1 [[CMP37_NOT131]], label [[WHILE_END71:%.*]], label [[WHILE_BODY38_PREHEADER:%.*]]
446 ; CHECK: while.body38.preheader:
447 ; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[PB_ADDR_0_LCSSA]], align 4
448 ; CHECK-NEXT: [[SUB32:%.*]] = sub nuw nsw i32 32, [[NUMBEROFBOOLS_ADDR_0_LCSSA]]
449 ; CHECK-NEXT: [[SHR35:%.*]] = lshr i32 [[TMP6]], [[SUB32]]
450 ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[PA_ADDR_0_LCSSA]], align 4
451 ; CHECK-NEXT: [[SHR33:%.*]] = lshr i32 [[TMP7]], [[SUB32]]
452 ; CHECK-NEXT: br label [[WHILE_BODY38:%.*]]
453 ; CHECK: while.body38:
454 ; CHECK-NEXT: [[B_1138:%.*]] = phi i32 [ [[SHR42:%.*]], [[WHILE_BODY38]] ], [ [[SHR35]], [[WHILE_BODY38_PREHEADER]] ]
455 ; CHECK-NEXT: [[A_1137:%.*]] = phi i32 [ [[SHR41:%.*]], [[WHILE_BODY38]] ], [ [[SHR33]], [[WHILE_BODY38_PREHEADER]] ]
456 ; CHECK-NEXT: [[_CFT_2136:%.*]] = phi i32 [ [[ADD70:%.*]], [[WHILE_BODY38]] ], [ [[_CFT_0_LCSSA]], [[WHILE_BODY38_PREHEADER]] ]
457 ; CHECK-NEXT: [[_CTF_2135:%.*]] = phi i32 [ [[ADD63:%.*]], [[WHILE_BODY38]] ], [ [[_CTF_0_LCSSA]], [[WHILE_BODY38_PREHEADER]] ]
458 ; CHECK-NEXT: [[_CFF_2134:%.*]] = phi i32 [ [[ADD56:%.*]], [[WHILE_BODY38]] ], [ [[_CFF_0_LCSSA]], [[WHILE_BODY38_PREHEADER]] ]
459 ; CHECK-NEXT: [[_CTT_2133:%.*]] = phi i32 [ [[ADD48:%.*]], [[WHILE_BODY38]] ], [ [[_CTT_0_LCSSA]], [[WHILE_BODY38_PREHEADER]] ]
460 ; CHECK-NEXT: [[NUMBEROFBOOLS_ADDR_1132:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY38]] ], [ [[NUMBEROFBOOLS_ADDR_0_LCSSA]], [[WHILE_BODY38_PREHEADER]] ]
461 ; CHECK-NEXT: [[AND39:%.*]] = and i32 [[A_1137]], 1
462 ; CHECK-NEXT: [[AND40:%.*]] = and i32 [[B_1138]], 1
463 ; CHECK-NEXT: [[SHR41]] = lshr i32 [[A_1137]], 1
464 ; CHECK-NEXT: [[SHR42]] = lshr i32 [[B_1138]], 1
465 ; CHECK-NEXT: [[TOBOOL43:%.*]] = icmp ne i32 [[AND39]], 0
466 ; CHECK-NEXT: [[TOBOOL45:%.*]] = icmp ne i32 [[AND40]], 0
467 ; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TOBOOL43]], i1 [[TOBOOL45]], i1 false
468 ; CHECK-NEXT: [[LAND_EXT47:%.*]] = zext i1 [[TMP8]] to i32
469 ; CHECK-NEXT: [[ADD48]] = add i32 [[_CTT_2133]], [[LAND_EXT47]]
470 ; CHECK-NEXT: [[TOBOOL50:%.*]] = icmp eq i32 [[AND39]], 0
471 ; CHECK-NEXT: [[TOBOOL53:%.*]] = icmp eq i32 [[AND40]], 0
472 ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TOBOOL50]], i1 [[TOBOOL53]], i1 false
473 ; CHECK-NEXT: [[LAND_EXT55:%.*]] = zext i1 [[TMP9]] to i32
474 ; CHECK-NEXT: [[ADD56]] = add i32 [[_CFF_2134]], [[LAND_EXT55]]
475 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TOBOOL43]], i1 [[TOBOOL53]], i1 false
476 ; CHECK-NEXT: [[LAND_EXT62:%.*]] = zext i1 [[TMP10]] to i32
477 ; CHECK-NEXT: [[ADD63]] = add i32 [[_CTF_2135]], [[LAND_EXT62]]
478 ; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TOBOOL50]], i1 [[TOBOOL45]], i1 false
479 ; CHECK-NEXT: [[LAND_EXT69:%.*]] = zext i1 [[TMP11]] to i32
480 ; CHECK-NEXT: [[ADD70]] = add i32 [[_CFT_2136]], [[LAND_EXT69]]
481 ; CHECK-NEXT: [[DEC]] = add nsw i32 [[NUMBEROFBOOLS_ADDR_1132]], -1
482 ; CHECK-NEXT: [[CMP37_NOT:%.*]] = icmp eq i32 [[DEC]], 0
483 ; CHECK-NEXT: br i1 [[CMP37_NOT]], label [[WHILE_END71]], label [[WHILE_BODY38]]
484 ; CHECK: while.end71:
485 ; CHECK-NEXT: [[_CTT_2_LCSSA:%.*]] = phi i32 [ [[_CTT_0_LCSSA]], [[WHILE_END29]] ], [ [[ADD48]], [[WHILE_BODY38]] ]
486 ; CHECK-NEXT: [[_CFF_2_LCSSA:%.*]] = phi i32 [ [[_CFF_0_LCSSA]], [[WHILE_END29]] ], [ [[ADD56]], [[WHILE_BODY38]] ]
487 ; CHECK-NEXT: [[_CTF_2_LCSSA:%.*]] = phi i32 [ [[_CTF_0_LCSSA]], [[WHILE_END29]] ], [ [[ADD63]], [[WHILE_BODY38]] ]
488 ; CHECK-NEXT: [[_CFT_2_LCSSA:%.*]] = phi i32 [ [[_CFT_0_LCSSA]], [[WHILE_END29]] ], [ [[ADD70]], [[WHILE_BODY38]] ]
489 ; CHECK-NEXT: store i32 [[_CTT_2_LCSSA]], ptr [[CTT:%.*]], align 4
490 ; CHECK-NEXT: store i32 [[_CFF_2_LCSSA]], ptr [[CFF:%.*]], align 4
491 ; CHECK-NEXT: store i32 [[_CTF_2_LCSSA]], ptr [[CTF:%.*]], align 4
492 ; CHECK-NEXT: store i32 [[_CFT_2_LCSSA]], ptr [[CFT:%.*]], align 4
493 ; CHECK-NEXT: ret void
496 %cmp117 = icmp ugt i32 %numberOfBools, 31
497 br i1 %cmp117, label %while.body, label %while.end29
499 while.body: ; preds = %entry, %while.end
500 %_cft.0124 = phi i32 [ %add28, %while.end ], [ 0, %entry ]
501 %_ctf.0123 = phi i32 [ %add21, %while.end ], [ 0, %entry ]
502 %_cff.0122 = phi i32 [ %add14, %while.end ], [ 0, %entry ]
503 %_ctt.0121 = phi i32 [ %add, %while.end ], [ 0, %entry ]
504 %pA.addr.0120 = phi ptr [ %incdec.ptr, %while.end ], [ %pA, %entry ]
505 %pB.addr.0119 = phi ptr [ %incdec.ptr1, %while.end ], [ %pB, %entry ]
506 %numberOfBools.addr.0118 = phi i32 [ %sub, %while.end ], [ %numberOfBools, %entry ]
507 %0 = load i32, ptr %pA.addr.0120, align 4
508 %1 = load i32, ptr %pB.addr.0119, align 4
509 br label %while.body4
511 while.body4: ; preds = %while.body, %while.body4
512 %shift.0116 = phi i32 [ 0, %while.body ], [ %inc, %while.body4 ]
513 %b.0115 = phi i32 [ %1, %while.body ], [ %shr6, %while.body4 ]
514 %a.0114 = phi i32 [ %0, %while.body ], [ %shr, %while.body4 ]
515 %_cft.1113 = phi i32 [ %_cft.0124, %while.body ], [ %add28, %while.body4 ]
516 %_ctf.1112 = phi i32 [ %_ctf.0123, %while.body ], [ %add21, %while.body4 ]
517 %_cff.1111 = phi i32 [ %_cff.0122, %while.body ], [ %add14, %while.body4 ]
518 %_ctt.1110 = phi i32 [ %_ctt.0121, %while.body ], [ %add, %while.body4 ]
519 %and = and i32 %a.0114, 1
520 %and5 = and i32 %b.0115, 1
521 %shr = lshr i32 %a.0114, 1
522 %shr6 = lshr i32 %b.0115, 1
523 %tobool = icmp ne i32 %and, 0
524 %tobool7 = icmp ne i32 %and5, 0
525 %2 = select i1 %tobool, i1 %tobool7, i1 false
526 %land.ext = zext i1 %2 to i32
527 %add = add i32 %_ctt.1110, %land.ext
528 %tobool8 = icmp eq i32 %and, 0
529 %tobool11 = icmp eq i32 %and5, 0
530 %3 = select i1 %tobool8, i1 %tobool11, i1 false
531 %land.ext13 = zext i1 %3 to i32
532 %add14 = add i32 %_cff.1111, %land.ext13
533 %4 = select i1 %tobool, i1 %tobool11, i1 false
534 %land.ext20 = zext i1 %4 to i32
535 %add21 = add i32 %_ctf.1112, %land.ext20
536 %5 = select i1 %tobool8, i1 %tobool7, i1 false
537 %land.ext27 = zext i1 %5 to i32
538 %add28 = add i32 %_cft.1113, %land.ext27
539 %inc = add nuw nsw i32 %shift.0116, 1
540 %exitcond.not = icmp eq i32 %inc, 32
541 br i1 %exitcond.not, label %while.end, label %while.body4
543 while.end: ; preds = %while.body4
544 %incdec.ptr = getelementptr inbounds i32, ptr %pA.addr.0120, i64 1
545 %incdec.ptr1 = getelementptr inbounds i32, ptr %pB.addr.0119, i64 1
546 %sub = add i32 %numberOfBools.addr.0118, -32
547 %cmp = icmp ugt i32 %sub, 31
548 br i1 %cmp, label %while.body, label %while.end29
550 while.end29: ; preds = %while.end, %entry
551 %numberOfBools.addr.0.lcssa = phi i32 [ %numberOfBools, %entry ], [ %sub, %while.end ]
552 %pB.addr.0.lcssa = phi ptr [ %pB, %entry ], [ %incdec.ptr1, %while.end ]
553 %pA.addr.0.lcssa = phi ptr [ %pA, %entry ], [ %incdec.ptr, %while.end ]
554 %_ctt.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.end ]
555 %_cff.0.lcssa = phi i32 [ 0, %entry ], [ %add14, %while.end ]
556 %_ctf.0.lcssa = phi i32 [ 0, %entry ], [ %add21, %while.end ]
557 %_cft.0.lcssa = phi i32 [ 0, %entry ], [ %add28, %while.end ]
558 %cmp37.not131 = icmp eq i32 %numberOfBools.addr.0.lcssa, 0
559 br i1 %cmp37.not131, label %while.end71, label %while.body38.preheader
561 while.body38.preheader: ; preds = %while.end29
562 %6 = load i32, ptr %pB.addr.0.lcssa, align 4
563 %sub32 = sub nuw nsw i32 32, %numberOfBools.addr.0.lcssa
564 %shr35 = lshr i32 %6, %sub32
565 %7 = load i32, ptr %pA.addr.0.lcssa, align 4
566 %shr33 = lshr i32 %7, %sub32
567 br label %while.body38
569 while.body38: ; preds = %while.body38.preheader, %while.body38
570 %b.1138 = phi i32 [ %shr42, %while.body38 ], [ %shr35, %while.body38.preheader ]
571 %a.1137 = phi i32 [ %shr41, %while.body38 ], [ %shr33, %while.body38.preheader ]
572 %_cft.2136 = phi i32 [ %add70, %while.body38 ], [ %_cft.0.lcssa, %while.body38.preheader ]
573 %_ctf.2135 = phi i32 [ %add63, %while.body38 ], [ %_ctf.0.lcssa, %while.body38.preheader ]
574 %_cff.2134 = phi i32 [ %add56, %while.body38 ], [ %_cff.0.lcssa, %while.body38.preheader ]
575 %_ctt.2133 = phi i32 [ %add48, %while.body38 ], [ %_ctt.0.lcssa, %while.body38.preheader ]
576 %numberOfBools.addr.1132 = phi i32 [ %dec, %while.body38 ], [ %numberOfBools.addr.0.lcssa, %while.body38.preheader ]
577 %and39 = and i32 %a.1137, 1
578 %and40 = and i32 %b.1138, 1
579 %shr41 = lshr i32 %a.1137, 1
580 %shr42 = lshr i32 %b.1138, 1
581 %tobool43 = icmp ne i32 %and39, 0
582 %tobool45 = icmp ne i32 %and40, 0
583 %8 = select i1 %tobool43, i1 %tobool45, i1 false
584 %land.ext47 = zext i1 %8 to i32
585 %add48 = add i32 %_ctt.2133, %land.ext47
586 %tobool50 = icmp eq i32 %and39, 0
587 %tobool53 = icmp eq i32 %and40, 0
588 %9 = select i1 %tobool50, i1 %tobool53, i1 false
589 %land.ext55 = zext i1 %9 to i32
590 %add56 = add i32 %_cff.2134, %land.ext55
591 %10 = select i1 %tobool43, i1 %tobool53, i1 false
592 %land.ext62 = zext i1 %10 to i32
593 %add63 = add i32 %_ctf.2135, %land.ext62
594 %11 = select i1 %tobool50, i1 %tobool45, i1 false
595 %land.ext69 = zext i1 %11 to i32
596 %add70 = add i32 %_cft.2136, %land.ext69
597 %dec = add nsw i32 %numberOfBools.addr.1132, -1
598 %cmp37.not = icmp eq i32 %dec, 0
599 br i1 %cmp37.not, label %while.end71, label %while.body38
601 while.end71: ; preds = %while.body38, %while.end29
602 %_ctt.2.lcssa = phi i32 [ %_ctt.0.lcssa, %while.end29 ], [ %add48, %while.body38 ]
603 %_cff.2.lcssa = phi i32 [ %_cff.0.lcssa, %while.end29 ], [ %add56, %while.body38 ]
604 %_ctf.2.lcssa = phi i32 [ %_ctf.0.lcssa, %while.end29 ], [ %add63, %while.body38 ]
605 %_cft.2.lcssa = phi i32 [ %_cft.0.lcssa, %while.end29 ], [ %add70, %while.body38 ]
606 store i32 %_ctt.2.lcssa, ptr %cTT, align 4
607 store i32 %_cff.2.lcssa, ptr %cFF, align 4
608 store i32 %_ctf.2.lcssa, ptr %cTF, align 4
609 store i32 %_cft.2.lcssa, ptr %cFT, align 4
; Declarations of the vector intrinsics referenced by this test.
;  - llvm.ctpop.v16i8: LLVM's population-count intrinsic on <16 x i8>;
;    the result has the same vector type as the operand.
;  - llvm.aarch64.neon.uaddlp.*: AArch64 NEON intrinsics; per the signatures
;    below, each takes a vector and returns one with half as many lanes of
;    twice the element width (i8 -> i16 -> i32 -> i64).
; NOTE(review): the call sites are outside this part of the file; presumably
; they are in the intrinsic-based variant described in the file header. The
; attribute groups #1 and #2 are defined elsewhere in the file.
613 declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) #1
614 declare <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8>) #2
615 declare <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16>) #2
616 declare <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32>) #2