; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
; loop1: for each of the %width floats read from %data, clamp the value to
; [0.0, 255.0] (fcmp olt / fcmp ogt + select), convert it to i8 with fptoui and
; store the byte to %dst. The IR is already vectorized: vector.body handles
; 8 elements per iteration as two <4 x float> halves, and for.body is the
; scalar loop used for short trip counts and for the remainder.
; The expected assembly narrows each half with xtn and merges both halves into
; a single 8-byte store.
define void @loop1(ptr noalias nocapture noundef writeonly %dst, ptr nocapture noundef readonly %data, i32 noundef %width) {
; CHECK-LABEL: loop1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    subs w8, w2, #1
; CHECK-NEXT:    b.lt .LBB0_8
; CHECK-NEXT:  // %bb.1: // %for.body.preheader
; CHECK-NEXT:    cmp w8, #6
; CHECK-NEXT:    b.hi .LBB0_3
; CHECK-NEXT:  // %bb.2:
; CHECK-NEXT:    mov w10, wzr
; CHECK-NEXT:    mov x8, x1
; CHECK-NEXT:    mov x9, x0
; CHECK-NEXT:    b .LBB0_6
; CHECK-NEXT:  .LBB0_3: // %vector.ph
; CHECK-NEXT:    add x11, x8, #1
; CHECK-NEXT:    mov w8, #1132396544 // =0x437f0000
; CHECK-NEXT:    add x12, x0, #4
; CHECK-NEXT:    and x10, x11, #0x1fffffff8
; CHECK-NEXT:    dup v0.4s, w8
; CHECK-NEXT:    add x13, x1, #16
; CHECK-NEXT:    add x8, x1, x10, lsl #2
; CHECK-NEXT:    add x9, x0, x10
; CHECK-NEXT:    mov x14, x10
; CHECK-NEXT:  .LBB0_4: // %vector.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldp q1, q2, [x13, #-16]
; CHECK-NEXT:    subs x14, x14, #8
; CHECK-NEXT:    add x13, x13, #32
; CHECK-NEXT:    fcmgt v3.4s, v1.4s, v0.4s
; CHECK-NEXT:    fcmgt v4.4s, v2.4s, v0.4s
; CHECK-NEXT:    fcmlt v5.4s, v1.4s, #0.0
; CHECK-NEXT:    fcmlt v6.4s, v2.4s, #0.0
; CHECK-NEXT:    bit v1.16b, v0.16b, v3.16b
; CHECK-NEXT:    bit v2.16b, v0.16b, v4.16b
; CHECK-NEXT:    bic v1.16b, v1.16b, v5.16b
; CHECK-NEXT:    bic v2.16b, v2.16b, v6.16b
; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
; CHECK-NEXT:    xtn v1.4h, v1.4s
; CHECK-NEXT:    xtn v2.4h, v2.4s
; CHECK-NEXT:    xtn v1.8b, v1.8h
; CHECK-NEXT:    xtn v2.8b, v2.8h
; CHECK-NEXT:    mov v1.s[1], v2.s[0]
; CHECK-NEXT:    stur d1, [x12, #-4]
; CHECK-NEXT:    add x12, x12, #8
; CHECK-NEXT:    b.ne .LBB0_4
; CHECK-NEXT:  // %bb.5: // %middle.block
; CHECK-NEXT:    cmp x11, x10
; CHECK-NEXT:    b.eq .LBB0_8
; CHECK-NEXT:  .LBB0_6: // %for.body.preheader1
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    sub w10, w2, w10
; CHECK-NEXT:    mov w11, #1132396544 // =0x437f0000
; CHECK-NEXT:  .LBB0_7: // %for.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    fmov s2, w11
; CHECK-NEXT:    ldr s1, [x8], #4
; CHECK-NEXT:    fcmp s1, s2
; CHECK-NEXT:    fcsel s2, s2, s1, gt
; CHECK-NEXT:    fcmp s1, #0.0
; CHECK-NEXT:    fcsel s1, s0, s2, mi
; CHECK-NEXT:    subs w10, w10, #1
; CHECK-NEXT:    fcvtzs w12, s1
; CHECK-NEXT:    strb w12, [x9], #1
; CHECK-NEXT:    b.ne .LBB0_7
; CHECK-NEXT:  .LBB0_8: // %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %cmp9 = icmp sgt i32 %width, 0
  br i1 %cmp9, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %0 = add i32 %width, -1
  %1 = zext i32 %0 to i64
  %2 = add nuw nsw i64 %1, 1
  ; Fewer than 8 iterations: skip the vector loop entirely.
  %min.iters.check = icmp ult i32 %0, 7
  br i1 %min.iters.check, label %for.body.preheader21, label %vector.ph

vector.ph:                                        ; preds = %for.body.preheader
  ; 8589934584 == 0x1fffffff8: trip count rounded down to a multiple of 8.
  %n.vec = and i64 %2, 8589934584
  %ind.end = trunc i64 %n.vec to i32
  %ind.end14 = getelementptr float, ptr %data, i64 %n.vec
  %ind.end16 = getelementptr i8, ptr %dst, i64 %n.vec
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %next.gep = getelementptr float, ptr %data, i64 %index
  %next.gep18 = getelementptr i8, ptr %dst, i64 %index
  %wide.load = load <4 x float>, ptr %next.gep, align 4
  %3 = getelementptr float, ptr %next.gep, i64 4
  %wide.load20 = load <4 x float>, ptr %3, align 4
  %4 = fcmp olt <4 x float> %wide.load, zeroinitializer
  %5 = fcmp olt <4 x float> %wide.load20, zeroinitializer
  %6 = fcmp ogt <4 x float> %wide.load, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
  %7 = fcmp ogt <4 x float> %wide.load20, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
  %8 = select <4 x i1> %6, <4 x float> <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>, <4 x float> %wide.load
  %9 = select <4 x i1> %7, <4 x float> <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>, <4 x float> %wide.load20
  %10 = select <4 x i1> %4, <4 x float> zeroinitializer, <4 x float> %8
  %11 = select <4 x i1> %5, <4 x float> zeroinitializer, <4 x float> %9
  %12 = fptoui <4 x float> %10 to <4 x i8>
  %13 = fptoui <4 x float> %11 to <4 x i8>
  store <4 x i8> %12, ptr %next.gep18, align 1
  %14 = getelementptr i8, ptr %next.gep18, i64 4
  store <4 x i8> %13, ptr %14, align 1
  %index.next = add nuw i64 %index, 8
  %15 = icmp eq i64 %index.next, %n.vec
  br i1 %15, label %middle.block, label %vector.body

middle.block:                                     ; preds = %vector.body
  ; Done if the vector loop covered every iteration, otherwise run the tail.
  %cmp.n = icmp eq i64 %2, %n.vec
  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader21

for.body.preheader21:                             ; preds = %for.body.preheader, %middle.block
  %i.012.ph = phi i32 [ 0, %for.body.preheader ], [ %ind.end, %middle.block ]
  %src.011.ph = phi ptr [ %data, %for.body.preheader ], [ %ind.end14, %middle.block ]
  %dst.addr.010.ph = phi ptr [ %dst, %for.body.preheader ], [ %ind.end16, %middle.block ]
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %middle.block, %entry
  ret void

for.body:                                         ; preds = %for.body.preheader21, %for.body
  %i.012 = phi i32 [ %inc, %for.body ], [ %i.012.ph, %for.body.preheader21 ]
  %src.011 = phi ptr [ %add.ptr, %for.body ], [ %src.011.ph, %for.body.preheader21 ]
  %dst.addr.010 = phi ptr [ %add.ptr2, %for.body ], [ %dst.addr.010.ph, %for.body.preheader21 ]
  %16 = load float, ptr %src.011, align 4
  %cmp.i = fcmp olt float %16, 0.000000e+00
  %cmp1.i = fcmp ogt float %16, 2.550000e+02
  %.x.i = select i1 %cmp1.i, float 2.550000e+02, float %16
  %retval.0.i = select i1 %cmp.i, float 0.000000e+00, float %.x.i
  %conv = fptoui float %retval.0.i to i8
  store i8 %conv, ptr %dst.addr.010, align 1
  %add.ptr = getelementptr inbounds float, ptr %src.011, i64 1
  %add.ptr2 = getelementptr inbounds i8, ptr %dst.addr.010, i64 1
  %inc = add nuw nsw i32 %i.012, 1
  %exitcond.not = icmp eq i32 %inc, %width
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
; loop2: same clamp-to-[0.0, 255.0] + fptoui-to-i8 conversion as a scalar loop
; that handles 2 consecutive floats per iteration (%width iterations, source
; advances by 2 floats and destination by 2 bytes each time).
; vector.body covers 4 scalar iterations at once: it loads <8 x float>, splits
; it into even/odd lanes with shufflevector, converts both halves and stores
; the re-interleaved <8 x i8>. The expected assembly uses ld2 for the
; de-interleaving load and trn1 to interleave the narrowed results.
define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture noundef readonly %data, i32 noundef %width) {
; CHECK-LABEL: loop2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    subs w8, w2, #1
; CHECK-NEXT:    b.lt .LBB1_7
; CHECK-NEXT:  // %bb.1: // %for.body.preheader
; CHECK-NEXT:    cmp w8, #2
; CHECK-NEXT:    b.ls .LBB1_4
; CHECK-NEXT:  // %bb.2: // %vector.memcheck
; CHECK-NEXT:    ubfiz x9, x8, #1, #32
; CHECK-NEXT:    add x9, x9, #2
; CHECK-NEXT:    add x10, x1, x9, lsl #2
; CHECK-NEXT:    cmp x10, x0
; CHECK-NEXT:    b.ls .LBB1_8
; CHECK-NEXT:  // %bb.3: // %vector.memcheck
; CHECK-NEXT:    add x9, x0, x9
; CHECK-NEXT:    cmp x9, x1
; CHECK-NEXT:    b.ls .LBB1_8
; CHECK-NEXT:  .LBB1_4:
; CHECK-NEXT:    mov w10, wzr
; CHECK-NEXT:    mov x8, x1
; CHECK-NEXT:    mov x9, x0
; CHECK-NEXT:  .LBB1_5: // %for.body.preheader1
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    sub w10, w2, w10
; CHECK-NEXT:    mov w11, #1132396544 // =0x437f0000
; CHECK-NEXT:  .LBB1_6: // %for.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldp s1, s3, [x8], #8
; CHECK-NEXT:    fmov s2, w11
; CHECK-NEXT:    fcmp s1, s2
; CHECK-NEXT:    fcsel s4, s2, s1, gt
; CHECK-NEXT:    fcmp s1, #0.0
; CHECK-NEXT:    fcsel s1, s0, s4, mi
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    fcsel s2, s2, s3, gt
; CHECK-NEXT:    fcmp s3, #0.0
; CHECK-NEXT:    fcvtzs w12, s1
; CHECK-NEXT:    fcsel s2, s0, s2, mi
; CHECK-NEXT:    subs w10, w10, #1
; CHECK-NEXT:    strb w12, [x9]
; CHECK-NEXT:    fcvtzs w13, s2
; CHECK-NEXT:    strb w13, [x9, #1]
; CHECK-NEXT:    add x9, x9, #2
; CHECK-NEXT:    b.ne .LBB1_6
; CHECK-NEXT:  .LBB1_7: // %for.cond.cleanup
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB1_8: // %vector.ph
; CHECK-NEXT:    add x11, x8, #1
; CHECK-NEXT:    mov w8, #1132396544 // =0x437f0000
; CHECK-NEXT:    and x10, x11, #0x1fffffffc
; CHECK-NEXT:    dup v0.4s, w8
; CHECK-NEXT:    add x8, x1, x10, lsl #3
; CHECK-NEXT:    add x9, x0, x10, lsl #1
; CHECK-NEXT:    mov x12, x10
; CHECK-NEXT:  .LBB1_9: // %vector.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ld2 { v1.4s, v2.4s }, [x1], #32
; CHECK-NEXT:    fcmgt v3.4s, v1.4s, v0.4s
; CHECK-NEXT:    fcmgt v4.4s, v2.4s, v0.4s
; CHECK-NEXT:    fcmlt v5.4s, v1.4s, #0.0
; CHECK-NEXT:    subs x12, x12, #4
; CHECK-NEXT:    bsl v3.16b, v0.16b, v1.16b
; CHECK-NEXT:    fcmlt v1.4s, v2.4s, #0.0
; CHECK-NEXT:    bsl v4.16b, v0.16b, v2.16b
; CHECK-NEXT:    bic v2.16b, v3.16b, v5.16b
; CHECK-NEXT:    bic v1.16b, v4.16b, v1.16b
; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
; CHECK-NEXT:    xtn v2.4h, v2.4s
; CHECK-NEXT:    xtn v1.4h, v1.4s
; CHECK-NEXT:    trn1 v1.8b, v2.8b, v1.8b
; CHECK-NEXT:    str d1, [x0], #8
; CHECK-NEXT:    b.ne .LBB1_9
; CHECK-NEXT:  // %bb.10: // %middle.block
; CHECK-NEXT:    cmp x11, x10
; CHECK-NEXT:    b.ne .LBB1_5
; CHECK-NEXT:    b .LBB1_7
entry:
  %cmp19 = icmp sgt i32 %width, 0
  br i1 %cmp19, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %0 = add i32 %width, -1
  %1 = zext i32 %0 to i64
  %2 = add nuw nsw i64 %1, 1
  ; Fewer than 4 iterations: skip the vector loop entirely.
  %min.iters.check = icmp ult i32 %0, 3
  br i1 %min.iters.check, label %for.body.preheader35, label %vector.memcheck

vector.memcheck:                                  ; preds = %for.body.preheader
  ; Runtime overlap test: %6 = 2 * %width is the element count of both ranges.
  ; If the end of each range lies above the start of the other one, fall back
  ; to the scalar loop.
  %3 = add i32 %width, -1
  %4 = zext i32 %3 to i64
  %5 = shl nuw nsw i64 %4, 1
  %6 = add nuw nsw i64 %5, 2
  %scevgep = getelementptr i8, ptr %dst, i64 %6
  %scevgep24 = getelementptr float, ptr %data, i64 %6
  %bound0 = icmp ugt ptr %scevgep24, %dst
  %bound1 = icmp ugt ptr %scevgep, %data
  %found.conflict = and i1 %bound0, %bound1
  br i1 %found.conflict, label %for.body.preheader35, label %vector.ph

vector.ph:                                        ; preds = %vector.memcheck
  ; 8589934588 == 0x1fffffffc: trip count rounded down to a multiple of 4.
  %n.vec = and i64 %2, 8589934588
  %ind.end = trunc i64 %n.vec to i32
  %7 = shl nuw nsw i64 %n.vec, 1
  %ind.end27 = getelementptr float, ptr %data, i64 %7
  %8 = shl nuw nsw i64 %n.vec, 1
  %ind.end29 = getelementptr i8, ptr %dst, i64 %8
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %9 = shl i64 %index, 1
  %next.gep = getelementptr float, ptr %data, i64 %9
  %10 = shl i64 %index, 1
  %wide.vec = load <8 x float>, ptr %next.gep, align 4
  ; De-interleave: even lanes, then odd lanes.
  %strided.vec = shufflevector <8 x float> %wide.vec, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %strided.vec34 = shufflevector <8 x float> %wide.vec, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %11 = fcmp olt <4 x float> %strided.vec, zeroinitializer
  %12 = fcmp ogt <4 x float> %strided.vec, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
  %13 = select <4 x i1> %12, <4 x float> <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>, <4 x float> %strided.vec
  %14 = select <4 x i1> %11, <4 x float> zeroinitializer, <4 x float> %13
  %15 = fptoui <4 x float> %14 to <4 x i8>
  %16 = fcmp olt <4 x float> %strided.vec34, zeroinitializer
  %17 = fcmp ogt <4 x float> %strided.vec34, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
  %18 = select <4 x i1> %17, <4 x float> <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>, <4 x float> %strided.vec34
  %19 = select <4 x i1> %16, <4 x float> zeroinitializer, <4 x float> %18
  %20 = fptoui <4 x float> %19 to <4 x i8>
  %21 = getelementptr inbounds i8, ptr %dst, i64 %10
  ; Re-interleave the two converted halves before the single wide store.
  %interleaved.vec = shufflevector <4 x i8> %15, <4 x i8> %20, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  store <8 x i8> %interleaved.vec, ptr %21, align 1
  %index.next = add nuw i64 %index, 4
  %22 = icmp eq i64 %index.next, %n.vec
  br i1 %22, label %middle.block, label %vector.body

middle.block:                                     ; preds = %vector.body
  %cmp.n = icmp eq i64 %2, %n.vec
  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader35

for.body.preheader35:                             ; preds = %vector.memcheck, %for.body.preheader, %middle.block
  %i.022.ph = phi i32 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %ind.end, %middle.block ]
  %src.021.ph = phi ptr [ %data, %vector.memcheck ], [ %data, %for.body.preheader ], [ %ind.end27, %middle.block ]
  %dst.addr.020.ph = phi ptr [ %dst, %vector.memcheck ], [ %dst, %for.body.preheader ], [ %ind.end29, %middle.block ]
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %middle.block, %entry
  ret void

for.body:                                         ; preds = %for.body.preheader35, %for.body
  %i.022 = phi i32 [ %inc, %for.body ], [ %i.022.ph, %for.body.preheader35 ]
  %src.021 = phi ptr [ %add.ptr, %for.body ], [ %src.021.ph, %for.body.preheader35 ]
  %dst.addr.020 = phi ptr [ %add.ptr6, %for.body ], [ %dst.addr.020.ph, %for.body.preheader35 ]
  %23 = load float, ptr %src.021, align 4
  %cmp.i = fcmp olt float %23, 0.000000e+00
  %cmp1.i = fcmp ogt float %23, 2.550000e+02
  %.x.i = select i1 %cmp1.i, float 2.550000e+02, float %23
  %retval.0.i = select i1 %cmp.i, float 0.000000e+00, float %.x.i
  %conv = fptoui float %retval.0.i to i8
  store i8 %conv, ptr %dst.addr.020, align 1
  %arrayidx2 = getelementptr inbounds float, ptr %src.021, i64 1
  %24 = load float, ptr %arrayidx2, align 4
  %cmp.i15 = fcmp olt float %24, 0.000000e+00
  %cmp1.i16 = fcmp ogt float %24, 2.550000e+02
  %.x.i17 = select i1 %cmp1.i16, float 2.550000e+02, float %24
  %retval.0.i18 = select i1 %cmp.i15, float 0.000000e+00, float %.x.i17
  %conv4 = fptoui float %retval.0.i18 to i8
  %arrayidx5 = getelementptr inbounds i8, ptr %dst.addr.020, i64 1
  store i8 %conv4, ptr %arrayidx5, align 1
  %add.ptr = getelementptr inbounds float, ptr %src.021, i64 2
  %add.ptr6 = getelementptr inbounds i8, ptr %dst.addr.020, i64 2
  %inc = add nuw nsw i32 %i.022, 1
  %exitcond.not = icmp eq i32 %inc, %width
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
; loop3: same clamp-to-[0.0, 255.0] + fptoui-to-i8 conversion as a scalar loop
; that handles 3 consecutive floats per iteration (%width iterations, source
; advances by 3 floats and destination by 3 bytes each time).
; vector.body covers 4 scalar iterations at once: it loads <12 x float>,
; splits it into three stride-3 lane groups, converts each group and stores
; the re-interleaved <12 x i8>. The expected assembly uses ld3 for the load
; and a single tbl with a constant-pool mask (.LCPI2_0) to narrow and
; interleave the three results.
define void @loop3(ptr noalias nocapture noundef writeonly %dst, ptr nocapture noundef readonly %data, i32 noundef %width) {
; CHECK-LABEL: loop3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    subs w8, w2, #1
; CHECK-NEXT:    b.lt .LBB2_9
; CHECK-NEXT:  // %bb.1: // %for.body.preheader
; CHECK-NEXT:    cmp w8, #2
; CHECK-NEXT:    b.ls .LBB2_6
; CHECK-NEXT:  // %bb.2: // %vector.memcheck
; CHECK-NEXT:    add x9, x8, w8, uxtw #1
; CHECK-NEXT:    add x9, x9, #3
; CHECK-NEXT:    add x10, x1, x9, lsl #2
; CHECK-NEXT:    add x9, x0, x9
; CHECK-NEXT:    cmp x10, x0
; CHECK-NEXT:    ccmp x9, x1, #0, hi
; CHECK-NEXT:    b.hi .LBB2_6
; CHECK-NEXT:  // %bb.3: // %vector.ph
; CHECK-NEXT:    add x11, x8, #1
; CHECK-NEXT:    mov w8, #1132396544 // =0x437f0000
; CHECK-NEXT:    adrp x12, .LCPI2_0
; CHECK-NEXT:    and x10, x11, #0x1fffffffc
; CHECK-NEXT:    dup v0.4s, w8
; CHECK-NEXT:    ldr q1, [x12, :lo12:.LCPI2_0]
; CHECK-NEXT:    add x9, x10, x10, lsl #1
; CHECK-NEXT:    mov x12, x10
; CHECK-NEXT:    add x8, x1, x9, lsl #2
; CHECK-NEXT:    add x9, x0, x9
; CHECK-NEXT:  .LBB2_4: // %vector.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ld3 { v2.4s, v3.4s, v4.4s }, [x1], #48
; CHECK-NEXT:    fcmgt v5.4s, v2.4s, v0.4s
; CHECK-NEXT:    fcmgt v6.4s, v3.4s, v0.4s
; CHECK-NEXT:    fcmgt v7.4s, v4.4s, v0.4s
; CHECK-NEXT:    fcmlt v16.4s, v2.4s, #0.0
; CHECK-NEXT:    fcmlt v17.4s, v3.4s, #0.0
; CHECK-NEXT:    add x13, x0, #8
; CHECK-NEXT:    subs x12, x12, #4
; CHECK-NEXT:    bsl v5.16b, v0.16b, v2.16b
; CHECK-NEXT:    fcmlt v2.4s, v4.4s, #0.0
; CHECK-NEXT:    bsl v6.16b, v0.16b, v3.16b
; CHECK-NEXT:    bsl v7.16b, v0.16b, v4.16b
; CHECK-NEXT:    bic v3.16b, v5.16b, v16.16b
; CHECK-NEXT:    bic v4.16b, v6.16b, v17.16b
; CHECK-NEXT:    bic v2.16b, v7.16b, v2.16b
; CHECK-NEXT:    fcvtzs v3.4s, v3.4s
; CHECK-NEXT:    fcvtzs v4.4s, v4.4s
; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
; CHECK-NEXT:    xtn v5.4h, v3.4s
; CHECK-NEXT:    xtn v6.4h, v4.4s
; CHECK-NEXT:    xtn v7.4h, v2.4s
; CHECK-NEXT:    tbl v2.16b, { v5.16b, v6.16b, v7.16b }, v1.16b
; CHECK-NEXT:    st1 { v2.s }[2], [x13]
; CHECK-NEXT:    str d2, [x0], #12
; CHECK-NEXT:    b.ne .LBB2_4
; CHECK-NEXT:  // %bb.5: // %middle.block
; CHECK-NEXT:    cmp x11, x10
; CHECK-NEXT:    b.ne .LBB2_7
; CHECK-NEXT:    b .LBB2_9
; CHECK-NEXT:  .LBB2_6:
; CHECK-NEXT:    mov w10, wzr
; CHECK-NEXT:    mov x8, x1
; CHECK-NEXT:    mov x9, x0
; CHECK-NEXT:  .LBB2_7: // %for.body.preheader1
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    sub w10, w2, w10
; CHECK-NEXT:    mov w11, #1132396544 // =0x437f0000
; CHECK-NEXT:  .LBB2_8: // %for.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldp s1, s3, [x8]
; CHECK-NEXT:    fmov s2, w11
; CHECK-NEXT:    fcmp s1, s2
; CHECK-NEXT:    fcsel s4, s2, s1, gt
; CHECK-NEXT:    fcmp s1, #0.0
; CHECK-NEXT:    fcsel s1, s0, s4, mi
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    fcsel s4, s2, s3, gt
; CHECK-NEXT:    fcmp s3, #0.0
; CHECK-NEXT:    ldr s3, [x8, #8]
; CHECK-NEXT:    fcvtzs w12, s1
; CHECK-NEXT:    add x8, x8, #12
; CHECK-NEXT:    fcsel s4, s0, s4, mi
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    strb w12, [x9]
; CHECK-NEXT:    fcsel s2, s2, s3, gt
; CHECK-NEXT:    fcmp s3, #0.0
; CHECK-NEXT:    fcvtzs w13, s4
; CHECK-NEXT:    fcsel s2, s0, s2, mi
; CHECK-NEXT:    subs w10, w10, #1
; CHECK-NEXT:    strb w13, [x9, #1]
; CHECK-NEXT:    fcvtzs w14, s2
; CHECK-NEXT:    strb w14, [x9, #2]
; CHECK-NEXT:    add x9, x9, #3
; CHECK-NEXT:    b.ne .LBB2_8
; CHECK-NEXT:  .LBB2_9: // %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %cmp29 = icmp sgt i32 %width, 0
  br i1 %cmp29, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %0 = add i32 %width, -1
  %1 = zext i32 %0 to i64
  %2 = add nuw nsw i64 %1, 1
  ; Fewer than 4 iterations: skip the vector loop entirely.
  %min.iters.check = icmp ult i32 %0, 3
  br i1 %min.iters.check, label %for.body.preheader46, label %vector.memcheck

vector.memcheck:                                  ; preds = %for.body.preheader
  ; Runtime overlap test: %6 = 3 * %width is the element count of both ranges.
  ; If the end of each range lies above the start of the other one, fall back
  ; to the scalar loop.
  %3 = add i32 %width, -1
  %4 = zext i32 %3 to i64
  %5 = mul nuw nsw i64 %4, 3
  %6 = add nuw nsw i64 %5, 3
  %scevgep = getelementptr i8, ptr %dst, i64 %6
  %scevgep34 = getelementptr float, ptr %data, i64 %6
  %bound0 = icmp ugt ptr %scevgep34, %dst
  %bound1 = icmp ugt ptr %scevgep, %data
  %found.conflict = and i1 %bound0, %bound1
  br i1 %found.conflict, label %for.body.preheader46, label %vector.ph

vector.ph:                                        ; preds = %vector.memcheck
  ; 8589934588 == 0x1fffffffc: trip count rounded down to a multiple of 4.
  %n.vec = and i64 %2, 8589934588
  %ind.end = trunc i64 %n.vec to i32
  %7 = mul nuw nsw i64 %n.vec, 3
  %ind.end37 = getelementptr float, ptr %data, i64 %7
  %8 = mul nuw nsw i64 %n.vec, 3
  %ind.end39 = getelementptr i8, ptr %dst, i64 %8
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %9 = mul i64 %index, 3
  %next.gep = getelementptr float, ptr %data, i64 %9
  %10 = mul i64 %index, 3
  %wide.vec = load <12 x float>, ptr %next.gep, align 4
  ; De-interleave into the three stride-3 lane groups.
  %strided.vec = shufflevector <12 x float> %wide.vec, <12 x float> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
  %strided.vec44 = shufflevector <12 x float> %wide.vec, <12 x float> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
  %strided.vec45 = shufflevector <12 x float> %wide.vec, <12 x float> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
  %11 = fcmp olt <4 x float> %strided.vec, zeroinitializer
  %12 = fcmp ogt <4 x float> %strided.vec, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
  %13 = select <4 x i1> %12, <4 x float> <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>, <4 x float> %strided.vec
  %14 = select <4 x i1> %11, <4 x float> zeroinitializer, <4 x float> %13
  %15 = fptoui <4 x float> %14 to <4 x i8>
  %16 = fcmp olt <4 x float> %strided.vec44, zeroinitializer
  %17 = fcmp ogt <4 x float> %strided.vec44, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
  %18 = select <4 x i1> %17, <4 x float> <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>, <4 x float> %strided.vec44
  %19 = select <4 x i1> %16, <4 x float> zeroinitializer, <4 x float> %18
  %20 = fptoui <4 x float> %19 to <4 x i8>
  %21 = fcmp olt <4 x float> %strided.vec45, zeroinitializer
  %22 = fcmp ogt <4 x float> %strided.vec45, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
  %23 = select <4 x i1> %22, <4 x float> <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>, <4 x float> %strided.vec45
  %24 = select <4 x i1> %21, <4 x float> zeroinitializer, <4 x float> %23
  %25 = fptoui <4 x float> %24 to <4 x i8>
  %26 = getelementptr inbounds i8, ptr %dst, i64 %10
  ; Concatenate the three converted groups, then re-interleave with stride 3.
  %27 = shufflevector <4 x i8> %15, <4 x i8> %20, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %28 = shufflevector <4 x i8> %25, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <8 x i8> %27, <8 x i8> %28, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
  store <12 x i8> %interleaved.vec, ptr %26, align 1
  %index.next = add nuw i64 %index, 4
  %29 = icmp eq i64 %index.next, %n.vec
  br i1 %29, label %middle.block, label %vector.body

middle.block:                                     ; preds = %vector.body
  %cmp.n = icmp eq i64 %2, %n.vec
  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader46

for.body.preheader46:                             ; preds = %vector.memcheck, %for.body.preheader, %middle.block
  %i.032.ph = phi i32 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %ind.end, %middle.block ]
  %src.031.ph = phi ptr [ %data, %vector.memcheck ], [ %data, %for.body.preheader ], [ %ind.end37, %middle.block ]
  %dst.addr.030.ph = phi ptr [ %dst, %vector.memcheck ], [ %dst, %for.body.preheader ], [ %ind.end39, %middle.block ]
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %middle.block, %entry
  ret void

for.body:                                         ; preds = %for.body.preheader46, %for.body
  %i.032 = phi i32 [ %inc, %for.body ], [ %i.032.ph, %for.body.preheader46 ]
  %src.031 = phi ptr [ %add.ptr, %for.body ], [ %src.031.ph, %for.body.preheader46 ]
  %dst.addr.030 = phi ptr [ %add.ptr10, %for.body ], [ %dst.addr.030.ph, %for.body.preheader46 ]
  %30 = load float, ptr %src.031, align 4
  %cmp.i = fcmp olt float %30, 0.000000e+00
  %cmp1.i = fcmp ogt float %30, 2.550000e+02
  %.x.i = select i1 %cmp1.i, float 2.550000e+02, float %30
  %retval.0.i = select i1 %cmp.i, float 0.000000e+00, float %.x.i
  %conv = fptoui float %retval.0.i to i8
  store i8 %conv, ptr %dst.addr.030, align 1
  %arrayidx2 = getelementptr inbounds float, ptr %src.031, i64 1
  %31 = load float, ptr %arrayidx2, align 4
  %cmp.i21 = fcmp olt float %31, 0.000000e+00
  %cmp1.i22 = fcmp ogt float %31, 2.550000e+02
  %.x.i23 = select i1 %cmp1.i22, float 2.550000e+02, float %31
  %retval.0.i24 = select i1 %cmp.i21, float 0.000000e+00, float %.x.i23
  %conv4 = fptoui float %retval.0.i24 to i8
  %arrayidx5 = getelementptr inbounds i8, ptr %dst.addr.030, i64 1
  store i8 %conv4, ptr %arrayidx5, align 1
  %arrayidx6 = getelementptr inbounds float, ptr %src.031, i64 2
  %32 = load float, ptr %arrayidx6, align 4
  %cmp.i25 = fcmp olt float %32, 0.000000e+00
  %cmp1.i26 = fcmp ogt float %32, 2.550000e+02
  %.x.i27 = select i1 %cmp1.i26, float 2.550000e+02, float %32
  %retval.0.i28 = select i1 %cmp.i25, float 0.000000e+00, float %.x.i27
  %conv8 = fptoui float %retval.0.i28 to i8
  %arrayidx9 = getelementptr inbounds i8, ptr %dst.addr.030, i64 2
  store i8 %conv8, ptr %arrayidx9, align 1
  %add.ptr = getelementptr inbounds float, ptr %src.031, i64 3
  %add.ptr10 = getelementptr inbounds i8, ptr %dst.addr.030, i64 3
  %inc = add nuw nsw i32 %i.032, 1
  %exitcond.not = icmp eq i32 %inc, %width
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
528 define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture noundef readonly %data, i32 noundef %width) {
529 ; CHECK-LABEL: loop4:
530 ; CHECK: // %bb.0: // %entry
531 ; CHECK-NEXT: subs w8, w2, #1
532 ; CHECK-NEXT: b.lt .LBB3_7
533 ; CHECK-NEXT: // %bb.1: // %for.body.preheader
534 ; CHECK-NEXT: cmp w8, #2
535 ; CHECK-NEXT: b.ls .LBB3_4
536 ; CHECK-NEXT: // %bb.2: // %vector.memcheck
537 ; CHECK-NEXT: ubfiz x9, x8, #2, #32
538 ; CHECK-NEXT: add x9, x9, #4
539 ; CHECK-NEXT: add x10, x1, x9, lsl #2
540 ; CHECK-NEXT: cmp x10, x0
541 ; CHECK-NEXT: b.ls .LBB3_8
542 ; CHECK-NEXT: // %bb.3: // %vector.memcheck
543 ; CHECK-NEXT: add x9, x0, x9
544 ; CHECK-NEXT: cmp x9, x1
545 ; CHECK-NEXT: b.ls .LBB3_8
546 ; CHECK-NEXT: .LBB3_4:
547 ; CHECK-NEXT: mov w10, wzr
548 ; CHECK-NEXT: mov x8, x1
549 ; CHECK-NEXT: mov x9, x0
550 ; CHECK-NEXT: .LBB3_5: // %for.body.preheader1
551 ; CHECK-NEXT: movi d0, #0000000000000000
552 ; CHECK-NEXT: sub w10, w2, w10
553 ; CHECK-NEXT: mov w11, #1132396544 // =0x437f0000
554 ; CHECK-NEXT: .LBB3_6: // %for.body
555 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
556 ; CHECK-NEXT: ldp s1, s3, [x8]
557 ; CHECK-NEXT: fmov s2, w11
558 ; CHECK-NEXT: fcmp s1, s2
559 ; CHECK-NEXT: fcsel s4, s2, s1, gt
560 ; CHECK-NEXT: fcmp s1, #0.0
561 ; CHECK-NEXT: fcsel s1, s0, s4, mi
562 ; CHECK-NEXT: fcmp s3, s2
563 ; CHECK-NEXT: fcsel s4, s2, s3, gt
564 ; CHECK-NEXT: fcmp s3, #0.0
565 ; CHECK-NEXT: ldp s3, s5, [x8, #8]
566 ; CHECK-NEXT: fcvtzs w12, s1
567 ; CHECK-NEXT: add x8, x8, #16
568 ; CHECK-NEXT: fcsel s4, s0, s4, mi
569 ; CHECK-NEXT: fcmp s3, s2
570 ; CHECK-NEXT: strb w12, [x9]
571 ; CHECK-NEXT: fcsel s6, s2, s3, gt
572 ; CHECK-NEXT: fcmp s3, #0.0
573 ; CHECK-NEXT: fcvtzs w13, s4
574 ; CHECK-NEXT: fcsel s3, s0, s6, mi
575 ; CHECK-NEXT: fcmp s5, s2
576 ; CHECK-NEXT: strb w13, [x9, #1]
577 ; CHECK-NEXT: fcsel s2, s2, s5, gt
578 ; CHECK-NEXT: fcmp s5, #0.0
579 ; CHECK-NEXT: fcvtzs w14, s3
580 ; CHECK-NEXT: fcsel s2, s0, s2, mi
581 ; CHECK-NEXT: subs w10, w10, #1
582 ; CHECK-NEXT: strb w14, [x9, #2]
583 ; CHECK-NEXT: fcvtzs w15, s2
584 ; CHECK-NEXT: strb w15, [x9, #3]
585 ; CHECK-NEXT: add x9, x9, #4
586 ; CHECK-NEXT: b.ne .LBB3_6
587 ; CHECK-NEXT: .LBB3_7: // %for.cond.cleanup
589 ; CHECK-NEXT: .LBB3_8: // %vector.ph
590 ; CHECK-NEXT: add x11, x8, #1
591 ; CHECK-NEXT: mov w8, #1132396544 // =0x437f0000
592 ; CHECK-NEXT: adrp x12, .LCPI3_0
593 ; CHECK-NEXT: and x10, x11, #0x1fffffffc
594 ; CHECK-NEXT: dup v0.4s, w8
595 ; CHECK-NEXT: ldr q1, [x12, :lo12:.LCPI3_0]
596 ; CHECK-NEXT: add x8, x1, x10, lsl #4
597 ; CHECK-NEXT: add x9, x0, x10, lsl #2
598 ; CHECK-NEXT: mov x12, x10
599 ; CHECK-NEXT: .LBB3_9: // %vector.body
600 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
601 ; CHECK-NEXT: ld4 { v2.4s, v3.4s, v4.4s, v5.4s }, [x1], #64
602 ; CHECK-NEXT: fcmgt v6.4s, v2.4s, v0.4s
603 ; CHECK-NEXT: fcmgt v7.4s, v3.4s, v0.4s
604 ; CHECK-NEXT: fcmgt v16.4s, v4.4s, v0.4s
605 ; CHECK-NEXT: fcmgt v17.4s, v5.4s, v0.4s
606 ; CHECK-NEXT: fcmlt v18.4s, v2.4s, #0.0
607 ; CHECK-NEXT: fcmlt v19.4s, v3.4s, #0.0
608 ; CHECK-NEXT: subs x12, x12, #4
609 ; CHECK-NEXT: fcmlt v20.4s, v4.4s, #0.0
610 ; CHECK-NEXT: bsl v6.16b, v0.16b, v2.16b
611 ; CHECK-NEXT: fcmlt v2.4s, v5.4s, #0.0
612 ; CHECK-NEXT: bsl v7.16b, v0.16b, v3.16b
613 ; CHECK-NEXT: bsl v16.16b, v0.16b, v4.16b
614 ; CHECK-NEXT: bsl v17.16b, v0.16b, v5.16b
615 ; CHECK-NEXT: bic v3.16b, v6.16b, v18.16b
616 ; CHECK-NEXT: bic v4.16b, v7.16b, v19.16b
617 ; CHECK-NEXT: bic v5.16b, v16.16b, v20.16b
618 ; CHECK-NEXT: bic v2.16b, v17.16b, v2.16b
619 ; CHECK-NEXT: fcvtzs v3.4s, v3.4s
620 ; CHECK-NEXT: fcvtzs v4.4s, v4.4s
621 ; CHECK-NEXT: fcvtzs v5.4s, v5.4s
622 ; CHECK-NEXT: fcvtzs v2.4s, v2.4s
623 ; CHECK-NEXT: xtn v16.4h, v3.4s
624 ; CHECK-NEXT: xtn v17.4h, v4.4s
625 ; CHECK-NEXT: xtn v18.4h, v5.4s
626 ; CHECK-NEXT: xtn v19.4h, v2.4s
627 ; CHECK-NEXT: tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
628 ; CHECK-NEXT: str q2, [x0], #16
629 ; CHECK-NEXT: b.ne .LBB3_9
630 ; CHECK-NEXT: // %bb.10: // %middle.block
631 ; CHECK-NEXT: cmp x11, x10
632 ; CHECK-NEXT: b.ne .LBB3_5
633 ; CHECK-NEXT: b .LBB3_7
635 %cmp39 = icmp sgt i32 %width, 0
636 br i1 %cmp39, label %for.body.preheader, label %for.cond.cleanup
638 for.body.preheader: ; preds = %entry
639 %0 = add i32 %width, -1
640 %1 = zext i32 %0 to i64
641 %2 = add nuw nsw i64 %1, 1
642 %min.iters.check = icmp ult i32 %0, 3
643 br i1 %min.iters.check, label %for.body.preheader57, label %vector.memcheck
645 vector.memcheck: ; preds = %for.body.preheader
646 %3 = add i32 %width, -1
647 %4 = zext i32 %3 to i64
648 %5 = shl nuw nsw i64 %4, 2
649 %6 = add nuw nsw i64 %5, 4
650 %scevgep = getelementptr i8, ptr %dst, i64 %6
651 %scevgep44 = getelementptr float, ptr %data, i64 %6
652 %bound0 = icmp ugt ptr %scevgep44, %dst
653 %bound1 = icmp ugt ptr %scevgep, %data
654 %found.conflict = and i1 %bound0, %bound1
655 br i1 %found.conflict, label %for.body.preheader57, label %vector.ph
657 vector.ph: ; preds = %vector.memcheck
658 %n.vec = and i64 %2, 8589934588
659 %ind.end = trunc i64 %n.vec to i32
660 %7 = shl nuw nsw i64 %n.vec, 2
661 %ind.end47 = getelementptr float, ptr %data, i64 %7
662 %8 = shl nuw nsw i64 %n.vec, 2
663 %ind.end49 = getelementptr i8, ptr %dst, i64 %8
664 br label %vector.body
666 vector.body: ; preds = %vector.body, %vector.ph
667 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
668 %9 = shl i64 %index, 2
669 %next.gep = getelementptr float, ptr %data, i64 %9
670 %10 = shl i64 %index, 2
671 %wide.vec = load <16 x float>, ptr %next.gep, align 4
672 %strided.vec = shufflevector <16 x float> %wide.vec, <16 x float> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
673 %strided.vec54 = shufflevector <16 x float> %wide.vec, <16 x float> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
674 %strided.vec55 = shufflevector <16 x float> %wide.vec, <16 x float> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
675 %strided.vec56 = shufflevector <16 x float> %wide.vec, <16 x float> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
676 %11 = fcmp olt <4 x float> %strided.vec, zeroinitializer
677 %12 = fcmp ogt <4 x float> %strided.vec, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
678 %13 = select <4 x i1> %12, <4 x float> <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>, <4 x float> %strided.vec
679 %14 = select <4 x i1> %11, <4 x float> zeroinitializer, <4 x float> %13
680 %15 = fptoui <4 x float> %14 to <4 x i8>
681 %16 = fcmp olt <4 x float> %strided.vec54, zeroinitializer
682 %17 = fcmp ogt <4 x float> %strided.vec54, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
683 %18 = select <4 x i1> %17, <4 x float> <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>, <4 x float> %strided.vec54
684 %19 = select <4 x i1> %16, <4 x float> zeroinitializer, <4 x float> %18
685 %20 = fptoui <4 x float> %19 to <4 x i8>
686 %21 = fcmp olt <4 x float> %strided.vec55, zeroinitializer
687 %22 = fcmp ogt <4 x float> %strided.vec55, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
688 %23 = select <4 x i1> %22, <4 x float> <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>, <4 x float> %strided.vec55
689 %24 = select <4 x i1> %21, <4 x float> zeroinitializer, <4 x float> %23
690 %25 = fptoui <4 x float> %24 to <4 x i8>
691 %26 = fcmp olt <4 x float> %strided.vec56, zeroinitializer
692 %27 = fcmp ogt <4 x float> %strided.vec56, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
693 %28 = select <4 x i1> %27, <4 x float> <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>, <4 x float> %strided.vec56
694 %29 = select <4 x i1> %26, <4 x float> zeroinitializer, <4 x float> %28
695 %30 = fptoui <4 x float> %29 to <4 x i8>
696 %31 = getelementptr inbounds i8, ptr %dst, i64 %10
697 %32 = shufflevector <4 x i8> %15, <4 x i8> %20, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
698 %33 = shufflevector <4 x i8> %25, <4 x i8> %30, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
699 %interleaved.vec = shufflevector <8 x i8> %32, <8 x i8> %33, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
700 store <16 x i8> %interleaved.vec, ptr %31, align 1
701 %index.next = add nuw i64 %index, 4
702 %34 = icmp eq i64 %index.next, %n.vec
703 br i1 %34, label %middle.block, label %vector.body
705 middle.block: ; preds = %vector.body
706 %cmp.n = icmp eq i64 %2, %n.vec
707 br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader57
709 for.body.preheader57: ; preds = %vector.memcheck, %for.body.preheader, %middle.block
710 %i.042.ph = phi i32 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %ind.end, %middle.block ]
711 %src.041.ph = phi ptr [ %data, %vector.memcheck ], [ %data, %for.body.preheader ], [ %ind.end47, %middle.block ]
712 %dst.addr.040.ph = phi ptr [ %dst, %vector.memcheck ], [ %dst, %for.body.preheader ], [ %ind.end49, %middle.block ]
715 for.cond.cleanup: ; preds = %for.body, %middle.block, %entry
718 for.body: ; preds = %for.body.preheader57, %for.body
719 %i.042 = phi i32 [ %inc, %for.body ], [ %i.042.ph, %for.body.preheader57 ]
720 %src.041 = phi ptr [ %add.ptr, %for.body ], [ %src.041.ph, %for.body.preheader57 ]
721 %dst.addr.040 = phi ptr [ %add.ptr14, %for.body ], [ %dst.addr.040.ph, %for.body.preheader57 ]
722 %35 = load float, ptr %src.041, align 4
723 %cmp.i = fcmp olt float %35, 0.000000e+00
724 %cmp1.i = fcmp ogt float %35, 2.550000e+02
725 %.x.i = select i1 %cmp1.i, float 2.550000e+02, float %35
726 %retval.0.i = select i1 %cmp.i, float 0.000000e+00, float %.x.i
727 %conv = fptoui float %retval.0.i to i8
728 store i8 %conv, ptr %dst.addr.040, align 1
729 %arrayidx2 = getelementptr inbounds float, ptr %src.041, i64 1
730 %36 = load float, ptr %arrayidx2, align 4
731 %cmp.i27 = fcmp olt float %36, 0.000000e+00
732 %cmp1.i28 = fcmp ogt float %36, 2.550000e+02
733 %.x.i29 = select i1 %cmp1.i28, float 2.550000e+02, float %36
734 %retval.0.i30 = select i1 %cmp.i27, float 0.000000e+00, float %.x.i29
735 %conv4 = fptoui float %retval.0.i30 to i8
736 %arrayidx5 = getelementptr inbounds i8, ptr %dst.addr.040, i64 1
737 store i8 %conv4, ptr %arrayidx5, align 1
738 %arrayidx6 = getelementptr inbounds float, ptr %src.041, i64 2
739 %37 = load float, ptr %arrayidx6, align 4
740 %cmp.i31 = fcmp olt float %37, 0.000000e+00
741 %cmp1.i32 = fcmp ogt float %37, 2.550000e+02
742 %.x.i33 = select i1 %cmp1.i32, float 2.550000e+02, float %37
743 %retval.0.i34 = select i1 %cmp.i31, float 0.000000e+00, float %.x.i33
744 %conv8 = fptoui float %retval.0.i34 to i8
745 %arrayidx9 = getelementptr inbounds i8, ptr %dst.addr.040, i64 2
746 store i8 %conv8, ptr %arrayidx9, align 1
747 %arrayidx10 = getelementptr inbounds float, ptr %src.041, i64 3
748 %38 = load float, ptr %arrayidx10, align 4
749 %cmp.i35 = fcmp olt float %38, 0.000000e+00
750 %cmp1.i36 = fcmp ogt float %38, 2.550000e+02
751 %.x.i37 = select i1 %cmp1.i36, float 2.550000e+02, float %38
752 %retval.0.i38 = select i1 %cmp.i35, float 0.000000e+00, float %.x.i37
753 %conv12 = fptoui float %retval.0.i38 to i8
754 %arrayidx13 = getelementptr inbounds i8, ptr %dst.addr.040, i64 3
755 store i8 %conv12, ptr %arrayidx13, align 1
756 %add.ptr = getelementptr inbounds float, ptr %src.041, i64 4
757 %add.ptr14 = getelementptr inbounds i8, ptr %dst.addr.040, i64 4
758 %inc = add nuw nsw i32 %i.042, 1
759 %exitcond.not = icmp eq i32 %inc, %width
760 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body