1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
3 ; RUN: llc -mtriple=thumbv8m.base-arm-none-eabi < %s | FileCheck %s
; Test function: widens each i16 element read from %pSrc to i32, shifts it
; left by 16 and stores the result to %pDst, for %blockSize elements.
; Structure of the IR below:
;   - the entry code skips the 4-at-a-time path when %blockSize < 4;
;   - while.body.prol{,.1,.2} run the (%blockSize >> 2) & 3 leading groups of four;
;   - while.body handles four groups (16 elements) per iteration;
;   - while.body12{,.1,.2} convert the last %blockSize & 3 elements.
; Block/value names carry .prol/.unr suffixes, so this looks like the result
; of runtime loop unrolling -- NOTE(review): confirm against the original source.
; The IR mixes sext and zext before the shift; the expected Thumb code below
; uses ldrh followed by lsls #16 for every element in both cases.
5 define void @arm_q15_to_q31(ptr nocapture noundef readonly %pSrc, ptr nocapture noundef writeonly %pDst, i32 noundef %blockSize) {
6 ; CHECK-LABEL: arm_q15_to_q31:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
9 ; CHECK-NEXT: push {r4, r5, r6, r7, lr}
11 ; CHECK-NEXT: sub sp, #8
12 ; CHECK-NEXT: mov r7, r2
13 ; CHECK-NEXT: lsrs r3, r2, #2
14 ; CHECK-NEXT: beq .LBB0_6
15 ; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
16 ; CHECK-NEXT: movs r5, #3
17 ; CHECK-NEXT: ands r5, r3
18 ; CHECK-NEXT: subs r2, r3, #1
19 ; CHECK-NEXT: cbz r5, .LBB0_4
20 ; CHECK-NEXT: @ %bb.2: @ %while.body.prol
21 ; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
22 ; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill
23 ; CHECK-NEXT: ldrh r2, [r0]
24 ; CHECK-NEXT: ldrh r7, [r0, #2]
25 ; CHECK-NEXT: ldrh r4, [r0, #4]
26 ; CHECK-NEXT: ldrh r6, [r0, #6]
27 ; CHECK-NEXT: lsls r6, r6, #16
28 ; CHECK-NEXT: lsls r4, r4, #16
29 ; CHECK-NEXT: lsls r7, r7, #16
30 ; CHECK-NEXT: lsls r2, r2, #16
31 ; CHECK-NEXT: stm r1!, {r2, r7}
32 ; CHECK-NEXT: str r4, [r1]
33 ; CHECK-NEXT: str r6, [r1, #4]
34 ; CHECK-NEXT: subs r1, #8
35 ; CHECK-NEXT: cmp r5, #1
36 ; CHECK-NEXT: bne .LBB0_11
37 ; CHECK-NEXT: @ %bb.3:
38 ; CHECK-NEXT: adds r1, #16
39 ; CHECK-NEXT: adds r0, #8
40 ; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
41 ; CHECK-NEXT: mov r3, r2
42 ; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
43 ; CHECK-NEXT: .LBB0_4: @ %while.body.prol.loopexit
44 ; CHECK-NEXT: cmp r2, #3
45 ; CHECK-NEXT: blo .LBB0_6
46 ; CHECK-NEXT: .LBB0_5: @ %while.body
47 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
48 ; CHECK-NEXT: ldrh r2, [r0]
49 ; CHECK-NEXT: ldrh r4, [r0, #2]
50 ; CHECK-NEXT: ldrh r5, [r0, #4]
51 ; CHECK-NEXT: ldrh r6, [r0, #6]
52 ; CHECK-NEXT: lsls r6, r6, #16
53 ; CHECK-NEXT: str r6, [r1, #12]
54 ; CHECK-NEXT: lsls r5, r5, #16
55 ; CHECK-NEXT: str r5, [r1, #8]
56 ; CHECK-NEXT: lsls r4, r4, #16
57 ; CHECK-NEXT: str r4, [r1, #4]
58 ; CHECK-NEXT: lsls r2, r2, #16
59 ; CHECK-NEXT: str r2, [r1]
60 ; CHECK-NEXT: ldrh r2, [r0, #8]
61 ; CHECK-NEXT: ldrh r4, [r0, #10]
62 ; CHECK-NEXT: ldrh r5, [r0, #12]
63 ; CHECK-NEXT: ldrh r6, [r0, #14]
64 ; CHECK-NEXT: lsls r6, r6, #16
65 ; CHECK-NEXT: str r6, [r1, #28]
66 ; CHECK-NEXT: lsls r5, r5, #16
67 ; CHECK-NEXT: str r5, [r1, #24]
68 ; CHECK-NEXT: lsls r4, r4, #16
69 ; CHECK-NEXT: str r4, [r1, #20]
70 ; CHECK-NEXT: lsls r2, r2, #16
71 ; CHECK-NEXT: str r2, [r1, #16]
72 ; CHECK-NEXT: ldrh r2, [r0, #16]
73 ; CHECK-NEXT: ldrh r4, [r0, #18]
74 ; CHECK-NEXT: ldrh r5, [r0, #20]
75 ; CHECK-NEXT: ldrh r6, [r0, #22]
76 ; CHECK-NEXT: lsls r6, r6, #16
77 ; CHECK-NEXT: str r6, [r1, #44]
78 ; CHECK-NEXT: lsls r5, r5, #16
79 ; CHECK-NEXT: str r5, [r1, #40]
80 ; CHECK-NEXT: lsls r4, r4, #16
81 ; CHECK-NEXT: str r4, [r1, #36]
82 ; CHECK-NEXT: lsls r2, r2, #16
83 ; CHECK-NEXT: str r2, [r1, #32]
84 ; CHECK-NEXT: ldrh r2, [r0, #24]
85 ; CHECK-NEXT: ldrh r4, [r0, #26]
86 ; CHECK-NEXT: ldrh r5, [r0, #28]
87 ; CHECK-NEXT: ldrh r6, [r0, #30]
88 ; CHECK-NEXT: lsls r6, r6, #16
89 ; CHECK-NEXT: str r6, [r1, #60]
90 ; CHECK-NEXT: lsls r5, r5, #16
91 ; CHECK-NEXT: str r5, [r1, #56]
92 ; CHECK-NEXT: lsls r4, r4, #16
93 ; CHECK-NEXT: str r4, [r1, #52]
94 ; CHECK-NEXT: lsls r2, r2, #16
95 ; CHECK-NEXT: str r2, [r1, #48]
96 ; CHECK-NEXT: adds r1, #64
97 ; CHECK-NEXT: adds r0, #32
98 ; CHECK-NEXT: subs r3, r3, #4
99 ; CHECK-NEXT: bne .LBB0_5
100 ; CHECK-NEXT: .LBB0_6: @ %while.end
101 ; CHECK-NEXT: movs r2, #3
102 ; CHECK-NEXT: ands r7, r2
103 ; CHECK-NEXT: beq .LBB0_10
104 ; CHECK-NEXT: @ %bb.7: @ %while.body12
105 ; CHECK-NEXT: ldrh r2, [r0]
106 ; CHECK-NEXT: lsls r2, r2, #16
107 ; CHECK-NEXT: str r2, [r1]
108 ; CHECK-NEXT: cmp r7, #1
109 ; CHECK-NEXT: beq .LBB0_10
110 ; CHECK-NEXT: @ %bb.8: @ %while.body12.1
111 ; CHECK-NEXT: ldrh r2, [r0, #2]
112 ; CHECK-NEXT: lsls r2, r2, #16
113 ; CHECK-NEXT: str r2, [r1, #4]
114 ; CHECK-NEXT: cmp r7, #2
115 ; CHECK-NEXT: beq .LBB0_10
116 ; CHECK-NEXT: @ %bb.9: @ %while.body12.2
117 ; CHECK-NEXT: ldrh r0, [r0, #4]
118 ; CHECK-NEXT: lsls r0, r0, #16
119 ; CHECK-NEXT: str r0, [r1, #8]
120 ; CHECK-NEXT: .LBB0_10: @ %while.end17
121 ; CHECK-NEXT: add sp, #8
122 ; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
123 ; CHECK-NEXT: .LBB0_11: @ %while.body.prol.1
124 ; CHECK-NEXT: ldrh r2, [r0, #8]
125 ; CHECK-NEXT: ldrh r4, [r0, #10]
126 ; CHECK-NEXT: ldrh r6, [r0, #12]
127 ; CHECK-NEXT: ldrh r7, [r0, #14]
128 ; CHECK-NEXT: lsls r7, r7, #16
129 ; CHECK-NEXT: lsls r6, r6, #16
130 ; CHECK-NEXT: lsls r4, r4, #16
131 ; CHECK-NEXT: lsls r2, r2, #16
132 ; CHECK-NEXT: str r2, [r1, #16]
133 ; CHECK-NEXT: str r4, [r1, #20]
134 ; CHECK-NEXT: str r6, [r1, #24]
135 ; CHECK-NEXT: str r7, [r1, #28]
136 ; CHECK-NEXT: cmp r5, #2
137 ; CHECK-NEXT: bne .LBB0_13
138 ; CHECK-NEXT: @ %bb.12:
139 ; CHECK-NEXT: subs r3, r3, #2
140 ; CHECK-NEXT: adds r1, #32
141 ; CHECK-NEXT: adds r0, #16
142 ; CHECK-NEXT: b .LBB0_14
143 ; CHECK-NEXT: .LBB0_13: @ %while.body.prol.2
144 ; CHECK-NEXT: ldrh r2, [r0, #16]
145 ; CHECK-NEXT: ldrh r4, [r0, #18]
146 ; CHECK-NEXT: ldrh r5, [r0, #20]
147 ; CHECK-NEXT: ldrh r6, [r0, #22]
148 ; CHECK-NEXT: lsls r6, r6, #16
149 ; CHECK-NEXT: lsls r5, r5, #16
150 ; CHECK-NEXT: lsls r4, r4, #16
151 ; CHECK-NEXT: lsls r2, r2, #16
152 ; CHECK-NEXT: mov r7, r1
153 ; CHECK-NEXT: adds r7, #32
154 ; CHECK-NEXT: stm r7!, {r2, r4, r5, r6}
155 ; CHECK-NEXT: subs r3, r3, #3
156 ; CHECK-NEXT: adds r1, #48
157 ; CHECK-NEXT: adds r0, #24
158 ; CHECK-NEXT: .LBB0_14: @ %while.body.prol.loopexit
159 ; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
160 ; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
161 ; CHECK-NEXT: cmp r2, #3
162 ; CHECK-NEXT: bhs .LBB0_5
163 ; CHECK-NEXT: b .LBB0_6
; Fewer than four elements: branch straight to the tail handling at while.end.
165 %cmp.not19 = icmp ult i32 %blockSize, 4
166 br i1 %cmp.not19, label %while.end, label %while.body.preheader
; %shr is the number of 4-element groups; %xtraiter = %shr & 3 is how many of
; those groups are peeled off before the loop that handles four groups at once.
168 while.body.preheader: ; preds = %entry
169 %shr = lshr i32 %blockSize, 2
170 %0 = add nsw i32 %shr, -1
171 %xtraiter = and i32 %shr, 3
172 %lcmp.mod.not = icmp eq i32 %xtraiter, 0
173 br i1 %lcmp.mod.not, label %while.body.prol.loopexit, label %while.body.prol
; First peeled group: input elements 0..3 -> output words 0..3.
175 while.body.prol: ; preds = %while.body.preheader
176 %pIn.0.val.prol = load i16, ptr %pSrc, align 2
177 %1 = getelementptr i8, ptr %pSrc, i32 2
178 %pIn.0.val13.prol = load i16, ptr %1, align 2
179 %conv.i.prol = sext i16 %pIn.0.val13.prol to i32
180 %shl.i.prol = shl nsw i32 %conv.i.prol, 16
181 %conv22.i.prol = zext i16 %pIn.0.val.prol to i32
182 %add.ptr2.prol = getelementptr inbounds i16, ptr %pSrc, i32 4
183 %add.ptr3.prol = getelementptr inbounds i16, ptr %pSrc, i32 2
184 %add.ptr3.val.prol = load i16, ptr %add.ptr3.prol, align 2
185 %2 = getelementptr i16, ptr %pSrc, i32 3
186 %add.ptr3.val14.prol = load i16, ptr %2, align 2
187 %conv.i15.prol = sext i16 %add.ptr3.val14.prol to i32
188 %shl.i16.prol = shl nsw i32 %conv.i15.prol, 16
189 %conv22.i17.prol = zext i16 %add.ptr3.val.prol to i32
190 %shl.prol = shl nuw i32 %conv22.i.prol, 16
191 %shl5.prol = shl nuw i32 %conv22.i17.prol, 16
192 %incdec.ptr.prol = getelementptr inbounds i32, ptr %pDst, i32 1
193 store i32 %shl.prol, ptr %pDst, align 4
194 %incdec.ptr7.prol = getelementptr inbounds i32, ptr %pDst, i32 2
195 store i32 %shl.i.prol, ptr %incdec.ptr.prol, align 4
196 %incdec.ptr8.prol = getelementptr inbounds i32, ptr %pDst, i32 3
197 store i32 %shl5.prol, ptr %incdec.ptr7.prol, align 4
198 %incdec.ptr9.prol = getelementptr inbounds i32, ptr %pDst, i32 4
199 store i32 %shl.i16.prol, ptr %incdec.ptr8.prol, align 4
200 %dec.prol = add nsw i32 %shr, -1
201 %prol.iter.cmp.not = icmp eq i32 %xtraiter, 1
202 br i1 %prol.iter.cmp.not, label %while.body.prol.loopexit, label %while.body.prol.1
; Second peeled group: input elements 4..7 (reached when %xtraiter is not 1).
204 while.body.prol.1: ; preds = %while.body.prol
205 %pIn.0.val.prol.1 = load i16, ptr %add.ptr2.prol, align 2
206 %3 = getelementptr i16, ptr %pSrc, i32 5
207 %pIn.0.val13.prol.1 = load i16, ptr %3, align 2
208 %conv.i.prol.1 = sext i16 %pIn.0.val13.prol.1 to i32
209 %shl.i.prol.1 = shl nsw i32 %conv.i.prol.1, 16
210 %conv22.i.prol.1 = zext i16 %pIn.0.val.prol.1 to i32
211 %add.ptr2.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 8
212 %add.ptr3.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 6
213 %add.ptr3.val.prol.1 = load i16, ptr %add.ptr3.prol.1, align 2
214 %4 = getelementptr i16, ptr %pSrc, i32 7
215 %add.ptr3.val14.prol.1 = load i16, ptr %4, align 2
216 %conv.i15.prol.1 = sext i16 %add.ptr3.val14.prol.1 to i32
217 %shl.i16.prol.1 = shl nsw i32 %conv.i15.prol.1, 16
218 %conv22.i17.prol.1 = zext i16 %add.ptr3.val.prol.1 to i32
219 %shl.prol.1 = shl nuw i32 %conv22.i.prol.1, 16
220 %shl5.prol.1 = shl nuw i32 %conv22.i17.prol.1, 16
221 %incdec.ptr.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 5
222 store i32 %shl.prol.1, ptr %incdec.ptr9.prol, align 4
223 %incdec.ptr7.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 6
224 store i32 %shl.i.prol.1, ptr %incdec.ptr.prol.1, align 4
225 %incdec.ptr8.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 7
226 store i32 %shl5.prol.1, ptr %incdec.ptr7.prol.1, align 4
227 %incdec.ptr9.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 8
228 store i32 %shl.i16.prol.1, ptr %incdec.ptr8.prol.1, align 4
229 %dec.prol.1 = add nsw i32 %shr, -2
230 %prol.iter.cmp.1.not = icmp eq i32 %xtraiter, 2
231 br i1 %prol.iter.cmp.1.not, label %while.body.prol.loopexit, label %while.body.prol.2
; Third peeled group: input elements 8..11 (reached only when %xtraiter is 3).
233 while.body.prol.2: ; preds = %while.body.prol.1
234 %pIn.0.val.prol.2 = load i16, ptr %add.ptr2.prol.1, align 2
235 %5 = getelementptr i16, ptr %pSrc, i32 9
236 %pIn.0.val13.prol.2 = load i16, ptr %5, align 2
237 %conv.i.prol.2 = sext i16 %pIn.0.val13.prol.2 to i32
238 %shl.i.prol.2 = shl nsw i32 %conv.i.prol.2, 16
239 %conv22.i.prol.2 = zext i16 %pIn.0.val.prol.2 to i32
240 %add.ptr2.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 12
241 %add.ptr3.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 10
242 %add.ptr3.val.prol.2 = load i16, ptr %add.ptr3.prol.2, align 2
243 %6 = getelementptr i16, ptr %pSrc, i32 11
244 %add.ptr3.val14.prol.2 = load i16, ptr %6, align 2
245 %conv.i15.prol.2 = sext i16 %add.ptr3.val14.prol.2 to i32
246 %shl.i16.prol.2 = shl nsw i32 %conv.i15.prol.2, 16
247 %conv22.i17.prol.2 = zext i16 %add.ptr3.val.prol.2 to i32
248 %shl.prol.2 = shl nuw i32 %conv22.i.prol.2, 16
249 %shl5.prol.2 = shl nuw i32 %conv22.i17.prol.2, 16
250 %incdec.ptr.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 9
251 store i32 %shl.prol.2, ptr %incdec.ptr9.prol.1, align 4
252 %incdec.ptr7.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 10
253 store i32 %shl.i.prol.2, ptr %incdec.ptr.prol.2, align 4
254 %incdec.ptr8.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 11
255 store i32 %shl5.prol.2, ptr %incdec.ptr7.prol.2, align 4
256 %incdec.ptr9.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 12
257 store i32 %shl.i16.prol.2, ptr %incdec.ptr8.prol.2, align 4
258 %dec.prol.2 = add nsw i32 %shr, -3
259 br label %while.body.prol.loopexit
; Merge point after the peeled groups: the phis select the advanced pointers
; and the remaining group count. The four-group loop is entered only when
; %shr - 1 >= 3, i.e. there were at least four groups in total.
261 while.body.prol.loopexit: ; preds = %while.body.prol, %while.body.prol.1, %while.body.prol.2, %while.body.preheader
262 %add.ptr2.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ]
263 %incdec.ptr9.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ]
264 %pDst.addr.022.unr = phi ptr [ %pDst, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ]
265 %blkCnt.021.unr = phi i32 [ %shr, %while.body.preheader ], [ %dec.prol, %while.body.prol ], [ %dec.prol.1, %while.body.prol.1 ], [ %dec.prol.2, %while.body.prol.2 ]
266 %pIn.020.unr = phi ptr [ %pSrc, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ]
267 %7 = icmp ult i32 %0, 3
268 br i1 %7, label %while.end, label %while.body
; Main loop: four groups (16 input elements, 16 output words) per iteration;
; %blkCnt.021 is decremented by 4 and the loop exits when it reaches zero.
270 while.body: ; preds = %while.body.prol.loopexit, %while.body
271 %pDst.addr.022 = phi ptr [ %incdec.ptr9.3, %while.body ], [ %pDst.addr.022.unr, %while.body.prol.loopexit ]
272 %blkCnt.021 = phi i32 [ %dec.3, %while.body ], [ %blkCnt.021.unr, %while.body.prol.loopexit ]
273 %pIn.020 = phi ptr [ %add.ptr2.3, %while.body ], [ %pIn.020.unr, %while.body.prol.loopexit ]
274 %pIn.0.val = load i16, ptr %pIn.020, align 2
275 %8 = getelementptr i8, ptr %pIn.020, i32 2
276 %pIn.0.val13 = load i16, ptr %8, align 2
277 %conv.i = sext i16 %pIn.0.val13 to i32
278 %shl.i = shl nsw i32 %conv.i, 16
279 %conv22.i = zext i16 %pIn.0.val to i32
280 %add.ptr2 = getelementptr inbounds i16, ptr %pIn.020, i32 4
281 %add.ptr3 = getelementptr inbounds i16, ptr %pIn.020, i32 2
282 %add.ptr3.val = load i16, ptr %add.ptr3, align 2
283 %9 = getelementptr i16, ptr %pIn.020, i32 3
284 %add.ptr3.val14 = load i16, ptr %9, align 2
285 %conv.i15 = sext i16 %add.ptr3.val14 to i32
286 %shl.i16 = shl nsw i32 %conv.i15, 16
287 %conv22.i17 = zext i16 %add.ptr3.val to i32
288 %shl = shl nuw i32 %conv22.i, 16
289 %shl5 = shl nuw i32 %conv22.i17, 16
290 %incdec.ptr = getelementptr inbounds i32, ptr %pDst.addr.022, i32 1
291 store i32 %shl, ptr %pDst.addr.022, align 4
292 %incdec.ptr7 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 2
293 store i32 %shl.i, ptr %incdec.ptr, align 4
294 %incdec.ptr8 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 3
295 store i32 %shl5, ptr %incdec.ptr7, align 4
296 %incdec.ptr9 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 4
297 store i32 %shl.i16, ptr %incdec.ptr8, align 4
298 %pIn.0.val.1 = load i16, ptr %add.ptr2, align 2
299 %10 = getelementptr i16, ptr %pIn.020, i32 5
300 %pIn.0.val13.1 = load i16, ptr %10, align 2
301 %conv.i.1 = sext i16 %pIn.0.val13.1 to i32
302 %shl.i.1 = shl nsw i32 %conv.i.1, 16
303 %conv22.i.1 = zext i16 %pIn.0.val.1 to i32
304 %add.ptr2.1 = getelementptr inbounds i16, ptr %pIn.020, i32 8
305 %add.ptr3.1 = getelementptr inbounds i16, ptr %pIn.020, i32 6
306 %add.ptr3.val.1 = load i16, ptr %add.ptr3.1, align 2
307 %11 = getelementptr i16, ptr %pIn.020, i32 7
308 %add.ptr3.val14.1 = load i16, ptr %11, align 2
309 %conv.i15.1 = sext i16 %add.ptr3.val14.1 to i32
310 %shl.i16.1 = shl nsw i32 %conv.i15.1, 16
311 %conv22.i17.1 = zext i16 %add.ptr3.val.1 to i32
312 %shl.1 = shl nuw i32 %conv22.i.1, 16
313 %shl5.1 = shl nuw i32 %conv22.i17.1, 16
314 %incdec.ptr.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 5
315 store i32 %shl.1, ptr %incdec.ptr9, align 4
316 %incdec.ptr7.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 6
317 store i32 %shl.i.1, ptr %incdec.ptr.1, align 4
318 %incdec.ptr8.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 7
319 store i32 %shl5.1, ptr %incdec.ptr7.1, align 4
320 %incdec.ptr9.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 8
321 store i32 %shl.i16.1, ptr %incdec.ptr8.1, align 4
322 %pIn.0.val.2 = load i16, ptr %add.ptr2.1, align 2
323 %12 = getelementptr i16, ptr %pIn.020, i32 9
324 %pIn.0.val13.2 = load i16, ptr %12, align 2
325 %conv.i.2 = sext i16 %pIn.0.val13.2 to i32
326 %shl.i.2 = shl nsw i32 %conv.i.2, 16
327 %conv22.i.2 = zext i16 %pIn.0.val.2 to i32
328 %add.ptr2.2 = getelementptr inbounds i16, ptr %pIn.020, i32 12
329 %add.ptr3.2 = getelementptr inbounds i16, ptr %pIn.020, i32 10
330 %add.ptr3.val.2 = load i16, ptr %add.ptr3.2, align 2
331 %13 = getelementptr i16, ptr %pIn.020, i32 11
332 %add.ptr3.val14.2 = load i16, ptr %13, align 2
333 %conv.i15.2 = sext i16 %add.ptr3.val14.2 to i32
334 %shl.i16.2 = shl nsw i32 %conv.i15.2, 16
335 %conv22.i17.2 = zext i16 %add.ptr3.val.2 to i32
336 %shl.2 = shl nuw i32 %conv22.i.2, 16
337 %shl5.2 = shl nuw i32 %conv22.i17.2, 16
338 %incdec.ptr.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 9
339 store i32 %shl.2, ptr %incdec.ptr9.1, align 4
340 %incdec.ptr7.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 10
341 store i32 %shl.i.2, ptr %incdec.ptr.2, align 4
342 %incdec.ptr8.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 11
343 store i32 %shl5.2, ptr %incdec.ptr7.2, align 4
344 %incdec.ptr9.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 12
345 store i32 %shl.i16.2, ptr %incdec.ptr8.2, align 4
346 %pIn.0.val.3 = load i16, ptr %add.ptr2.2, align 2
347 %14 = getelementptr i16, ptr %pIn.020, i32 13
348 %pIn.0.val13.3 = load i16, ptr %14, align 2
349 %conv.i.3 = sext i16 %pIn.0.val13.3 to i32
350 %shl.i.3 = shl nsw i32 %conv.i.3, 16
351 %conv22.i.3 = zext i16 %pIn.0.val.3 to i32
352 %add.ptr2.3 = getelementptr inbounds i16, ptr %pIn.020, i32 16
353 %add.ptr3.3 = getelementptr inbounds i16, ptr %pIn.020, i32 14
354 %add.ptr3.val.3 = load i16, ptr %add.ptr3.3, align 2
355 %15 = getelementptr i16, ptr %pIn.020, i32 15
356 %add.ptr3.val14.3 = load i16, ptr %15, align 2
357 %conv.i15.3 = sext i16 %add.ptr3.val14.3 to i32
358 %shl.i16.3 = shl nsw i32 %conv.i15.3, 16
359 %conv22.i17.3 = zext i16 %add.ptr3.val.3 to i32
360 %shl.3 = shl nuw i32 %conv22.i.3, 16
361 %shl5.3 = shl nuw i32 %conv22.i17.3, 16
362 %incdec.ptr.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 13
363 store i32 %shl.3, ptr %incdec.ptr9.2, align 4
364 %incdec.ptr7.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 14
365 store i32 %shl.i.3, ptr %incdec.ptr.3, align 4
366 %incdec.ptr8.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 15
367 store i32 %shl5.3, ptr %incdec.ptr7.3, align 4
368 %incdec.ptr9.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 16
369 store i32 %shl.i16.3, ptr %incdec.ptr8.3, align 4
370 %dec.3 = add nsw i32 %blkCnt.021, -4
371 %cmp.not.3 = icmp eq i32 %dec.3, 0
372 br i1 %cmp.not.3, label %while.end, label %while.body
; Tail: %rem = %blockSize & 3 leftover elements are converted one at a time
; by while.body12, while.body12.1 and while.body12.2.
374 while.end: ; preds = %while.body.prol.loopexit, %while.body, %entry
375 %pIn.0.lcssa = phi ptr [ %pSrc, %entry ], [ %add.ptr2.lcssa.unr, %while.body.prol.loopexit ], [ %add.ptr2.3, %while.body ]
376 %pDst.addr.0.lcssa = phi ptr [ %pDst, %entry ], [ %incdec.ptr9.lcssa.unr, %while.body.prol.loopexit ], [ %incdec.ptr9.3, %while.body ]
377 %rem = and i32 %blockSize, 3
378 %cmp11.not24 = icmp eq i32 %rem, 0
379 br i1 %cmp11.not24, label %while.end17, label %while.body12
; First leftover element.
381 while.body12: ; preds = %while.end
382 %16 = load i16, ptr %pIn.0.lcssa, align 2
383 %conv = sext i16 %16 to i32
384 %shl14 = shl nsw i32 %conv, 16
385 store i32 %shl14, ptr %pDst.addr.0.lcssa, align 4
386 %cmp11.not = icmp eq i32 %rem, 1
387 br i1 %cmp11.not, label %while.end17, label %while.body12.1
; Second leftover element (reached when %rem is not 1).
389 while.body12.1: ; preds = %while.body12
390 %incdec.ptr15 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 1
391 %incdec.ptr13 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 1
392 %17 = load i16, ptr %incdec.ptr13, align 2
393 %conv.1 = sext i16 %17 to i32
394 %shl14.1 = shl nsw i32 %conv.1, 16
395 store i32 %shl14.1, ptr %incdec.ptr15, align 4
396 %cmp11.not.1 = icmp eq i32 %rem, 2
397 br i1 %cmp11.not.1, label %while.end17, label %while.body12.2
; Third leftover element (reached only when %rem is 3).
399 while.body12.2: ; preds = %while.body12.1
400 %incdec.ptr15.1 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 2
401 %incdec.ptr13.1 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 2
402 %18 = load i16, ptr %incdec.ptr13.1, align 2
403 %conv.2 = sext i16 %18 to i32
404 %shl14.2 = shl nsw i32 %conv.2, 16
405 store i32 %shl14.2, ptr %incdec.ptr15.1, align 4
406 br label %while.end17
408 while.end17: ; preds = %while.body12, %while.body12.1, %while.body12.2, %while.end
412 define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr nocapture noundef writeonly %pDst, i32 noundef %blockSize) {
413 ; CHECK-LABEL: arm_q15_to_q31_altorder:
414 ; CHECK: @ %bb.0: @ %entry
415 ; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
416 ; CHECK-NEXT: push {r4, r5, r6, r7, lr}
417 ; CHECK-NEXT: .pad #8
418 ; CHECK-NEXT: sub sp, #8
419 ; CHECK-NEXT: mov r7, r2
420 ; CHECK-NEXT: lsrs r3, r2, #2
421 ; CHECK-NEXT: beq .LBB1_6
422 ; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
423 ; CHECK-NEXT: movs r5, #3
424 ; CHECK-NEXT: ands r5, r3
425 ; CHECK-NEXT: subs r2, r3, #1
426 ; CHECK-NEXT: cbz r5, .LBB1_4
427 ; CHECK-NEXT: @ %bb.2: @ %while.body.prol
428 ; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
429 ; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill
430 ; CHECK-NEXT: ldrh r2, [r0]
431 ; CHECK-NEXT: ldrh r7, [r0, #2]
432 ; CHECK-NEXT: ldrh r4, [r0, #4]
433 ; CHECK-NEXT: ldrh r6, [r0, #6]
434 ; CHECK-NEXT: lsls r6, r6, #16
435 ; CHECK-NEXT: lsls r4, r4, #16
436 ; CHECK-NEXT: lsls r7, r7, #16
437 ; CHECK-NEXT: lsls r2, r2, #16
438 ; CHECK-NEXT: stm r1!, {r2, r7}
439 ; CHECK-NEXT: str r4, [r1]
440 ; CHECK-NEXT: str r6, [r1, #4]
441 ; CHECK-NEXT: subs r1, #8
442 ; CHECK-NEXT: cmp r5, #1
443 ; CHECK-NEXT: bne .LBB1_11
444 ; CHECK-NEXT: @ %bb.3:
445 ; CHECK-NEXT: adds r1, #16
446 ; CHECK-NEXT: adds r0, #8
447 ; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
448 ; CHECK-NEXT: mov r3, r2
449 ; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
450 ; CHECK-NEXT: .LBB1_4: @ %while.body.prol.loopexit
451 ; CHECK-NEXT: cmp r2, #3
452 ; CHECK-NEXT: blo .LBB1_6
453 ; CHECK-NEXT: .LBB1_5: @ %while.body
454 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
455 ; CHECK-NEXT: ldrh r2, [r0]
456 ; CHECK-NEXT: ldrh r4, [r0, #2]
457 ; CHECK-NEXT: ldrh r5, [r0, #4]
458 ; CHECK-NEXT: ldrh r6, [r0, #6]
459 ; CHECK-NEXT: lsls r6, r6, #16
460 ; CHECK-NEXT: str r6, [r1, #12]
461 ; CHECK-NEXT: lsls r5, r5, #16
462 ; CHECK-NEXT: str r5, [r1, #8]
463 ; CHECK-NEXT: lsls r4, r4, #16
464 ; CHECK-NEXT: str r4, [r1, #4]
465 ; CHECK-NEXT: lsls r2, r2, #16
466 ; CHECK-NEXT: str r2, [r1]
467 ; CHECK-NEXT: ldrh r2, [r0, #8]
468 ; CHECK-NEXT: ldrh r4, [r0, #10]
469 ; CHECK-NEXT: ldrh r5, [r0, #12]
470 ; CHECK-NEXT: ldrh r6, [r0, #14]
471 ; CHECK-NEXT: lsls r6, r6, #16
472 ; CHECK-NEXT: str r6, [r1, #28]
473 ; CHECK-NEXT: lsls r5, r5, #16
474 ; CHECK-NEXT: str r5, [r1, #24]
475 ; CHECK-NEXT: lsls r4, r4, #16
476 ; CHECK-NEXT: str r4, [r1, #20]
477 ; CHECK-NEXT: lsls r2, r2, #16
478 ; CHECK-NEXT: str r2, [r1, #16]
479 ; CHECK-NEXT: ldrh r2, [r0, #16]
480 ; CHECK-NEXT: ldrh r4, [r0, #18]
481 ; CHECK-NEXT: ldrh r5, [r0, #20]
482 ; CHECK-NEXT: ldrh r6, [r0, #22]
483 ; CHECK-NEXT: lsls r6, r6, #16
484 ; CHECK-NEXT: str r6, [r1, #44]
485 ; CHECK-NEXT: lsls r5, r5, #16
486 ; CHECK-NEXT: str r5, [r1, #40]
487 ; CHECK-NEXT: lsls r4, r4, #16
488 ; CHECK-NEXT: str r4, [r1, #36]
489 ; CHECK-NEXT: lsls r2, r2, #16
490 ; CHECK-NEXT: str r2, [r1, #32]
491 ; CHECK-NEXT: ldrh r2, [r0, #24]
492 ; CHECK-NEXT: ldrh r4, [r0, #26]
493 ; CHECK-NEXT: ldrh r5, [r0, #28]
494 ; CHECK-NEXT: ldrh r6, [r0, #30]
495 ; CHECK-NEXT: lsls r6, r6, #16
496 ; CHECK-NEXT: str r6, [r1, #60]
497 ; CHECK-NEXT: lsls r5, r5, #16
498 ; CHECK-NEXT: str r5, [r1, #56]
499 ; CHECK-NEXT: lsls r4, r4, #16
500 ; CHECK-NEXT: str r4, [r1, #52]
501 ; CHECK-NEXT: lsls r2, r2, #16
502 ; CHECK-NEXT: str r2, [r1, #48]
503 ; CHECK-NEXT: adds r1, #64
504 ; CHECK-NEXT: subs r3, r3, #4
505 ; CHECK-NEXT: adds r0, #32
506 ; CHECK-NEXT: cmp r3, #0
507 ; CHECK-NEXT: bne .LBB1_5
508 ; CHECK-NEXT: .LBB1_6: @ %while.end
509 ; CHECK-NEXT: movs r2, #3
510 ; CHECK-NEXT: ands r7, r2
511 ; CHECK-NEXT: beq .LBB1_10
512 ; CHECK-NEXT: @ %bb.7: @ %while.body12
513 ; CHECK-NEXT: ldrh r2, [r0]
514 ; CHECK-NEXT: lsls r2, r2, #16
515 ; CHECK-NEXT: str r2, [r1]
516 ; CHECK-NEXT: cmp r7, #1
517 ; CHECK-NEXT: beq .LBB1_10
518 ; CHECK-NEXT: @ %bb.8: @ %while.body12.1
519 ; CHECK-NEXT: ldrh r2, [r0, #2]
520 ; CHECK-NEXT: lsls r2, r2, #16
521 ; CHECK-NEXT: str r2, [r1, #4]
522 ; CHECK-NEXT: cmp r7, #2
523 ; CHECK-NEXT: beq .LBB1_10
524 ; CHECK-NEXT: @ %bb.9: @ %while.body12.2
525 ; CHECK-NEXT: ldrh r0, [r0, #4]
526 ; CHECK-NEXT: lsls r0, r0, #16
527 ; CHECK-NEXT: str r0, [r1, #8]
528 ; CHECK-NEXT: .LBB1_10: @ %while.end17
529 ; CHECK-NEXT: add sp, #8
530 ; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
531 ; CHECK-NEXT: .LBB1_11: @ %while.body.prol.1
532 ; CHECK-NEXT: ldrh r2, [r0, #8]
533 ; CHECK-NEXT: ldrh r4, [r0, #10]
534 ; CHECK-NEXT: ldrh r6, [r0, #12]
535 ; CHECK-NEXT: ldrh r7, [r0, #14]
536 ; CHECK-NEXT: lsls r7, r7, #16
537 ; CHECK-NEXT: lsls r6, r6, #16
538 ; CHECK-NEXT: lsls r4, r4, #16
539 ; CHECK-NEXT: lsls r2, r2, #16
540 ; CHECK-NEXT: str r2, [r1, #16]
541 ; CHECK-NEXT: str r4, [r1, #20]
542 ; CHECK-NEXT: str r6, [r1, #24]
543 ; CHECK-NEXT: str r7, [r1, #28]
544 ; CHECK-NEXT: cmp r5, #2
545 ; CHECK-NEXT: bne .LBB1_13
546 ; CHECK-NEXT: @ %bb.12:
547 ; CHECK-NEXT: subs r3, r3, #2
548 ; CHECK-NEXT: adds r1, #32
549 ; CHECK-NEXT: adds r0, #16
550 ; CHECK-NEXT: b .LBB1_14
551 ; CHECK-NEXT: .LBB1_13: @ %while.body.prol.2
552 ; CHECK-NEXT: ldrh r2, [r0, #16]
553 ; CHECK-NEXT: ldrh r4, [r0, #18]
554 ; CHECK-NEXT: ldrh r5, [r0, #20]
555 ; CHECK-NEXT: ldrh r6, [r0, #22]
556 ; CHECK-NEXT: lsls r6, r6, #16
557 ; CHECK-NEXT: lsls r5, r5, #16
558 ; CHECK-NEXT: lsls r4, r4, #16
559 ; CHECK-NEXT: lsls r2, r2, #16
560 ; CHECK-NEXT: mov r7, r1
561 ; CHECK-NEXT: adds r7, #32
562 ; CHECK-NEXT: stm r7!, {r2, r4, r5, r6}
563 ; CHECK-NEXT: subs r3, r3, #3
564 ; CHECK-NEXT: adds r1, #48
565 ; CHECK-NEXT: adds r0, #24
566 ; CHECK-NEXT: .LBB1_14: @ %while.body.prol.loopexit
567 ; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
568 ; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
569 ; CHECK-NEXT: cmp r2, #3
570 ; CHECK-NEXT: bhs .LBB1_5
571 ; CHECK-NEXT: b .LBB1_6
573 %cmp.not18 = icmp ult i32 %blockSize, 4
574 br i1 %cmp.not18, label %while.end, label %while.body.preheader
576 while.body.preheader: ; preds = %entry
577 %shr = lshr i32 %blockSize, 2
578 %0 = add nsw i32 %shr, -1
579 %xtraiter = and i32 %shr, 3
580 %lcmp.mod.not = icmp eq i32 %xtraiter, 0
581 br i1 %lcmp.mod.not, label %while.body.prol.loopexit, label %while.body.prol
583 while.body.prol: ; preds = %while.body.preheader
584 %arrayidx.i.prol = getelementptr inbounds i16, ptr %pSrc, i32 1
585 %1 = load i16, ptr %arrayidx.i.prol, align 2
586 %conv.i.prol = sext i16 %1 to i32
587 %shl.i.prol = shl nsw i32 %conv.i.prol, 16
588 %2 = load i16, ptr %pSrc, align 2
589 %conv22.i.prol = zext i16 %2 to i32
590 %add.ptr2.prol = getelementptr inbounds i16, ptr %pSrc, i32 4
591 %add.ptr3.prol = getelementptr inbounds i16, ptr %pSrc, i32 2
592 %arrayidx.i13.prol = getelementptr inbounds i16, ptr %pSrc, i32 3
593 %3 = load i16, ptr %arrayidx.i13.prol, align 2
594 %conv.i14.prol = sext i16 %3 to i32
595 %shl.i15.prol = shl nsw i32 %conv.i14.prol, 16
596 %4 = load i16, ptr %add.ptr3.prol, align 2
597 %conv22.i16.prol = zext i16 %4 to i32
598 %shl.prol = shl nuw i32 %conv22.i.prol, 16
599 %shl5.prol = shl nuw i32 %conv22.i16.prol, 16
600 %incdec.ptr.prol = getelementptr inbounds i32, ptr %pDst, i32 1
601 store i32 %shl.prol, ptr %pDst, align 4
602 %incdec.ptr7.prol = getelementptr inbounds i32, ptr %pDst, i32 2
603 store i32 %shl.i.prol, ptr %incdec.ptr.prol, align 4
604 %incdec.ptr8.prol = getelementptr inbounds i32, ptr %pDst, i32 3
605 store i32 %shl5.prol, ptr %incdec.ptr7.prol, align 4
606 %incdec.ptr9.prol = getelementptr inbounds i32, ptr %pDst, i32 4
607 store i32 %shl.i15.prol, ptr %incdec.ptr8.prol, align 4
608 %dec.prol = add nsw i32 %shr, -1
609 %prol.iter.cmp.not = icmp eq i32 %xtraiter, 1
610 br i1 %prol.iter.cmp.not, label %while.body.prol.loopexit, label %while.body.prol.1
612 while.body.prol.1: ; preds = %while.body.prol
613 %arrayidx.i.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 5
614 %5 = load i16, ptr %arrayidx.i.prol.1, align 2
615 %conv.i.prol.1 = sext i16 %5 to i32
616 %shl.i.prol.1 = shl nsw i32 %conv.i.prol.1, 16
617 %6 = load i16, ptr %add.ptr2.prol, align 2
618 %conv22.i.prol.1 = zext i16 %6 to i32
619 %add.ptr2.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 8
620 %add.ptr3.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 6
621 %arrayidx.i13.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 7
622 %7 = load i16, ptr %arrayidx.i13.prol.1, align 2
623 %conv.i14.prol.1 = sext i16 %7 to i32
624 %shl.i15.prol.1 = shl nsw i32 %conv.i14.prol.1, 16
625 %8 = load i16, ptr %add.ptr3.prol.1, align 2
626 %conv22.i16.prol.1 = zext i16 %8 to i32
627 %shl.prol.1 = shl nuw i32 %conv22.i.prol.1, 16
628 %shl5.prol.1 = shl nuw i32 %conv22.i16.prol.1, 16
629 %incdec.ptr.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 5
630 store i32 %shl.prol.1, ptr %incdec.ptr9.prol, align 4
631 %incdec.ptr7.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 6
632 store i32 %shl.i.prol.1, ptr %incdec.ptr.prol.1, align 4
633 %incdec.ptr8.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 7
634 store i32 %shl5.prol.1, ptr %incdec.ptr7.prol.1, align 4
635 %incdec.ptr9.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 8
636 store i32 %shl.i15.prol.1, ptr %incdec.ptr8.prol.1, align 4
637 %dec.prol.1 = add nsw i32 %shr, -2
638 %prol.iter.cmp.1.not = icmp eq i32 %xtraiter, 2
639 br i1 %prol.iter.cmp.1.not, label %while.body.prol.loopexit, label %while.body.prol.2
641 while.body.prol.2: ; preds = %while.body.prol.1
642 %arrayidx.i.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 9
643 %9 = load i16, ptr %arrayidx.i.prol.2, align 2
644 %conv.i.prol.2 = sext i16 %9 to i32
645 %shl.i.prol.2 = shl nsw i32 %conv.i.prol.2, 16
646 %10 = load i16, ptr %add.ptr2.prol.1, align 2
647 %conv22.i.prol.2 = zext i16 %10 to i32
648 %add.ptr2.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 12
649 %add.ptr3.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 10
650 %arrayidx.i13.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 11
651 %11 = load i16, ptr %arrayidx.i13.prol.2, align 2
652 %conv.i14.prol.2 = sext i16 %11 to i32
653 %shl.i15.prol.2 = shl nsw i32 %conv.i14.prol.2, 16
654 %12 = load i16, ptr %add.ptr3.prol.2, align 2
655 %conv22.i16.prol.2 = zext i16 %12 to i32
656 %shl.prol.2 = shl nuw i32 %conv22.i.prol.2, 16
657 %shl5.prol.2 = shl nuw i32 %conv22.i16.prol.2, 16
658 %incdec.ptr.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 9
659 store i32 %shl.prol.2, ptr %incdec.ptr9.prol.1, align 4
660 %incdec.ptr7.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 10
661 store i32 %shl.i.prol.2, ptr %incdec.ptr.prol.2, align 4
662 %incdec.ptr8.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 11
663 store i32 %shl5.prol.2, ptr %incdec.ptr7.prol.2, align 4
664 %incdec.ptr9.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 12
665 store i32 %shl.i15.prol.2, ptr %incdec.ptr8.prol.2, align 4
666 %dec.prol.2 = add nsw i32 %shr, -3
667 br label %while.body.prol.loopexit
669 while.body.prol.loopexit: ; preds = %while.body.prol, %while.body.prol.1, %while.body.prol.2, %while.body.preheader
670 %add.ptr2.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ]
671 %incdec.ptr9.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ]
672 %pDst.addr.021.unr = phi ptr [ %pDst, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ]
673 %blkCnt.020.unr = phi i32 [ %shr, %while.body.preheader ], [ %dec.prol, %while.body.prol ], [ %dec.prol.1, %while.body.prol.1 ], [ %dec.prol.2, %while.body.prol.2 ]
674 %pIn.019.unr = phi ptr [ %pSrc, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ]
675 %13 = icmp ult i32 %0, 3
676 br i1 %13, label %while.end, label %while.body
678 while.body: ; preds = %while.body.prol.loopexit, %while.body
; Main loop body. Each trip reads 16 consecutive i16 values starting at
; %pIn.019 and writes 16 consecutive i32 values starting at %pDst.addr.021,
; then lowers the block counter by 4. The body is four copies of the same
; four-element group (value names without suffix, then ".1", ".2", ".3").
;
; Within a group the odd-indexed inputs are widened with sext and the
; even-indexed ones with zext before the shift left by 16; the shift discards
; the extension bits, so both forms store the input in the upper half of the
; 32-bit result with the lower half zero. Results are stored in input order.
679 %pDst.addr.021 = phi ptr [ %incdec.ptr9.3, %while.body ], [ %pDst.addr.021.unr, %while.body.prol.loopexit ]
680 %blkCnt.020 = phi i32 [ %dec.3, %while.body ], [ %blkCnt.020.unr, %while.body.prol.loopexit ]
681 %pIn.019 = phi ptr [ %add.ptr2.3, %while.body ], [ %pIn.019.unr, %while.body.prol.loopexit ]
; Group 0: inputs 0..3 -> outputs 0..3.
682 %arrayidx.i = getelementptr inbounds i16, ptr %pIn.019, i32 1
683 %14 = load i16, ptr %arrayidx.i, align 2
684 %conv.i = sext i16 %14 to i32
685 %shl.i = shl nsw i32 %conv.i, 16
686 %15 = load i16, ptr %pIn.019, align 2
687 %conv22.i = zext i16 %15 to i32
688 %add.ptr2 = getelementptr inbounds i16, ptr %pIn.019, i32 4
689 %add.ptr3 = getelementptr inbounds i16, ptr %pIn.019, i32 2
690 %arrayidx.i13 = getelementptr inbounds i16, ptr %pIn.019, i32 3
691 %16 = load i16, ptr %arrayidx.i13, align 2
692 %conv.i14 = sext i16 %16 to i32
693 %shl.i15 = shl nsw i32 %conv.i14, 16
694 %17 = load i16, ptr %add.ptr3, align 2
695 %conv22.i16 = zext i16 %17 to i32
696 %shl = shl nuw i32 %conv22.i, 16
697 %shl5 = shl nuw i32 %conv22.i16, 16
698 %incdec.ptr = getelementptr inbounds i32, ptr %pDst.addr.021, i32 1
699 store i32 %shl, ptr %pDst.addr.021, align 4
700 %incdec.ptr7 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 2
701 store i32 %shl.i, ptr %incdec.ptr, align 4
702 %incdec.ptr8 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 3
703 store i32 %shl5, ptr %incdec.ptr7, align 4
704 %incdec.ptr9 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 4
705 store i32 %shl.i15, ptr %incdec.ptr8, align 4
; Group 1: inputs 4..7 -> outputs 4..7.
706 %arrayidx.i.1 = getelementptr inbounds i16, ptr %pIn.019, i32 5
707 %18 = load i16, ptr %arrayidx.i.1, align 2
708 %conv.i.1 = sext i16 %18 to i32
709 %shl.i.1 = shl nsw i32 %conv.i.1, 16
710 %19 = load i16, ptr %add.ptr2, align 2
711 %conv22.i.1 = zext i16 %19 to i32
712 %add.ptr2.1 = getelementptr inbounds i16, ptr %pIn.019, i32 8
713 %add.ptr3.1 = getelementptr inbounds i16, ptr %pIn.019, i32 6
714 %arrayidx.i13.1 = getelementptr inbounds i16, ptr %pIn.019, i32 7
715 %20 = load i16, ptr %arrayidx.i13.1, align 2
716 %conv.i14.1 = sext i16 %20 to i32
717 %shl.i15.1 = shl nsw i32 %conv.i14.1, 16
718 %21 = load i16, ptr %add.ptr3.1, align 2
719 %conv22.i16.1 = zext i16 %21 to i32
720 %shl.1 = shl nuw i32 %conv22.i.1, 16
721 %shl5.1 = shl nuw i32 %conv22.i16.1, 16
722 %incdec.ptr.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 5
723 store i32 %shl.1, ptr %incdec.ptr9, align 4
724 %incdec.ptr7.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 6
725 store i32 %shl.i.1, ptr %incdec.ptr.1, align 4
726 %incdec.ptr8.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 7
727 store i32 %shl5.1, ptr %incdec.ptr7.1, align 4
728 %incdec.ptr9.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 8
729 store i32 %shl.i15.1, ptr %incdec.ptr8.1, align 4
; Group 2: inputs 8..11 -> outputs 8..11.
730 %arrayidx.i.2 = getelementptr inbounds i16, ptr %pIn.019, i32 9
731 %22 = load i16, ptr %arrayidx.i.2, align 2
732 %conv.i.2 = sext i16 %22 to i32
733 %shl.i.2 = shl nsw i32 %conv.i.2, 16
734 %23 = load i16, ptr %add.ptr2.1, align 2
735 %conv22.i.2 = zext i16 %23 to i32
736 %add.ptr2.2 = getelementptr inbounds i16, ptr %pIn.019, i32 12
737 %add.ptr3.2 = getelementptr inbounds i16, ptr %pIn.019, i32 10
738 %arrayidx.i13.2 = getelementptr inbounds i16, ptr %pIn.019, i32 11
739 %24 = load i16, ptr %arrayidx.i13.2, align 2
740 %conv.i14.2 = sext i16 %24 to i32
741 %shl.i15.2 = shl nsw i32 %conv.i14.2, 16
742 %25 = load i16, ptr %add.ptr3.2, align 2
743 %conv22.i16.2 = zext i16 %25 to i32
744 %shl.2 = shl nuw i32 %conv22.i.2, 16
745 %shl5.2 = shl nuw i32 %conv22.i16.2, 16
746 %incdec.ptr.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 9
747 store i32 %shl.2, ptr %incdec.ptr9.1, align 4
748 %incdec.ptr7.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 10
749 store i32 %shl.i.2, ptr %incdec.ptr.2, align 4
750 %incdec.ptr8.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 11
751 store i32 %shl5.2, ptr %incdec.ptr7.2, align 4
752 %incdec.ptr9.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 12
753 store i32 %shl.i15.2, ptr %incdec.ptr8.2, align 4
; Group 3: inputs 12..15 -> outputs 12..15. %add.ptr2.3 and %incdec.ptr9.3
; (both base + 16 elements) are the pointers fed back through the phis above.
754 %arrayidx.i.3 = getelementptr inbounds i16, ptr %pIn.019, i32 13
755 %26 = load i16, ptr %arrayidx.i.3, align 2
756 %conv.i.3 = sext i16 %26 to i32
757 %shl.i.3 = shl nsw i32 %conv.i.3, 16
758 %27 = load i16, ptr %add.ptr2.2, align 2
759 %conv22.i.3 = zext i16 %27 to i32
760 %add.ptr2.3 = getelementptr inbounds i16, ptr %pIn.019, i32 16
761 %add.ptr3.3 = getelementptr inbounds i16, ptr %pIn.019, i32 14
762 %arrayidx.i13.3 = getelementptr inbounds i16, ptr %pIn.019, i32 15
763 %28 = load i16, ptr %arrayidx.i13.3, align 2
764 %conv.i14.3 = sext i16 %28 to i32
765 %shl.i15.3 = shl nsw i32 %conv.i14.3, 16
766 %29 = load i16, ptr %add.ptr3.3, align 2
767 %conv22.i16.3 = zext i16 %29 to i32
768 %shl.3 = shl nuw i32 %conv22.i.3, 16
769 %shl5.3 = shl nuw i32 %conv22.i16.3, 16
770 %incdec.ptr.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 13
771 store i32 %shl.3, ptr %incdec.ptr9.2, align 4
772 %incdec.ptr7.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 14
773 store i32 %shl.i.3, ptr %incdec.ptr.3, align 4
774 %incdec.ptr8.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 15
775 store i32 %shl5.3, ptr %incdec.ptr7.3, align 4
776 %incdec.ptr9.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 16
777 store i32 %shl.i15.3, ptr %incdec.ptr8.3, align 4
; Four groups consumed: drop the counter by 4 and leave once it reaches zero.
778 %dec.3 = add nsw i32 %blkCnt.020, -4
779 %cmp.not.3 = icmp eq i32 %dec.3, 0
780 br i1 %cmp.not.3, label %while.end, label %while.body
782 while.end: ; preds = %while.body.prol.loopexit, %while.body, %entry
; Start of the tail handling. The phis select the current source and
; destination pointers: the original arguments when arriving from %entry,
; the prologue results when the main loop was bypassed, or the pointers
; advanced by the last main-loop trip.
783 %pIn.0.lcssa = phi ptr [ %pSrc, %entry ], [ %add.ptr2.lcssa.unr, %while.body.prol.loopexit ], [ %add.ptr2.3, %while.body ]
784 %pDst.addr.0.lcssa = phi ptr [ %pDst, %entry ], [ %incdec.ptr9.lcssa.unr, %while.body.prol.loopexit ], [ %incdec.ptr9.3, %while.body ]
; %rem = %blockSize modulo 4: the number of leftover elements (0..3).
; Nothing left -> go straight to while.end17.
785 %rem = and i32 %blockSize, 3
786 %cmp11.not23 = icmp eq i32 %rem, 0
787 br i1 %cmp11.not23, label %while.end17, label %while.body12
789 while.body12: ; preds = %while.end
; First leftover element: load the i16 at %pIn.0.lcssa, sign-extend it,
; shift it left by 16 and store the i32 at %pDst.addr.0.lcssa.
790 %30 = load i16, ptr %pIn.0.lcssa, align 2
791 %conv = sext i16 %30 to i32
792 %shl14 = shl nsw i32 %conv, 16
793 store i32 %shl14, ptr %pDst.addr.0.lcssa, align 4
; Finished if exactly one element was left over.
794 %cmp11.not = icmp eq i32 %rem, 1
795 br i1 %cmp11.not, label %while.end17, label %while.body12.1
797 while.body12.1: ; preds = %while.body12
; Second leftover element: same conversion as while.body12, applied to the
; i16 at element offset 1 from %pIn.0.lcssa and stored to the i32 at element
; offset 1 from %pDst.addr.0.lcssa.
798 %incdec.ptr15 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 1
799 %incdec.ptr13 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 1
800 %31 = load i16, ptr %incdec.ptr13, align 2
801 %conv.1 = sext i16 %31 to i32
802 %shl14.1 = shl nsw i32 %conv.1, 16
803 store i32 %shl14.1, ptr %incdec.ptr15, align 4
; Finished if exactly two elements were left over.
804 %cmp11.not.1 = icmp eq i32 %rem, 2
805 br i1 %cmp11.not.1, label %while.end17, label %while.body12.2
807 while.body12.2: ; preds = %while.body12.1
; Third and last leftover element (element offset 2 on both pointers). %rem
; is at most 3, so no further test is needed and control falls through to
; while.end17 unconditionally.
808 %incdec.ptr15.1 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 2
809 %incdec.ptr13.1 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 2
810 %32 = load i16, ptr %incdec.ptr13.1, align 2
811 %conv.2 = sext i16 %32 to i32
812 %shl14.2 = shl nsw i32 %conv.2, 16
813 store i32 %shl14.2, ptr %incdec.ptr15.1, align 4
814 br label %while.end17
816 while.end17: ; preds = %while.body12, %while.body12.1, %while.body12.2, %while.end