1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -mtriple=thumbv7-unknown-linux-gnueabihf -arm-parallel-dsp -dce %s -S -o - | FileCheck %s
; Test: a five-term i16 multiply-accumulate chain (i32 accumulation) whose
; starting accumulator is the result of a call to @bar on %in[0] and %b[0].
; %in is read at element offsets -1..-5 and %b at element offsets 1..5.
; Expected output, per the assertions below: the first product
; (%in[-1] * %b[1]) stays a scalar mul that is added to the call result, and
; the remaining four products are replaced by two chained llvm.arm.smladx
; calls fed by i32 loads from %in-3/%b+2 and %in-5/%b+4.
4 define i32 @first_mul_invalid(ptr nocapture readonly %in, ptr nocapture readonly %b) {
5 ; CHECK-LABEL: @first_mul_invalid(
7 ; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[IN:%.*]], align 2
8 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
9 ; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[B:%.*]], align 2
10 ; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32
11 ; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @bar(i32 [[CONV]], i32 [[CONV2]])
12 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -1
13 ; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
14 ; CHECK-NEXT: [[CONV4:%.*]] = sext i16 [[TMP2]] to i32
15 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 1
16 ; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX5]], align 2
17 ; CHECK-NEXT: [[CONV6:%.*]] = sext i16 [[TMP3]] to i32
18 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV6]], [[CONV4]]
19 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[CALL]]
20 ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -3
21 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13]], align 2
22 ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 2
23 ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX9]], align 2
24 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP5]], i32 [[TMP7]], i32 [[ADD]])
25 ; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -5
26 ; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX25]], align 2
27 ; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 4
28 ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX21]], align 2
29 ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP10]], i32 [[TMP12]], i32 [[TMP8]])
30 ; CHECK-NEXT: ret i32 [[TMP13]]
; Input IR. The starting accumulator is produced by a call, using the
; sign-extended first elements of both arrays as arguments.
33 %0 = load i16, ptr %in, align 2
34 %conv = sext i16 %0 to i32
35 %1 = load i16, ptr %b, align 2
36 %conv2 = sext i16 %1 to i32
37 %call = tail call i32 @bar(i32 %conv, i32 %conv2)
; Product 1: %in[-1] * %b[1]; expected to remain a scalar mul in the output.
38 %arrayidx3 = getelementptr inbounds i16, ptr %in, i32 -1
39 %2 = load i16, ptr %arrayidx3, align 2
40 %conv4 = sext i16 %2 to i32
41 %arrayidx5 = getelementptr inbounds i16, ptr %b, i32 1
42 %3 = load i16, ptr %arrayidx5, align 2
43 %conv6 = sext i16 %3 to i32
44 %mul = mul nsw i32 %conv6, %conv4
45 %add = add i32 %mul, %call
; Products 2 and 3: %in[-2] * %b[2] and %in[-3] * %b[3]; expected to be
; covered by the first smladx (i32 loads from %in-3 and %b+2).
46 %arrayidx7 = getelementptr inbounds i16, ptr %in, i32 -2
47 %4 = load i16, ptr %arrayidx7, align 2
48 %conv8 = sext i16 %4 to i32
49 %arrayidx9 = getelementptr inbounds i16, ptr %b, i32 2
50 %5 = load i16, ptr %arrayidx9, align 2
51 %conv10 = sext i16 %5 to i32
52 %mul11 = mul nsw i32 %conv10, %conv8
53 %add12 = add i32 %add, %mul11
54 %arrayidx13 = getelementptr inbounds i16, ptr %in, i32 -3
55 %6 = load i16, ptr %arrayidx13, align 2
56 %conv14 = sext i16 %6 to i32
57 %arrayidx15 = getelementptr inbounds i16, ptr %b, i32 3
58 %7 = load i16, ptr %arrayidx15, align 2
59 %conv16 = sext i16 %7 to i32
60 %mul17 = mul nsw i32 %conv16, %conv14
61 %add18 = add i32 %add12, %mul17
; Products 4 and 5: %in[-4] * %b[4] and %in[-5] * %b[5]; expected to be
; covered by the second smladx (i32 loads from %in-5 and %b+4).
62 %arrayidx19 = getelementptr inbounds i16, ptr %in, i32 -4
63 %8 = load i16, ptr %arrayidx19, align 2
64 %conv20 = sext i16 %8 to i32
65 %arrayidx21 = getelementptr inbounds i16, ptr %b, i32 4
66 %9 = load i16, ptr %arrayidx21, align 2
67 %conv22 = sext i16 %9 to i32
68 %mul23 = mul nsw i32 %conv22, %conv20
69 %add24 = add i32 %add18, %mul23
70 %arrayidx25 = getelementptr inbounds i16, ptr %in, i32 -5
71 %10 = load i16, ptr %arrayidx25, align 2
72 %conv26 = sext i16 %10 to i32
73 %arrayidx27 = getelementptr inbounds i16, ptr %b, i32 5
74 %11 = load i16, ptr %arrayidx27, align 2
75 %conv28 = sext i16 %11 to i32
76 %mul29 = mul nsw i32 %conv28, %conv26
77 %add30 = add i32 %add24, %mul29
; Test: the same five-term i16 multiply-accumulate chain as above, but with no
; separate incoming accumulator: the first add combines two products directly.
; %in is read at element offsets -1..-5 and %b at element offsets 1..5.
; Expected output, per the assertions below: the first product
; (%in[-1] * %b[1]) stays a scalar mul and is passed as the accumulator
; operand of the first of two chained llvm.arm.smladx calls.
81 define i32 @with_no_acc_input(ptr nocapture readonly %in, ptr nocapture readonly %b) {
82 ; CHECK-LABEL: @with_no_acc_input(
84 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[IN:%.*]], i32 -1
85 ; CHECK-NEXT: [[LD_2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
86 ; CHECK-NEXT: [[CONV4:%.*]] = sext i16 [[LD_2]] to i32
87 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i32 1
88 ; CHECK-NEXT: [[LD_3:%.*]] = load i16, ptr [[ARRAYIDX5]], align 2
89 ; CHECK-NEXT: [[CONV6:%.*]] = sext i16 [[LD_3]] to i32
90 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV6]], [[CONV4]]
91 ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -3
92 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX13]], align 2
93 ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 2
94 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX9]], align 2
95 ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP3]], i32 [[MUL]])
96 ; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -5
97 ; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX25]], align 2
98 ; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 4
99 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX21]], align 2
100 ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP6]], i32 [[TMP8]], i32 [[TMP4]])
101 ; CHECK-NEXT: ret i32 [[TMP9]]
; Input IR. Product 1: %in[-1] * %b[1]; expected to remain scalar and to seed
; the smladx chain.
104 %arrayidx3 = getelementptr inbounds i16, ptr %in, i32 -1
105 %ld.2 = load i16, ptr %arrayidx3, align 2
106 %conv4 = sext i16 %ld.2 to i32
107 %arrayidx5 = getelementptr inbounds i16, ptr %b, i32 1
108 %ld.3 = load i16, ptr %arrayidx5, align 2
109 %conv6 = sext i16 %ld.3 to i32
110 %mul = mul nsw i32 %conv6, %conv4
; Products 2 and 3: %in[-2] * %b[2] and %in[-3] * %b[3]; expected to be
; covered by the first smladx (i32 loads from %in-3 and %b+2).
111 %arrayidx7 = getelementptr inbounds i16, ptr %in, i32 -2
112 %ld.4 = load i16, ptr %arrayidx7, align 2
113 %conv8 = sext i16 %ld.4 to i32
114 %arrayidx9 = getelementptr inbounds i16, ptr %b, i32 2
115 %ld.5 = load i16, ptr %arrayidx9, align 2
116 %conv10 = sext i16 %ld.5 to i32
117 %mul11 = mul nsw i32 %conv10, %conv8
118 %add12 = add i32 %mul, %mul11
119 %arrayidx13 = getelementptr inbounds i16, ptr %in, i32 -3
120 %ld.6 = load i16, ptr %arrayidx13, align 2
121 %conv14 = sext i16 %ld.6 to i32
122 %arrayidx15 = getelementptr inbounds i16, ptr %b, i32 3
123 %ld.7 = load i16, ptr %arrayidx15, align 2
124 %conv16 = sext i16 %ld.7 to i32
125 %mul17 = mul nsw i32 %conv16, %conv14
126 %add18 = add i32 %add12, %mul17
; Products 4 and 5: %in[-4] * %b[4] and %in[-5] * %b[5]; expected to be
; covered by the second smladx (i32 loads from %in-5 and %b+4).
127 %arrayidx19 = getelementptr inbounds i16, ptr %in, i32 -4
128 %ld.8 = load i16, ptr %arrayidx19, align 2
129 %conv20 = sext i16 %ld.8 to i32
130 %arrayidx21 = getelementptr inbounds i16, ptr %b, i32 4
131 %ld.9 = load i16, ptr %arrayidx21, align 2
132 %conv22 = sext i16 %ld.9 to i32
133 %mul23 = mul nsw i32 %conv22, %conv20
134 %add24 = add i32 %add18, %mul23
135 %arrayidx25 = getelementptr inbounds i16, ptr %in, i32 -5
136 %ld.10 = load i16, ptr %arrayidx25, align 2
137 %conv26 = sext i16 %ld.10 to i32
138 %arrayidx27 = getelementptr inbounds i16, ptr %b, i32 5
139 %ld.11 = load i16, ptr %arrayidx27, align 2
140 %conv28 = sext i16 %ld.11 to i32
141 %mul29 = mul nsw i32 %conv28, %conv26
142 %add30 = add i32 %add24, %mul29
; Test: the five-term i16 multiply-accumulate chain with a 64-bit accumulator.
; Each i32 product is sign-extended to i64 before being added, and the
; starting value is the sign-extended result of a call to @bar.
; Expected output, per the assertions below: the first product
; (%in[-1] * %b[1]) stays scalar and is added to the extended call result;
; the remaining four products are replaced by two chained llvm.arm.smlaldx
; calls that take and return an i64 accumulator.
146 define i64 @with_64bit_acc(ptr nocapture readonly %in, ptr nocapture readonly %b) {
147 ; CHECK-LABEL: @with_64bit_acc(
149 ; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[IN:%.*]], align 2
150 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
151 ; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[B:%.*]], align 2
152 ; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32
153 ; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @bar(i32 [[CONV]], i32 [[CONV2]])
154 ; CHECK-NEXT: [[SEXT_0:%.*]] = sext i32 [[CALL]] to i64
155 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -1
156 ; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
157 ; CHECK-NEXT: [[CONV4:%.*]] = sext i16 [[TMP2]] to i32
158 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 1
159 ; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX5]], align 2
160 ; CHECK-NEXT: [[CONV6:%.*]] = sext i16 [[TMP3]] to i32
161 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV6]], [[CONV4]]
162 ; CHECK-NEXT: [[SEXT_1:%.*]] = sext i32 [[MUL]] to i64
163 ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[SEXT_0]], [[SEXT_1]]
164 ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -3
165 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13]], align 2
166 ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 2
167 ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX9]], align 2
168 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP5]], i32 [[TMP7]], i64 [[ADD]])
169 ; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -5
170 ; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX25]], align 2
171 ; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 4
172 ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX21]], align 2
173 ; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP10]], i32 [[TMP12]], i64 [[TMP8]])
174 ; CHECK-NEXT: ret i64 [[TMP13]]
; Input IR. The starting accumulator is the i32 call result widened to i64.
177 %0 = load i16, ptr %in, align 2
178 %conv = sext i16 %0 to i32
179 %1 = load i16, ptr %b, align 2
180 %conv2 = sext i16 %1 to i32
181 %call = tail call i32 @bar(i32 %conv, i32 %conv2)
182 %sext.0 = sext i32 %call to i64
; Product 1: %in[-1] * %b[1], widened to i64; expected to remain scalar.
183 %arrayidx3 = getelementptr inbounds i16, ptr %in, i32 -1
184 %2 = load i16, ptr %arrayidx3, align 2
185 %conv4 = sext i16 %2 to i32
186 %arrayidx5 = getelementptr inbounds i16, ptr %b, i32 1
187 %3 = load i16, ptr %arrayidx5, align 2
188 %conv6 = sext i16 %3 to i32
189 %mul = mul nsw i32 %conv6, %conv4
190 %sext.1 = sext i32 %mul to i64
191 %add = add i64 %sext.0, %sext.1
; Products 2 and 3: %in[-2] * %b[2] and %in[-3] * %b[3]; expected to be
; covered by the first smlaldx (i32 loads from %in-3 and %b+2).
192 %arrayidx7 = getelementptr inbounds i16, ptr %in, i32 -2
193 %4 = load i16, ptr %arrayidx7, align 2
194 %conv8 = sext i16 %4 to i32
195 %arrayidx9 = getelementptr inbounds i16, ptr %b, i32 2
196 %5 = load i16, ptr %arrayidx9, align 2
197 %conv10 = sext i16 %5 to i32
198 %mul11 = mul nsw i32 %conv10, %conv8
199 %sext.2 = sext i32 %mul11 to i64
200 %add12 = add i64 %add, %sext.2
201 %arrayidx13 = getelementptr inbounds i16, ptr %in, i32 -3
202 %6 = load i16, ptr %arrayidx13, align 2
203 %conv14 = sext i16 %6 to i32
204 %arrayidx15 = getelementptr inbounds i16, ptr %b, i32 3
205 %7 = load i16, ptr %arrayidx15, align 2
206 %conv16 = sext i16 %7 to i32
207 %mul17 = mul nsw i32 %conv16, %conv14
208 %sext.3 = sext i32 %mul17 to i64
209 %add18 = add i64 %add12, %sext.3
; Products 4 and 5: %in[-4] * %b[4] and %in[-5] * %b[5]; expected to be
; covered by the second smlaldx (i32 loads from %in-5 and %b+4).
210 %arrayidx19 = getelementptr inbounds i16, ptr %in, i32 -4
211 %8 = load i16, ptr %arrayidx19, align 2
212 %conv20 = sext i16 %8 to i32
213 %arrayidx21 = getelementptr inbounds i16, ptr %b, i32 4
214 %9 = load i16, ptr %arrayidx21, align 2
215 %conv22 = sext i16 %9 to i32
216 %mul23 = mul nsw i32 %conv22, %conv20
217 %sext.4 = sext i32 %mul23 to i64
218 %add24 = add i64 %add18, %sext.4
219 %arrayidx25 = getelementptr inbounds i16, ptr %in, i32 -5
220 %10 = load i16, ptr %arrayidx25, align 2
221 %conv26 = sext i16 %10 to i32
222 %arrayidx27 = getelementptr inbounds i16, ptr %b, i32 5
223 %11 = load i16, ptr %arrayidx27, align 2
224 %conv28 = sext i16 %11 to i32
225 %mul29 = mul nsw i32 %conv28, %conv26
226 %sext.5 = sext i32 %mul29 to i64
227 %add30 = add i64 %add24, %sext.5
; Test: a four-term i16 multiply-accumulate chain with a 64-bit accumulator
; seeded from the i32 argument %acc (sign-extended to i64). %px is read at
; ascending element offsets 0..3 and %py at descending offsets 0..-3; every
; add in the input carries nsw.
; Expected output, per the assertions below: the first two products stay
; scalar (the second one's extend and add are re-emitted, the add without
; nsw), and the last two products are replaced by a single llvm.arm.smlaldx
; call fed by i32 loads from %px+2 and %py-3.
231 define i64 @with_64bit_add_acc(ptr nocapture readonly %px.10756.unr, ptr nocapture readonly %py.8757.unr, i32 %acc) {
232 ; CHECK-LABEL: @with_64bit_add_acc(
234 ; CHECK-NEXT: [[SUM_3758_UNR:%.*]] = sext i32 [[ACC:%.*]] to i64
235 ; CHECK-NEXT: br label [[BB_1:%.*]]
237 ; CHECK-NEXT: [[INCDEC_PTR184_EPIL:%.*]] = getelementptr inbounds i16, ptr [[PX_10756_UNR:%.*]], i32 1
238 ; CHECK-NEXT: [[TMP216:%.*]] = load i16, ptr [[PX_10756_UNR]], align 2
239 ; CHECK-NEXT: [[CONV185_EPIL:%.*]] = sext i16 [[TMP216]] to i32
240 ; CHECK-NEXT: [[INCDEC_PTR186_EPIL:%.*]] = getelementptr inbounds i16, ptr [[PY_8757_UNR:%.*]], i32 -1
241 ; CHECK-NEXT: [[TMP217:%.*]] = load i16, ptr [[PY_8757_UNR]], align 2
242 ; CHECK-NEXT: [[CONV187_EPIL:%.*]] = sext i16 [[TMP217]] to i32
243 ; CHECK-NEXT: [[MUL_EPIL:%.*]] = mul nsw i32 [[CONV187_EPIL]], [[CONV185_EPIL]]
244 ; CHECK-NEXT: [[CONV188_EPIL:%.*]] = sext i32 [[MUL_EPIL]] to i64
245 ; CHECK-NEXT: [[ADD189_EPIL:%.*]] = add nsw i64 [[SUM_3758_UNR]], [[CONV188_EPIL]]
246 ; CHECK-NEXT: [[INCDEC_PTR190_EPIL:%.*]] = getelementptr inbounds i16, ptr [[PX_10756_UNR]], i32 2
247 ; CHECK-NEXT: [[TMP218:%.*]] = load i16, ptr [[INCDEC_PTR184_EPIL]], align 2
248 ; CHECK-NEXT: [[CONV191_EPIL:%.*]] = sext i16 [[TMP218]] to i32
249 ; CHECK-NEXT: [[TMP219:%.*]] = load i16, ptr [[INCDEC_PTR186_EPIL]], align 2
250 ; CHECK-NEXT: [[CONV193_EPIL:%.*]] = sext i16 [[TMP219]] to i32
251 ; CHECK-NEXT: [[MUL194_EPIL:%.*]] = mul nsw i32 [[CONV193_EPIL]], [[CONV191_EPIL]]
252 ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[MUL194_EPIL]] to i64
253 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[ADD189_EPIL]]
254 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[INCDEC_PTR190_EPIL]], align 2
255 ; CHECK-NEXT: [[INCDEC_PTR199_EPIL:%.*]] = getelementptr inbounds i16, ptr [[PY_8757_UNR]], i32 -3
256 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[INCDEC_PTR199_EPIL]], align 2
257 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP5]], i32 [[TMP3]], i64 [[TMP1]])
258 ; CHECK-NEXT: ret i64 [[TMP6]]
; Input IR. The i32 argument is widened to form the starting i64 accumulator.
261 %sum.3758.unr = sext i32 %acc to i64
; Product 1: %px[0] * %py[0]; expected to remain scalar.
265 %incdec.ptr184.epil = getelementptr inbounds i16, ptr %px.10756.unr, i32 1
266 %tmp216 = load i16, ptr %px.10756.unr, align 2
267 %conv185.epil = sext i16 %tmp216 to i32
268 %incdec.ptr186.epil = getelementptr inbounds i16, ptr %py.8757.unr, i32 -1
269 %tmp217 = load i16, ptr %py.8757.unr, align 2
270 %conv187.epil = sext i16 %tmp217 to i32
271 %mul.epil = mul nsw i32 %conv187.epil, %conv185.epil
272 %conv188.epil = sext i32 %mul.epil to i64
273 %add189.epil = add nsw i64 %sum.3758.unr, %conv188.epil
; Product 2: %px[1] * %py[-1]; the mul is expected to remain scalar.
274 %incdec.ptr190.epil = getelementptr inbounds i16, ptr %px.10756.unr, i32 2
275 %tmp218 = load i16, ptr %incdec.ptr184.epil, align 2
276 %conv191.epil = sext i16 %tmp218 to i32
277 %incdec.ptr192.epil = getelementptr inbounds i16, ptr %py.8757.unr, i32 -2
278 %tmp219 = load i16, ptr %incdec.ptr186.epil, align 2
279 %conv193.epil = sext i16 %tmp219 to i32
280 %mul194.epil = mul nsw i32 %conv193.epil, %conv191.epil
281 %conv195.epil = sext i32 %mul194.epil to i64
282 %add196.epil = add nsw i64 %add189.epil, %conv195.epil
; Products 3 and 4: %px[2] * %py[-2] and %px[3] * %py[-3]; expected to be
; covered by the single smlaldx (i32 loads from %px+2 and %py-3).
283 %incdec.ptr197.epil = getelementptr inbounds i16, ptr %px.10756.unr, i32 3
284 %tmp220 = load i16, ptr %incdec.ptr190.epil, align 2
285 %conv198.epil = sext i16 %tmp220 to i32
286 %incdec.ptr199.epil = getelementptr inbounds i16, ptr %py.8757.unr, i32 -3
287 %tmp221 = load i16, ptr %incdec.ptr192.epil, align 2
288 %conv200.epil = sext i16 %tmp221 to i32
289 %mul201.epil = mul nsw i32 %conv200.epil, %conv198.epil
290 %conv202.epil = sext i32 %mul201.epil to i64
291 %add203.epil = add nsw i64 %add196.epil, %conv202.epil
292 %tmp222 = load i16, ptr %incdec.ptr197.epil, align 2
293 %conv205.epil = sext i16 %tmp222 to i32
294 %tmp223 = load i16, ptr %incdec.ptr199.epil, align 2
295 %conv207.epil = sext i16 %tmp223 to i32
296 %mul208.epil = mul nsw i32 %conv207.epil, %conv205.epil
297 %conv209.epil = sext i32 %mul208.epil to i64
298 %add210.epil = add nsw i64 %add203.epil, %conv209.epil
; External function with no body in this file; @first_mul_invalid and
; @with_64bit_acc call it to obtain their starting accumulator value.
302 declare dso_local i32 @bar(i32, i32) local_unnamed_addr