1 ; RUN: opt -arm-parallel-dsp -mtriple=armv7-a -S %s -o - | FileCheck %s
3 ; CHECK-LABEL: exchange_1
4 ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
5 ; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
6 ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
7 ; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
8 ; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]]
; exchange_1: baseline crossed-product pattern.
; Loads a[0], a[1], b[0], b[1] as i16, sign-extends each to i32 and computes
;   a[0]*b[1] + a[1]*b[0] + acc
; The check lines preceding this function expect one llvm.arm.smladx call whose
; first operand is the widened load of %a and whose second is the widened load
; of %b.
9 define i32 @exchange_1(i16* %a, i16* %b, i32 %acc) {
; Addresses of the second i16 element of each array.
11 %addr.a.1 = getelementptr i16, i16* %a, i32 1
12 %addr.b.1 = getelementptr i16, i16* %b, i32 1
13 %ld.a.0 = load i16, i16* %a
14 %sext.a.0 = sext i16 %ld.a.0 to i32
15 %ld.b.0 = load i16, i16* %b
16 %ld.a.1 = load i16, i16* %addr.a.1
17 %ld.b.1 = load i16, i16* %addr.b.1
18 %sext.a.1 = sext i16 %ld.a.1 to i32
19 %sext.b.1 = sext i16 %ld.b.1 to i32
20 %sext.b.0 = sext i16 %ld.b.0 to i32
; Crossed products: element 0 of %a with element 1 of %b, and vice versa.
; The %a value is the first mul operand in both products.
21 %mul.0 = mul i32 %sext.a.0, %sext.b.1
22 %mul.1 = mul i32 %sext.a.1, %sext.b.0
; Sum in the order mul.0 + mul.1, then fold in the incoming accumulator.
23 %add = add i32 %mul.0, %mul.1
24 %res = add i32 %add, %acc
28 ; CHECK-LABEL: exchange_2
29 ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
30 ; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
31 ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
32 ; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
33 ; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]]
; exchange_2: same crossed products as exchange_1, but with the operands of
; each mul commuted (the %b value comes first). Computes
;   b[1]*a[0] + b[0]*a[1] + acc
; The check lines preceding this function still expect
; llvm.arm.smladx(<load of %a>, <load of %b>, ...).
34 define i32 @exchange_2(i16* %a, i16* %b, i32 %acc) {
; Addresses of the second i16 element of each array.
36 %addr.a.1 = getelementptr i16, i16* %a, i32 1
37 %addr.b.1 = getelementptr i16, i16* %b, i32 1
38 %ld.a.0 = load i16, i16* %a
39 %sext.a.0 = sext i16 %ld.a.0 to i32
40 %ld.b.0 = load i16, i16* %b
41 %ld.a.1 = load i16, i16* %addr.a.1
42 %ld.b.1 = load i16, i16* %addr.b.1
43 %sext.a.1 = sext i16 %ld.a.1 to i32
44 %sext.b.1 = sext i16 %ld.b.1 to i32
45 %sext.b.0 = sext i16 %ld.b.0 to i32
; Crossed products with the %b value as the first mul operand.
46 %mul.0 = mul i32 %sext.b.1, %sext.a.0
47 %mul.1 = mul i32 %sext.b.0, %sext.a.1
; Sum in the order mul.0 + mul.1, then fold in the incoming accumulator.
48 %add = add i32 %mul.0, %mul.1
49 %res = add i32 %add, %acc
53 ; CHECK-LABEL: exchange_3
54 ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
55 ; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
56 ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
57 ; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
58 ; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A]]
; exchange_3: same muls as exchange_1, but the add that combines them has its
; operands commuted (mul.1 + mul.0). Computes
;   a[1]*b[0] + a[0]*b[1] + acc
; The check lines preceding this function expect the intrinsic operands in the
; opposite order: llvm.arm.smladx(<load of %b>, <load of %a>, ...).
59 define i32 @exchange_3(i16* %a, i16* %b, i32 %acc) {
; Addresses of the second i16 element of each array.
61 %addr.a.1 = getelementptr i16, i16* %a, i32 1
62 %addr.b.1 = getelementptr i16, i16* %b, i32 1
63 %ld.a.0 = load i16, i16* %a
64 %sext.a.0 = sext i16 %ld.a.0 to i32
65 %ld.b.0 = load i16, i16* %b
66 %ld.a.1 = load i16, i16* %addr.a.1
67 %ld.b.1 = load i16, i16* %addr.b.1
68 %sext.a.1 = sext i16 %ld.a.1 to i32
69 %sext.b.1 = sext i16 %ld.b.1 to i32
70 %sext.b.0 = sext i16 %ld.b.0 to i32
; Crossed products with the %a value as the first mul operand.
71 %mul.0 = mul i32 %sext.a.0, %sext.b.1
72 %mul.1 = mul i32 %sext.a.1, %sext.b.0
; Note the commuted add: mul.1 is the first operand here.
73 %add = add i32 %mul.1, %mul.0
74 %res = add i32 %add, %acc
78 ; CHECK-LABEL: exchange_4
79 ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
80 ; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
81 ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
82 ; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
83 ; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A]]
; exchange_4: combines both variations - the mul operands are commuted (%b
; value first, as in exchange_2) and the add operands are commuted (mul.1
; first, as in exchange_3). Computes
;   b[0]*a[1] + b[1]*a[0] + acc
; The check lines preceding this function expect
; llvm.arm.smladx(<load of %b>, <load of %a>, ...).
84 define i32 @exchange_4(i16* %a, i16* %b, i32 %acc) {
; Addresses of the second i16 element of each array.
86 %addr.a.1 = getelementptr i16, i16* %a, i32 1
87 %addr.b.1 = getelementptr i16, i16* %b, i32 1
88 %ld.a.0 = load i16, i16* %a
89 %sext.a.0 = sext i16 %ld.a.0 to i32
90 %ld.b.0 = load i16, i16* %b
91 %ld.a.1 = load i16, i16* %addr.a.1
92 %ld.b.1 = load i16, i16* %addr.b.1
93 %sext.a.1 = sext i16 %ld.a.1 to i32
94 %sext.b.1 = sext i16 %ld.b.1 to i32
95 %sext.b.0 = sext i16 %ld.b.0 to i32
; Crossed products with the %b value as the first mul operand.
96 %mul.0 = mul i32 %sext.b.1, %sext.a.0
97 %mul.1 = mul i32 %sext.b.0, %sext.a.1
; Commuted add: mul.1 is the first operand.
98 %add = add i32 %mul.1, %mul.0
99 %res = add i32 %add, %acc
103 ; CHECK-LABEL: exchange_multi_use_1
104 ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
105 ; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
106 ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
107 ; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
108 ; CHECK: [[GEP:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
109 ; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP]] to i32*
110 ; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
111 ; CHECK: [[X:%[^ ]+]] = call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]], i32 %acc
112 ; CHECK: call i32 @llvm.arm.smlad(i32 [[LD_A_2]], i32 [[LD_B]], i32 [[X]])
; exchange_multi_use_1: the sign-extended %b values are used by two separate
; pairs of products.
;   pair 1 (crossed):  a[0]*b[1] + a[1]*b[0]
;   pair 2 (straight): a[3]*b[1] + a[2]*b[0]
; The result is pair1 + pair2 + acc. The check lines preceding this function
; expect llvm.arm.smladx(<a[0..1]>, <b>, %acc) feeding
; llvm.arm.smlad(<a[2..3]>, <b>, ...), i.e. the single widened %b load is an
; operand of both calls.
113 define i32 @exchange_multi_use_1(i16* %a, i16* %b, i32 %acc) {
; Addresses of the second i16 element of each array.
115 %addr.a.1 = getelementptr i16, i16* %a, i32 1
116 %addr.b.1 = getelementptr i16, i16* %b, i32 1
117 %ld.a.0 = load i16, i16* %a
118 %sext.a.0 = sext i16 %ld.a.0 to i32
119 %ld.b.0 = load i16, i16* %b
120 %ld.a.1 = load i16, i16* %addr.a.1
121 %ld.b.1 = load i16, i16* %addr.b.1
122 %sext.a.1 = sext i16 %ld.a.1 to i32
123 %sext.b.1 = sext i16 %ld.b.1 to i32
124 %sext.b.0 = sext i16 %ld.b.0 to i32
; Pair 1: crossed products of a[0..1] with b[0..1].
125 %mul.0 = mul i32 %sext.a.0, %sext.b.1
126 %mul.1 = mul i32 %sext.a.1, %sext.b.0
127 %add = add i32 %mul.0, %mul.1
; Third and fourth i16 elements of %a.
128 %addr.a.2 = getelementptr i16, i16* %a, i32 2
129 %addr.a.3 = getelementptr i16, i16* %a, i32 3
130 %ld.a.2 = load i16, i16* %addr.a.2
131 %ld.a.3 = load i16, i16* %addr.a.3
132 %sext.a.2 = sext i16 %ld.a.2 to i32
133 %sext.a.3 = sext i16 %ld.a.3 to i32
; Pair 2: reuses %sext.b.1 and %sext.b.0; a[3] goes with b[1] and a[2] with
; b[0], so relative to the a[2..3] pair the indices are not crossed.
134 %mul.2 = mul i32 %sext.a.3, %sext.b.1
135 %mul.3 = mul i32 %sext.a.2, %sext.b.0
136 %add.1 = add i32 %mul.2, %mul.3
; Combine both pair sums, then add the incoming accumulator.
137 %add.2 = add i32 %add, %add.1
138 %res = add i32 %add.2, %acc
142 ; CHECK-LABEL: exchange_multi_use_64_1
143 ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
144 ; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
145 ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
146 ; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
147 ; CHECK: [[GEP:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
148 ; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP]] to i32*
149 ; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
150 ; CHECK: [[X:%[^ ]+]] = call i64 @llvm.arm.smlaldx(i32 [[LD_A]], i32 [[LD_B]], i64 %acc
151 ; CHECK: call i64 @llvm.arm.smlald(i32 [[LD_A_2]], i32 [[LD_B]], i64 [[X]])
; exchange_multi_use_64_1: 64-bit accumulator variant of exchange_multi_use_1.
; The four products and both pair sums are computed in i32; the combined i32
; sum is sign-extended to i64 once and then added to the i64 %acc.
;   pair 1 (crossed):  a[0]*b[1] + a[1]*b[0]
;   pair 2 (straight): a[3]*b[1] + a[2]*b[0]
; The check lines preceding this function expect
; llvm.arm.smlaldx(<a[0..1]>, <b>, i64 %acc) feeding
; llvm.arm.smlald(<a[2..3]>, <b>, ...).
152 define i64 @exchange_multi_use_64_1(i16* %a, i16* %b, i64 %acc) {
; Addresses of the second i16 element of each array.
154 %addr.a.1 = getelementptr i16, i16* %a, i32 1
155 %addr.b.1 = getelementptr i16, i16* %b, i32 1
156 %ld.a.0 = load i16, i16* %a
157 %sext.a.0 = sext i16 %ld.a.0 to i32
158 %ld.b.0 = load i16, i16* %b
159 %ld.a.1 = load i16, i16* %addr.a.1
160 %ld.b.1 = load i16, i16* %addr.b.1
161 %sext.a.1 = sext i16 %ld.a.1 to i32
162 %sext.b.1 = sext i16 %ld.b.1 to i32
163 %sext.b.0 = sext i16 %ld.b.0 to i32
; Pair 1: crossed products of a[0..1] with b[0..1].
164 %mul.0 = mul i32 %sext.a.0, %sext.b.1
165 %mul.1 = mul i32 %sext.a.1, %sext.b.0
166 %add = add i32 %mul.0, %mul.1
; Third and fourth i16 elements of %a.
167 %addr.a.2 = getelementptr i16, i16* %a, i32 2
168 %addr.a.3 = getelementptr i16, i16* %a, i32 3
169 %ld.a.2 = load i16, i16* %addr.a.2
170 %ld.a.3 = load i16, i16* %addr.a.3
171 %sext.a.2 = sext i16 %ld.a.2 to i32
172 %sext.a.3 = sext i16 %ld.a.3 to i32
; Pair 2: reuses the %b values; a[3] with b[1], a[2] with b[0].
173 %mul.2 = mul i32 %sext.a.3, %sext.b.1
174 %mul.3 = mul i32 %sext.a.2, %sext.b.0
175 %add.1 = add i32 %mul.2, %mul.3
; Both pair sums are combined while still i32 ...
176 %add.2 = add i32 %add, %add.1
; ... and only the combined value is widened before the i64 accumulate.
177 %sext.add.2 = sext i32 %add.2 to i64
178 %res = add i64 %sext.add.2, %acc
182 ; CHECK-LABEL: exchange_multi_use_64_2
183 ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
184 ; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
185 ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
186 ; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
187 ; CHECK: [[GEP:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
188 ; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP]] to i32*
189 ; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
190 ; CHECK: [[X:%[^ ]+]] = call i64 @llvm.arm.smlaldx(i32 [[LD_A]], i32 [[LD_B]], i64 %acc
191 ; CHECK: call i64 @llvm.arm.smlald(i32 [[LD_A_2]], i32 [[LD_B]], i64 [[X]])
; exchange_multi_use_64_2: like exchange_multi_use_64_1, except that each i32
; pair sum is sign-extended to i64 on its own and the two widened sums are
; added in i64 before the i64 %acc is added.
;   pair 1 (crossed):  a[0]*b[1] + a[1]*b[0]
;   pair 2 (straight): a[3]*b[1] + a[2]*b[0]
; The check lines preceding this function expect the same output as for
; exchange_multi_use_64_1: llvm.arm.smlaldx(<a[0..1]>, <b>, i64 %acc) feeding
; llvm.arm.smlald(<a[2..3]>, <b>, ...).
192 define i64 @exchange_multi_use_64_2(i16* %a, i16* %b, i64 %acc) {
; Addresses of the second i16 element of each array.
194 %addr.a.1 = getelementptr i16, i16* %a, i32 1
195 %addr.b.1 = getelementptr i16, i16* %b, i32 1
196 %ld.a.0 = load i16, i16* %a
197 %sext.a.0 = sext i16 %ld.a.0 to i32
198 %ld.b.0 = load i16, i16* %b
199 %ld.a.1 = load i16, i16* %addr.a.1
200 %ld.b.1 = load i16, i16* %addr.b.1
201 %sext.a.1 = sext i16 %ld.a.1 to i32
202 %sext.b.1 = sext i16 %ld.b.1 to i32
203 %sext.b.0 = sext i16 %ld.b.0 to i32
; Pair 1: crossed products of a[0..1] with b[0..1].
204 %mul.0 = mul i32 %sext.a.0, %sext.b.1
205 %mul.1 = mul i32 %sext.a.1, %sext.b.0
206 %add = add i32 %mul.0, %mul.1
; Pair 1 sum is widened to i64 immediately.
207 %sext.add = sext i32 %add to i64
; Third and fourth i16 elements of %a.
208 %addr.a.2 = getelementptr i16, i16* %a, i32 2
209 %addr.a.3 = getelementptr i16, i16* %a, i32 3
210 %ld.a.2 = load i16, i16* %addr.a.2
211 %ld.a.3 = load i16, i16* %addr.a.3
212 %sext.a.2 = sext i16 %ld.a.2 to i32
213 %sext.a.3 = sext i16 %ld.a.3 to i32
; Pair 2: reuses the %b values; a[3] with b[1], a[2] with b[0].
214 %mul.2 = mul i32 %sext.a.3, %sext.b.1
215 %mul.3 = mul i32 %sext.a.2, %sext.b.0
216 %add.1 = add i32 %mul.2, %mul.3
; Pair 2 sum is widened separately; the pair sums meet in an i64 add.
217 %sext.add.1 = sext i32 %add.1 to i64
218 %add.2 = add i64 %sext.add, %sext.add.1
219 %res = add i64 %add.2, %acc
223 ; CHECK-LABEL: exchange_multi_use_2
224 ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
225 ; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
226 ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
227 ; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
228 ; CHECK: [[GEP:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
229 ; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP]] to i32*
230 ; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
231 ; CHECK: [[X:%[^ ]+]] = call i32 @llvm.arm.smlad(i32 [[LD_A]], i32 [[LD_B]], i32 %acc
232 ; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A_2]], i32 [[X]])
; exchange_multi_use_2: mirror image of exchange_multi_use_1 - here the first
; pair is the straight one and the second pair is the crossed one.
;   pair 1 (straight): a[0]*b[0] + a[1]*b[1]
;   pair 2 (crossed):  b[0]*a[3] + b[1]*a[2]
; The result is pair1 + pair2 + acc. The check lines preceding this function
; expect llvm.arm.smlad(<a[0..1]>, <b>, %acc) feeding
; llvm.arm.smladx(<b>, <a[2..3]>, ...).
233 define i32 @exchange_multi_use_2(i16* %a, i16* %b, i32 %acc) {
; Addresses of the second i16 element of each array.
235 %addr.a.1 = getelementptr i16, i16* %a, i32 1
236 %addr.b.1 = getelementptr i16, i16* %b, i32 1
237 %ld.a.0 = load i16, i16* %a
238 %sext.a.0 = sext i16 %ld.a.0 to i32
239 %ld.b.0 = load i16, i16* %b
240 %ld.a.1 = load i16, i16* %addr.a.1
241 %ld.b.1 = load i16, i16* %addr.b.1
242 %sext.a.1 = sext i16 %ld.a.1 to i32
243 %sext.b.1 = sext i16 %ld.b.1 to i32
244 %sext.b.0 = sext i16 %ld.b.0 to i32
; Pair 1: same-index products of a[0..1] with b[0..1].
245 %mul.0 = mul i32 %sext.a.0, %sext.b.0
246 %mul.1 = mul i32 %sext.a.1, %sext.b.1
247 %add = add i32 %mul.0, %mul.1
; Third and fourth i16 elements of %a.
248 %addr.a.2 = getelementptr i16, i16* %a, i32 2
249 %addr.a.3 = getelementptr i16, i16* %a, i32 3
250 %ld.a.2 = load i16, i16* %addr.a.2
251 %ld.a.3 = load i16, i16* %addr.a.3
252 %sext.a.2 = sext i16 %ld.a.2 to i32
253 %sext.a.3 = sext i16 %ld.a.3 to i32
; Pair 2: reuses the %b values as the first mul operand; b[0] goes with a[3]
; and b[1] with a[2], so the indices are crossed within the pair.
254 %mul.2 = mul i32 %sext.b.0, %sext.a.3
255 %mul.3 = mul i32 %sext.b.1, %sext.a.2
256 %add.1 = add i32 %mul.2, %mul.3
; Combine both pair sums, then add the incoming accumulator.
257 %add.2 = add i32 %add, %add.1
258 %res = add i32 %add.2, %acc
262 ; TODO: Why aren't two intrinsics generated?
263 ; CHECK-LABEL: exchange_multi_use_3
264 ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
265 ; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
266 ; CHECK: [[GEP:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
267 ; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP]] to i32*
268 ; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
269 ; CHECK-NOT: call i32 @llvm.arm.smlad
270 ; CHECK: [[X:%[^ ]+]] = call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A_2]], i32 0
; exchange_multi_use_3: the two pair sums are joined by a sub, not an add.
;   straight pair: a[0]*b[0] + a[1]*b[1]   (%add)
;   crossed pair:  b[0]*a[3] + b[1]*a[2]   (%add.1)
; The result is acc + (%add - %add.1). The crossed-pair muls are defined before
; the straight-pair muls. The check lines preceding this function expect only
; llvm.arm.smladx(<b>, <a[2..3]>, i32 0) and no llvm.arm.smlad call before it;
; the TODO above those lines questions why a second intrinsic is not produced.
271 define i32 @exchange_multi_use_3(i16* %a, i16* %b, i32 %acc) {
; Addresses of the second i16 element of each array.
273 %addr.a.1 = getelementptr i16, i16* %a, i32 1
274 %addr.b.1 = getelementptr i16, i16* %b, i32 1
275 %ld.a.0 = load i16, i16* %a
276 %sext.a.0 = sext i16 %ld.a.0 to i32
277 %ld.b.0 = load i16, i16* %b
278 %ld.a.1 = load i16, i16* %addr.a.1
279 %ld.b.1 = load i16, i16* %addr.b.1
280 %sext.a.1 = sext i16 %ld.a.1 to i32
281 %sext.b.1 = sext i16 %ld.b.1 to i32
282 %sext.b.0 = sext i16 %ld.b.0 to i32
; Third and fourth i16 elements of %a.
283 %addr.a.2 = getelementptr i16, i16* %a, i32 2
284 %addr.a.3 = getelementptr i16, i16* %a, i32 3
285 %ld.a.2 = load i16, i16* %addr.a.2
286 %ld.a.3 = load i16, i16* %addr.a.3
287 %sext.a.2 = sext i16 %ld.a.2 to i32
288 %sext.a.3 = sext i16 %ld.a.3 to i32
; Crossed pair: b[0] with a[3], b[1] with a[2].
289 %mul.2 = mul i32 %sext.b.0, %sext.a.3
290 %mul.3 = mul i32 %sext.b.1, %sext.a.2
; Straight pair: same-index products of a[0..1] with b[0..1].
291 %mul.0 = mul i32 %sext.a.0, %sext.b.0
292 %mul.1 = mul i32 %sext.a.1, %sext.b.1
; Here the straight-pair sum (%add) is defined before the crossed-pair sum.
293 %add = add i32 %mul.0, %mul.1
294 %add.1 = add i32 %mul.2, %mul.3
; The crossed-pair sum is subtracted from the straight-pair sum.
295 %sub = sub i32 %add, %add.1
296 %res = add i32 %acc, %sub
300 ; TODO: Would it be better to generate a smlad and then sign extend it?
301 ; CHECK-LABEL: exchange_multi_use_64_3
302 ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
303 ; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
304 ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
305 ; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
306 ; CHECK: [[GEP:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
307 ; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP]] to i32*
308 ; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
309 ; CHECK: [[ACC:%[^ ]+]] = call i64 @llvm.arm.smlald(i32 [[LD_A]], i32 [[LD_B]], i64 0)
310 ; CHECK: [[X:%[^ ]+]] = call i64 @llvm.arm.smlaldx(i32 [[LD_B]], i32 [[LD_A_2]], i64 [[ACC]])
; exchange_multi_use_64_3: 64-bit variant in which the combined sum is
; subtracted from the accumulator.
;   straight pair: a[0]*b[0] + a[1]*b[1]   (%add)
;   crossed pair:  b[0]*a[3] + b[1]*a[2]   (%add.1)
; Each i32 pair sum is sign-extended to i64, the two are added in i64, and the
; result is acc - (sum). The check lines preceding this function expect
; llvm.arm.smlald(<a[0..1]>, <b>, i64 0) feeding
; llvm.arm.smlaldx(<b>, <a[2..3]>, ...); the TODO above those lines asks
; whether a 32-bit smlad followed by a sign extension would be preferable.
311 define i64 @exchange_multi_use_64_3(i16* %a, i16* %b, i64 %acc) {
; Addresses of the second i16 element of each array.
313 %addr.a.1 = getelementptr i16, i16* %a, i32 1
314 %addr.b.1 = getelementptr i16, i16* %b, i32 1
315 %ld.a.0 = load i16, i16* %a
316 %sext.a.0 = sext i16 %ld.a.0 to i32
317 %ld.b.0 = load i16, i16* %b
318 %ld.a.1 = load i16, i16* %addr.a.1
319 %ld.b.1 = load i16, i16* %addr.b.1
320 %sext.a.1 = sext i16 %ld.a.1 to i32
321 %sext.b.1 = sext i16 %ld.b.1 to i32
322 %sext.b.0 = sext i16 %ld.b.0 to i32
; Third and fourth i16 elements of %a.
323 %addr.a.2 = getelementptr i16, i16* %a, i32 2
324 %addr.a.3 = getelementptr i16, i16* %a, i32 3
325 %ld.a.2 = load i16, i16* %addr.a.2
326 %ld.a.3 = load i16, i16* %addr.a.3
327 %sext.a.2 = sext i16 %ld.a.2 to i32
328 %sext.a.3 = sext i16 %ld.a.3 to i32
; Crossed pair: b[0] with a[3], b[1] with a[2].
329 %mul.2 = mul i32 %sext.b.0, %sext.a.3
330 %mul.3 = mul i32 %sext.b.1, %sext.a.2
; Straight pair: same-index products of a[0..1] with b[0..1].
331 %mul.0 = mul i32 %sext.a.0, %sext.b.0
332 %mul.1 = mul i32 %sext.a.1, %sext.b.1
333 %add = add i32 %mul.0, %mul.1
334 %add.1 = add i32 %mul.2, %mul.3
; Widen each pair sum separately, then combine in i64.
335 %sext.add = sext i32 %add to i64
336 %sext.add.1 = sext i32 %add.1 to i64
337 %add.2 = add i64 %sext.add, %sext.add.1
; The combined sum is subtracted from (not added to) the accumulator.
338 %res = sub i64 %acc, %add.2
342 ; TODO: Why isn't smladx generated too?
343 ; CHECK-LABEL: exchange_multi_use_4
344 ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
345 ; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
346 ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
347 ; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
348 ; CHECK: [[X:%[^ ]+]] = call i32 @llvm.arm.smlad(i32 [[LD_A]], i32 [[LD_B]], i32 0
349 ; CHECK-NOT: call i32 @llvm.arm.smlad
; exchange_multi_use_4: same computation as exchange_multi_use_3, but the two
; pair-sum adds appear in the opposite order (%add.1 is defined before %add).
;   straight pair: a[0]*b[0] + a[1]*b[1]   (%add)
;   crossed pair:  b[0]*a[3] + b[1]*a[2]   (%add.1)
; The result is acc + (%add - %add.1). The check lines preceding this function
; expect llvm.arm.smlad(<a[0..1]>, <b>, i32 0) and no further call whose name
; begins with llvm.arm.smlad; the TODO above those lines questions why smladx
; is not produced as well.
350 define i32 @exchange_multi_use_4(i16* %a, i16* %b, i32 %acc) {
; Addresses of the second i16 element of each array.
352 %addr.a.1 = getelementptr i16, i16* %a, i32 1
353 %addr.b.1 = getelementptr i16, i16* %b, i32 1
354 %ld.a.0 = load i16, i16* %a
355 %sext.a.0 = sext i16 %ld.a.0 to i32
356 %ld.b.0 = load i16, i16* %b
357 %ld.a.1 = load i16, i16* %addr.a.1
358 %ld.b.1 = load i16, i16* %addr.b.1
359 %sext.a.1 = sext i16 %ld.a.1 to i32
360 %sext.b.1 = sext i16 %ld.b.1 to i32
361 %sext.b.0 = sext i16 %ld.b.0 to i32
; Third and fourth i16 elements of %a.
362 %addr.a.2 = getelementptr i16, i16* %a, i32 2
363 %addr.a.3 = getelementptr i16, i16* %a, i32 3
364 %ld.a.2 = load i16, i16* %addr.a.2
365 %ld.a.3 = load i16, i16* %addr.a.3
366 %sext.a.2 = sext i16 %ld.a.2 to i32
367 %sext.a.3 = sext i16 %ld.a.3 to i32
; Crossed pair: b[0] with a[3], b[1] with a[2].
368 %mul.2 = mul i32 %sext.b.0, %sext.a.3
369 %mul.3 = mul i32 %sext.b.1, %sext.a.2
; Straight pair: same-index products of a[0..1] with b[0..1].
370 %mul.0 = mul i32 %sext.a.0, %sext.b.0
371 %mul.1 = mul i32 %sext.a.1, %sext.b.1
; Here the crossed-pair sum (%add.1) is defined before the straight-pair sum.
372 %add.1 = add i32 %mul.2, %mul.3
373 %add = add i32 %mul.0, %mul.1
; The crossed-pair sum is subtracted from the straight-pair sum.
374 %sub = sub i32 %add, %add.1
375 %res = add i32 %acc, %sub
379 ; CHECK-LABEL: exchange_swap
380 ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
381 ; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
382 ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
383 ; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
384 ; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A]]
; exchange_swap: crossed products where the product using a[1] is defined
; first. Computes
;   a[1]*b[0] + a[0]*b[1] + acc
; with the add taking mul.0 then mul.1. The check lines preceding this function
; expect llvm.arm.smladx(<load of %b>, <load of %a>, ...).
385 define i32 @exchange_swap(i16* %a, i16* %b, i32 %acc) {
; Addresses of the second i16 element of each array.
387 %addr.a.1 = getelementptr i16, i16* %a, i32 1
388 %addr.b.1 = getelementptr i16, i16* %b, i32 1
389 %ld.a.0 = load i16, i16* %a
390 %sext.a.0 = sext i16 %ld.a.0 to i32
391 %ld.b.0 = load i16, i16* %b
392 %ld.a.1 = load i16, i16* %addr.a.1
393 %ld.b.1 = load i16, i16* %addr.b.1
394 %sext.a.1 = sext i16 %ld.a.1 to i32
395 %sext.b.1 = sext i16 %ld.b.1 to i32
396 %sext.b.0 = sext i16 %ld.b.0 to i32
; mul.0 uses a[1] and b[0]; mul.1 uses a[0] and b[1]. The %a value is the
; first mul operand in both.
397 %mul.0 = mul i32 %sext.a.1, %sext.b.0
398 %mul.1 = mul i32 %sext.a.0, %sext.b.1
; Sum in the order mul.0 + mul.1, then fold in the incoming accumulator.
399 %add = add i32 %mul.0, %mul.1
400 %res = add i32 %add, %acc
404 ; CHECK-LABEL: exchange_swap_2
405 ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
406 ; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
407 ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
408 ; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
409 ; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]]
; exchange_swap_2: same muls as exchange_swap, but the add that combines them
; has its operands commuted (mul.1 + mul.0). Computes
;   a[0]*b[1] + a[1]*b[0] + acc
; The check lines preceding this function expect
; llvm.arm.smladx(<load of %a>, <load of %b>, ...).
410 define i32 @exchange_swap_2(i16* %a, i16* %b, i32 %acc) {
; Addresses of the second i16 element of each array.
412 %addr.a.1 = getelementptr i16, i16* %a, i32 1
413 %addr.b.1 = getelementptr i16, i16* %b, i32 1
414 %ld.a.0 = load i16, i16* %a
415 %sext.a.0 = sext i16 %ld.a.0 to i32
416 %ld.b.0 = load i16, i16* %b
417 %ld.a.1 = load i16, i16* %addr.a.1
418 %ld.b.1 = load i16, i16* %addr.b.1
419 %sext.a.1 = sext i16 %ld.a.1 to i32
420 %sext.b.1 = sext i16 %ld.b.1 to i32
421 %sext.b.0 = sext i16 %ld.b.0 to i32
; mul.0 uses a[1] and b[0]; mul.1 uses a[0] and b[1].
422 %mul.0 = mul i32 %sext.a.1, %sext.b.0
423 %mul.1 = mul i32 %sext.a.0, %sext.b.1
; Commuted add: mul.1 is the first operand.
424 %add = add i32 %mul.1, %mul.0
425 %res = add i32 %add, %acc
429 ; CHECK-LABEL: exchange_swap_3
430 ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
431 ; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
432 ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
433 ; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
434 ; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]]
; exchange_swap_3: like exchange_swap_2, with the operands of each mul also
; commuted so that the %b value comes first. Computes
;   b[1]*a[0] + b[0]*a[1] + acc
; The check lines preceding this function expect
; llvm.arm.smladx(<load of %a>, <load of %b>, ...).
435 define i32 @exchange_swap_3(i16* %a, i16* %b, i32 %acc) {
; Addresses of the second i16 element of each array.
437 %addr.a.1 = getelementptr i16, i16* %a, i32 1
438 %addr.b.1 = getelementptr i16, i16* %b, i32 1
439 %ld.a.0 = load i16, i16* %a
440 %sext.a.0 = sext i16 %ld.a.0 to i32
441 %ld.b.0 = load i16, i16* %b
442 %ld.a.1 = load i16, i16* %addr.a.1
443 %ld.b.1 = load i16, i16* %addr.b.1
444 %sext.a.1 = sext i16 %ld.a.1 to i32
445 %sext.b.1 = sext i16 %ld.b.1 to i32
446 %sext.b.0 = sext i16 %ld.b.0 to i32
; mul.0 uses b[0] and a[1]; mul.1 uses b[1] and a[0]. The %b value is the
; first mul operand in both.
447 %mul.0 = mul i32 %sext.b.0, %sext.a.1
448 %mul.1 = mul i32 %sext.b.1, %sext.a.0
; Commuted add: mul.1 is the first operand.
449 %add = add i32 %mul.1, %mul.0
450 %res = add i32 %add, %acc