1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -arm-parallel-dsp -mtriple=armv7-a -S %s -o - | FileCheck %s
; exchange_1: forms a[0]*b[1] + a[1]*b[0] + acc from sign-extended i16
; elements, i.e. the two halfword products are taken crosswise.
; The assertions expect an additional i32 load of %a and of %b feeding a single
; llvm.arm.smladx(a, b, acc) call, with that call's result being returned.
4 define i32 @exchange_1(ptr %a, ptr %b, i32 %acc) {
5 ; CHECK-LABEL: @exchange_1(
7 ; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
8 ; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
9 ; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
10 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2
11 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
12 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
13 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
14 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
15 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
16 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
17 ; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
18 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2
19 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
20 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]])
21 ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
22 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
23 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
24 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
25 ; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
26 ; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
27 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
28 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
29 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
30 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP14]]
31 ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP11]]
32 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
33 ; CHECK-NEXT: [[RES:%.*]] = add i32 [[ADD]], [[ACC]]
34 ; CHECK-NEXT: ret i32 [[TMP10]]
37 %addr.a.1 = getelementptr i16, ptr %a, i32 1
38 %addr.b.1 = getelementptr i16, ptr %b, i32 1
39 %ld.a.0 = load i16, ptr %a
40 %sext.a.0 = sext i16 %ld.a.0 to i32
41 %ld.b.0 = load i16, ptr %b
42 %ld.a.1 = load i16, ptr %addr.a.1
43 %ld.b.1 = load i16, ptr %addr.b.1
44 %sext.a.1 = sext i16 %ld.a.1 to i32
45 %sext.b.1 = sext i16 %ld.b.1 to i32
46 %sext.b.0 = sext i16 %ld.b.0 to i32
47 %mul.0 = mul i32 %sext.a.0, %sext.b.1
48 %mul.1 = mul i32 %sext.a.1, %sext.b.0
49 %add = add i32 %mul.0, %mul.1
50 %res = add i32 %add, %acc
; exchange_2: same crosswise sum as a[0]*b[1] + a[1]*b[0] + acc, but each mul
; lists the %b element as its first operand (b[1]*a[0], b[0]*a[1]).
; The assertions still expect one llvm.arm.smladx(a, b, acc) call whose result
; is returned.
54 define i32 @exchange_2(ptr %a, ptr %b, i32 %acc) {
55 ; CHECK-LABEL: @exchange_2(
57 ; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
58 ; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
59 ; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
60 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2
61 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
62 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
63 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
64 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
65 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
66 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
67 ; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
68 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2
69 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
70 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]])
71 ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
72 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
73 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
74 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
75 ; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
76 ; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
77 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
78 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
79 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
80 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP14]], [[TMP3]]
81 ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP11]], [[TMP6]]
82 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
83 ; CHECK-NEXT: [[RES:%.*]] = add i32 [[ADD]], [[ACC]]
84 ; CHECK-NEXT: ret i32 [[TMP10]]
87 %addr.a.1 = getelementptr i16, ptr %a, i32 1
88 %addr.b.1 = getelementptr i16, ptr %b, i32 1
89 %ld.a.0 = load i16, ptr %a
90 %sext.a.0 = sext i16 %ld.a.0 to i32
91 %ld.b.0 = load i16, ptr %b
92 %ld.a.1 = load i16, ptr %addr.a.1
93 %ld.b.1 = load i16, ptr %addr.b.1
94 %sext.a.1 = sext i16 %ld.a.1 to i32
95 %sext.b.1 = sext i16 %ld.b.1 to i32
96 %sext.b.0 = sext i16 %ld.b.0 to i32
97 %mul.0 = mul i32 %sext.b.1, %sext.a.0
98 %mul.1 = mul i32 %sext.b.0, %sext.a.1
99 %add = add i32 %mul.0, %mul.1
100 %res = add i32 %add, %acc
; exchange_3: crosswise sum a[0]*b[1] + a[1]*b[0] + acc where the add takes
; %mul.1 before %mul.0.
; The assertions expect one llvm.arm.smladx call with the i32 load of %b as the
; first operand and the i32 load of %a as the second, and its result returned.
104 define i32 @exchange_3(ptr %a, ptr %b, i32 %acc) {
105 ; CHECK-LABEL: @exchange_3(
107 ; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
108 ; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
109 ; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
110 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2
111 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
112 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
113 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
114 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
115 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
116 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
117 ; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
118 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2
119 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
120 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP8]], i32 [[TMP1]], i32 [[ACC:%.*]])
121 ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
122 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
123 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
124 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
125 ; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
126 ; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
127 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
128 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
129 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
130 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP14]]
131 ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP11]]
132 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL_1]], [[MUL_0]]
133 ; CHECK-NEXT: [[RES:%.*]] = add i32 [[ADD]], [[ACC]]
134 ; CHECK-NEXT: ret i32 [[TMP10]]
137 %addr.a.1 = getelementptr i16, ptr %a, i32 1
138 %addr.b.1 = getelementptr i16, ptr %b, i32 1
139 %ld.a.0 = load i16, ptr %a
140 %sext.a.0 = sext i16 %ld.a.0 to i32
141 %ld.b.0 = load i16, ptr %b
142 %ld.a.1 = load i16, ptr %addr.a.1
143 %ld.b.1 = load i16, ptr %addr.b.1
144 %sext.a.1 = sext i16 %ld.a.1 to i32
145 %sext.b.1 = sext i16 %ld.b.1 to i32
146 %sext.b.0 = sext i16 %ld.b.0 to i32
147 %mul.0 = mul i32 %sext.a.0, %sext.b.1
148 %mul.1 = mul i32 %sext.a.1, %sext.b.0
149 %add = add i32 %mul.1, %mul.0
150 %res = add i32 %add, %acc
; exchange_4: crosswise sum with both variations applied at once: each mul
; lists the %b element first (b[1]*a[0], b[0]*a[1]) and the add takes %mul.1
; before %mul.0.
; The assertions expect one llvm.arm.smladx call with the i32 load of %b as the
; first operand and the i32 load of %a as the second, and its result returned.
154 define i32 @exchange_4(ptr %a, ptr %b, i32 %acc) {
155 ; CHECK-LABEL: @exchange_4(
157 ; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
158 ; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
159 ; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
160 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2
161 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
162 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
163 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
164 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
165 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
166 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
167 ; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
168 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2
169 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
170 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP8]], i32 [[TMP1]], i32 [[ACC:%.*]])
171 ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
172 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
173 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
174 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
175 ; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
176 ; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
177 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
178 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
179 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
180 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP14]], [[TMP3]]
181 ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP11]], [[TMP6]]
182 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL_1]], [[MUL_0]]
183 ; CHECK-NEXT: [[RES:%.*]] = add i32 [[ADD]], [[ACC]]
184 ; CHECK-NEXT: ret i32 [[TMP10]]
187 %addr.a.1 = getelementptr i16, ptr %a, i32 1
188 %addr.b.1 = getelementptr i16, ptr %b, i32 1
189 %ld.a.0 = load i16, ptr %a
190 %sext.a.0 = sext i16 %ld.a.0 to i32
191 %ld.b.0 = load i16, ptr %b
192 %ld.a.1 = load i16, ptr %addr.a.1
193 %ld.b.1 = load i16, ptr %addr.b.1
194 %sext.a.1 = sext i16 %ld.a.1 to i32
195 %sext.b.1 = sext i16 %ld.b.1 to i32
196 %sext.b.0 = sext i16 %ld.b.0 to i32
197 %mul.0 = mul i32 %sext.b.1, %sext.a.0
198 %mul.1 = mul i32 %sext.b.0, %sext.a.1
199 %add = add i32 %mul.1, %mul.0
200 %res = add i32 %add, %acc
; exchange_multi_use_1: the sign-extended b[0] and b[1] are each used by two
; multiplies. The first pair is crosswise (a[0]*b[1] + a[1]*b[0]); the second
; pair is lane-aligned against a[2..3] (a[3]*b[1] + a[2]*b[0]). Both sums are
; added together and then to acc.
; The assertions expect llvm.arm.smladx(a[0..1], b, acc) whose result is the
; accumulator of llvm.arm.smlad(a[2..3], b, ...), with the smlad result
; returned.
204 define i32 @exchange_multi_use_1(ptr %a, ptr %b, i32 %acc) {
205 ; CHECK-LABEL: @exchange_multi_use_1(
207 ; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
208 ; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
209 ; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
210 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2
211 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
212 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
213 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
214 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
215 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
216 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
217 ; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
218 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2
219 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
220 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]])
221 ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
222 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
223 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
224 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
225 ; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
226 ; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
227 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
228 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
229 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
230 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP14]]
231 ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP11]]
232 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
233 ; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
234 ; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3
235 ; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2
236 ; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ADDR_A_2]], align 2
237 ; CHECK-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16
238 ; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP16]], i32 [[TMP8]], i32 [[TMP10]])
239 ; CHECK-NEXT: [[TMP19:%.*]] = sext i16 [[TMP17]] to i32
240 ; CHECK-NEXT: [[TMP20:%.*]] = lshr i32 [[TMP16]], 16
241 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
242 ; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32
243 ; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2
244 ; CHECK-NEXT: [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32
245 ; CHECK-NEXT: [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32
246 ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP22]], [[TMP14]]
247 ; CHECK-NEXT: [[MUL_3:%.*]] = mul i32 [[TMP19]], [[TMP11]]
248 ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[MUL_2]], [[MUL_3]]
249 ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[ADD]], [[ADD_1]]
250 ; CHECK-NEXT: [[RES:%.*]] = add i32 [[ADD_2]], [[ACC]]
251 ; CHECK-NEXT: ret i32 [[TMP18]]
254 %addr.a.1 = getelementptr i16, ptr %a, i32 1
255 %addr.b.1 = getelementptr i16, ptr %b, i32 1
256 %ld.a.0 = load i16, ptr %a
257 %sext.a.0 = sext i16 %ld.a.0 to i32
258 %ld.b.0 = load i16, ptr %b
259 %ld.a.1 = load i16, ptr %addr.a.1
260 %ld.b.1 = load i16, ptr %addr.b.1
261 %sext.a.1 = sext i16 %ld.a.1 to i32
262 %sext.b.1 = sext i16 %ld.b.1 to i32
263 %sext.b.0 = sext i16 %ld.b.0 to i32
264 %mul.0 = mul i32 %sext.a.0, %sext.b.1
265 %mul.1 = mul i32 %sext.a.1, %sext.b.0
266 %add = add i32 %mul.0, %mul.1
267 %addr.a.2 = getelementptr i16, ptr %a, i32 2
268 %addr.a.3 = getelementptr i16, ptr %a, i32 3
269 %ld.a.2 = load i16, ptr %addr.a.2
270 %ld.a.3 = load i16, ptr %addr.a.3
271 %sext.a.2 = sext i16 %ld.a.2 to i32
272 %sext.a.3 = sext i16 %ld.a.3 to i32
273 %mul.2 = mul i32 %sext.a.3, %sext.b.1
274 %mul.3 = mul i32 %sext.a.2, %sext.b.0
275 %add.1 = add i32 %mul.2, %mul.3
276 %add.2 = add i32 %add, %add.1
277 %res = add i32 %add.2, %acc
; exchange_multi_use_64_1: same multiply pattern as a crosswise pair on
; a[0..1] plus a lane-aligned pair on a[2..3], both sharing b[0] and b[1], but
; with an i64 accumulator: the combined i32 sum (%add.2) is sign-extended to
; i64 once and then added to acc.
; The assertions expect llvm.arm.smlaldx(a[0..1], b, acc) whose result is the
; accumulator of llvm.arm.smlald(a[2..3], b, ...), with the smlald result
; returned.
281 define i64 @exchange_multi_use_64_1(ptr %a, ptr %b, i64 %acc) {
282 ; CHECK-LABEL: @exchange_multi_use_64_1(
284 ; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
285 ; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
286 ; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
287 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2
288 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
289 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
290 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
291 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
292 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
293 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
294 ; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
295 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2
296 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
297 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP1]], i32 [[TMP8]], i64 [[ACC:%.*]])
298 ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
299 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
300 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
301 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
302 ; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
303 ; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
304 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
305 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
306 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
307 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP14]]
308 ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP11]]
309 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
310 ; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
311 ; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3
312 ; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2
313 ; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ADDR_A_2]], align 2
314 ; CHECK-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16
315 ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP16]], i32 [[TMP8]], i64 [[TMP10]])
316 ; CHECK-NEXT: [[TMP19:%.*]] = sext i16 [[TMP17]] to i32
317 ; CHECK-NEXT: [[TMP20:%.*]] = lshr i32 [[TMP16]], 16
318 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
319 ; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32
320 ; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2
321 ; CHECK-NEXT: [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32
322 ; CHECK-NEXT: [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32
323 ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP22]], [[TMP14]]
324 ; CHECK-NEXT: [[MUL_3:%.*]] = mul i32 [[TMP19]], [[TMP11]]
325 ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[MUL_2]], [[MUL_3]]
326 ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[ADD]], [[ADD_1]]
327 ; CHECK-NEXT: [[SEXT_ADD_2:%.*]] = sext i32 [[ADD_2]] to i64
328 ; CHECK-NEXT: [[RES:%.*]] = add i64 [[SEXT_ADD_2]], [[ACC]]
329 ; CHECK-NEXT: ret i64 [[TMP18]]
332 %addr.a.1 = getelementptr i16, ptr %a, i32 1
333 %addr.b.1 = getelementptr i16, ptr %b, i32 1
334 %ld.a.0 = load i16, ptr %a
335 %sext.a.0 = sext i16 %ld.a.0 to i32
336 %ld.b.0 = load i16, ptr %b
337 %ld.a.1 = load i16, ptr %addr.a.1
338 %ld.b.1 = load i16, ptr %addr.b.1
339 %sext.a.1 = sext i16 %ld.a.1 to i32
340 %sext.b.1 = sext i16 %ld.b.1 to i32
341 %sext.b.0 = sext i16 %ld.b.0 to i32
342 %mul.0 = mul i32 %sext.a.0, %sext.b.1
343 %mul.1 = mul i32 %sext.a.1, %sext.b.0
344 %add = add i32 %mul.0, %mul.1
345 %addr.a.2 = getelementptr i16, ptr %a, i32 2
346 %addr.a.3 = getelementptr i16, ptr %a, i32 3
347 %ld.a.2 = load i16, ptr %addr.a.2
348 %ld.a.3 = load i16, ptr %addr.a.3
349 %sext.a.2 = sext i16 %ld.a.2 to i32
350 %sext.a.3 = sext i16 %ld.a.3 to i32
351 %mul.2 = mul i32 %sext.a.3, %sext.b.1
352 %mul.3 = mul i32 %sext.a.2, %sext.b.0
353 %add.1 = add i32 %mul.2, %mul.3
354 %add.2 = add i32 %add, %add.1
355 %sext.add.2 = sext i32 %add.2 to i64
356 %res = add i64 %sext.add.2, %acc
; exchange_multi_use_64_2: crosswise pair on a[0..1] plus lane-aligned pair on
; a[2..3], both sharing b[0] and b[1], with an i64 accumulator. Here each i32
; pair sum (%add, %add.1) is sign-extended to i64 separately before the two are
; added and then added to acc.
; The assertions expect llvm.arm.smlaldx(a[0..1], b, acc) whose result is the
; accumulator of llvm.arm.smlald(a[2..3], b, ...), with the smlald result
; returned.
360 define i64 @exchange_multi_use_64_2(ptr %a, ptr %b, i64 %acc) {
361 ; CHECK-LABEL: @exchange_multi_use_64_2(
363 ; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
364 ; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
365 ; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
366 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2
367 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
368 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
369 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
370 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
371 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
372 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
373 ; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
374 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2
375 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
376 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP1]], i32 [[TMP8]], i64 [[ACC:%.*]])
377 ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
378 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
379 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
380 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
381 ; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
382 ; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
383 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
384 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
385 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
386 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP14]]
387 ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP11]]
388 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
389 ; CHECK-NEXT: [[SEXT_ADD:%.*]] = sext i32 [[ADD]] to i64
390 ; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
391 ; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3
392 ; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2
393 ; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ADDR_A_2]], align 2
394 ; CHECK-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16
395 ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP16]], i32 [[TMP8]], i64 [[TMP10]])
396 ; CHECK-NEXT: [[TMP19:%.*]] = sext i16 [[TMP17]] to i32
397 ; CHECK-NEXT: [[TMP20:%.*]] = lshr i32 [[TMP16]], 16
398 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
399 ; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32
400 ; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2
401 ; CHECK-NEXT: [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32
402 ; CHECK-NEXT: [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32
403 ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP22]], [[TMP14]]
404 ; CHECK-NEXT: [[MUL_3:%.*]] = mul i32 [[TMP19]], [[TMP11]]
405 ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[MUL_2]], [[MUL_3]]
406 ; CHECK-NEXT: [[SEXT_ADD_1:%.*]] = sext i32 [[ADD_1]] to i64
407 ; CHECK-NEXT: [[ADD_2:%.*]] = add i64 [[SEXT_ADD]], [[SEXT_ADD_1]]
408 ; CHECK-NEXT: [[RES:%.*]] = add i64 [[ADD_2]], [[ACC]]
409 ; CHECK-NEXT: ret i64 [[TMP18]]
412 %addr.a.1 = getelementptr i16, ptr %a, i32 1
413 %addr.b.1 = getelementptr i16, ptr %b, i32 1
414 %ld.a.0 = load i16, ptr %a
415 %sext.a.0 = sext i16 %ld.a.0 to i32
416 %ld.b.0 = load i16, ptr %b
417 %ld.a.1 = load i16, ptr %addr.a.1
418 %ld.b.1 = load i16, ptr %addr.b.1
419 %sext.a.1 = sext i16 %ld.a.1 to i32
420 %sext.b.1 = sext i16 %ld.b.1 to i32
421 %sext.b.0 = sext i16 %ld.b.0 to i32
422 %mul.0 = mul i32 %sext.a.0, %sext.b.1
423 %mul.1 = mul i32 %sext.a.1, %sext.b.0
424 %add = add i32 %mul.0, %mul.1
425 %sext.add = sext i32 %add to i64
426 %addr.a.2 = getelementptr i16, ptr %a, i32 2
427 %addr.a.3 = getelementptr i16, ptr %a, i32 3
428 %ld.a.2 = load i16, ptr %addr.a.2
429 %ld.a.3 = load i16, ptr %addr.a.3
430 %sext.a.2 = sext i16 %ld.a.2 to i32
431 %sext.a.3 = sext i16 %ld.a.3 to i32
432 %mul.2 = mul i32 %sext.a.3, %sext.b.1
433 %mul.3 = mul i32 %sext.a.2, %sext.b.0
434 %add.1 = add i32 %mul.2, %mul.3
435 %sext.add.1 = sext i32 %add.1 to i64
436 %add.2 = add i64 %sext.add, %sext.add.1
437 %res = add i64 %add.2, %acc
; exchange_multi_use_2: the sign-extended b[0] and b[1] are each used by two
; multiplies. The first pair is lane-aligned (a[0]*b[0] + a[1]*b[1]); the
; second pair is crosswise against a[2..3] (b[0]*a[3] + b[1]*a[2]). Both sums
; are added together and then to acc.
; The assertions expect llvm.arm.smlad(a[0..1], b, acc) whose result is the
; accumulator of llvm.arm.smladx(b, a[2..3], ...), with the smladx result
; returned.
441 define i32 @exchange_multi_use_2(ptr %a, ptr %b, i32 %acc) {
442 ; CHECK-LABEL: @exchange_multi_use_2(
444 ; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
445 ; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
446 ; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
447 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2
448 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
449 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
450 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
451 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
452 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
453 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
454 ; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
455 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2
456 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
457 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]])
458 ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
459 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
460 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
461 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
462 ; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
463 ; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
464 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
465 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
466 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
467 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP11]]
468 ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP14]]
469 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
470 ; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
471 ; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3
472 ; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2
473 ; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ADDR_A_2]], align 2
474 ; CHECK-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16
475 ; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP8]], i32 [[TMP16]], i32 [[TMP10]])
476 ; CHECK-NEXT: [[TMP19:%.*]] = sext i16 [[TMP17]] to i32
477 ; CHECK-NEXT: [[TMP20:%.*]] = lshr i32 [[TMP16]], 16
478 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
479 ; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32
480 ; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2
481 ; CHECK-NEXT: [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32
482 ; CHECK-NEXT: [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32
483 ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP11]], [[TMP22]]
484 ; CHECK-NEXT: [[MUL_3:%.*]] = mul i32 [[TMP14]], [[TMP19]]
485 ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[MUL_2]], [[MUL_3]]
486 ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[ADD]], [[ADD_1]]
487 ; CHECK-NEXT: [[RES:%.*]] = add i32 [[ADD_2]], [[ACC]]
488 ; CHECK-NEXT: ret i32 [[TMP18]]
491 %addr.a.1 = getelementptr i16, ptr %a, i32 1
492 %addr.b.1 = getelementptr i16, ptr %b, i32 1
493 %ld.a.0 = load i16, ptr %a
494 %sext.a.0 = sext i16 %ld.a.0 to i32
495 %ld.b.0 = load i16, ptr %b
496 %ld.a.1 = load i16, ptr %addr.a.1
497 %ld.b.1 = load i16, ptr %addr.b.1
498 %sext.a.1 = sext i16 %ld.a.1 to i32
499 %sext.b.1 = sext i16 %ld.b.1 to i32
500 %sext.b.0 = sext i16 %ld.b.0 to i32
501 %mul.0 = mul i32 %sext.a.0, %sext.b.0
502 %mul.1 = mul i32 %sext.a.1, %sext.b.1
503 %add = add i32 %mul.0, %mul.1
504 %addr.a.2 = getelementptr i16, ptr %a, i32 2
505 %addr.a.3 = getelementptr i16, ptr %a, i32 3
506 %ld.a.2 = load i16, ptr %addr.a.2
507 %ld.a.3 = load i16, ptr %addr.a.3
508 %sext.a.2 = sext i16 %ld.a.2 to i32
509 %sext.a.3 = sext i16 %ld.a.3 to i32
510 %mul.2 = mul i32 %sext.b.0, %sext.a.3
511 %mul.3 = mul i32 %sext.b.1, %sext.a.2
512 %add.1 = add i32 %mul.2, %mul.3
513 %add.2 = add i32 %add, %add.1
514 %res = add i32 %add.2, %acc
518 ; TODO: Why aren't two intrinsics generated?
; exchange_multi_use_3: computes acc + ((a[0]*b[0] + a[1]*b[1]) -
; (b[0]*a[3] + b[1]*a[2])), so the crosswise pair sum is subtracted rather
; than added.
; The assertions expect only the crosswise pair to become a call,
; llvm.arm.smladx(b, a[2..3], 0), whose result is the subtrahend of the sub.
; The lane-aligned pair stays as two scalar muls plus an add, and the final
; add of acc is what gets returned.
519 define i32 @exchange_multi_use_3(ptr %a, ptr %b, i32 %acc) {
520 ; CHECK-LABEL: @exchange_multi_use_3(
522 ; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
523 ; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
524 ; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
525 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
526 ; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
527 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 2
528 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
529 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
530 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
531 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
532 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
533 ; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
534 ; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
535 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
536 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
537 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
538 ; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
539 ; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3
540 ; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2
541 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ADDR_A_2]], align 2
542 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
543 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP8]], i32 0)
544 ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
545 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
546 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
547 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
548 ; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2
549 ; CHECK-NEXT: [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32
550 ; CHECK-NEXT: [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32
551 ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP3]], [[TMP14]]
552 ; CHECK-NEXT: [[MUL_3:%.*]] = mul i32 [[TMP6]], [[TMP11]]
553 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[SEXT_A_0]], [[TMP3]]
554 ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[SEXT_A_1]], [[TMP6]]
555 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
556 ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[MUL_2]], [[MUL_3]]
557 ; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[ADD]], [[TMP10]]
558 ; CHECK-NEXT: [[RES:%.*]] = add i32 [[ACC:%.*]], [[SUB]]
559 ; CHECK-NEXT: ret i32 [[RES]]
562 %addr.a.1 = getelementptr i16, ptr %a, i32 1
563 %addr.b.1 = getelementptr i16, ptr %b, i32 1
564 %ld.a.0 = load i16, ptr %a
565 %sext.a.0 = sext i16 %ld.a.0 to i32
566 %ld.b.0 = load i16, ptr %b
567 %ld.a.1 = load i16, ptr %addr.a.1
568 %ld.b.1 = load i16, ptr %addr.b.1
569 %sext.a.1 = sext i16 %ld.a.1 to i32
570 %sext.b.1 = sext i16 %ld.b.1 to i32
571 %sext.b.0 = sext i16 %ld.b.0 to i32
572 %addr.a.2 = getelementptr i16, ptr %a, i32 2
573 %addr.a.3 = getelementptr i16, ptr %a, i32 3
574 %ld.a.2 = load i16, ptr %addr.a.2
575 %ld.a.3 = load i16, ptr %addr.a.3
576 %sext.a.2 = sext i16 %ld.a.2 to i32
577 %sext.a.3 = sext i16 %ld.a.3 to i32
578 %mul.2 = mul i32 %sext.b.0, %sext.a.3
579 %mul.3 = mul i32 %sext.b.1, %sext.a.2
580 %mul.0 = mul i32 %sext.a.0, %sext.b.0
581 %mul.1 = mul i32 %sext.a.1, %sext.b.1
582 %add = add i32 %mul.0, %mul.1
583 %add.1 = add i32 %mul.2, %mul.3
584 %sub = sub i32 %add, %add.1
585 %res = add i32 %acc, %sub
589 ; TODO: Would it be better to generate a smlad and then sign extend it?
; exchange_multi_use_64_3: computes acc - (sext(a[0]*b[0] + a[1]*b[1]) +
; sext(b[0]*a[3] + b[1]*a[2])) with an i64 accumulator; each i32 pair sum is
; sign-extended to i64 before the two are added.
; The assertions expect llvm.arm.smlaldx(b, a[2..3], 0) whose result is the
; accumulator of llvm.arm.smlald(a[0..1], b, ...); the smlald result is then
; subtracted from acc and that difference is returned.
590 define i64 @exchange_multi_use_64_3(ptr %a, ptr %b, i64 %acc) {
591 ; CHECK-LABEL: @exchange_multi_use_64_3(
593 ; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
594 ; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
595 ; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
596 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2
597 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
598 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
599 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
600 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
601 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
602 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
603 ; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
604 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2
605 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
606 ; CHECK-NEXT: [[TMP10:%.*]] = sext i16 [[TMP9]] to i32
607 ; CHECK-NEXT: [[TMP11:%.*]] = lshr i32 [[TMP8]], 16
608 ; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i16
609 ; CHECK-NEXT: [[TMP13:%.*]] = sext i16 [[TMP12]] to i32
610 ; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
611 ; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
612 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
613 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
614 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
615 ; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
616 ; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3
617 ; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2
618 ; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ADDR_A_2]], align 2
619 ; CHECK-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16
620 ; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP8]], i32 [[TMP15]], i64 0)
621 ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP8]], i64 [[TMP17]])
622 ; CHECK-NEXT: [[TMP19:%.*]] = sext i16 [[TMP16]] to i32
623 ; CHECK-NEXT: [[TMP20:%.*]] = lshr i32 [[TMP15]], 16
624 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
625 ; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32
626 ; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2
627 ; CHECK-NEXT: [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32
628 ; CHECK-NEXT: [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32
629 ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP10]], [[TMP22]]
630 ; CHECK-NEXT: [[MUL_3:%.*]] = mul i32 [[TMP13]], [[TMP19]]
631 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP10]]
632 ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP13]]
633 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
634 ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[MUL_2]], [[MUL_3]]
635 ; CHECK-NEXT: [[SEXT_ADD:%.*]] = sext i32 [[ADD]] to i64
636 ; CHECK-NEXT: [[SEXT_ADD_1:%.*]] = sext i32 [[ADD_1]] to i64
637 ; CHECK-NEXT: [[ADD_2:%.*]] = add i64 [[SEXT_ADD]], [[SEXT_ADD_1]]
638 ; CHECK-NEXT: [[RES:%.*]] = sub i64 [[ACC:%.*]], [[TMP18]]
639 ; CHECK-NEXT: ret i64 [[RES]]
642 %addr.a.1 = getelementptr i16, ptr %a, i32 1
643 %addr.b.1 = getelementptr i16, ptr %b, i32 1
644 %ld.a.0 = load i16, ptr %a
645 %sext.a.0 = sext i16 %ld.a.0 to i32
646 %ld.b.0 = load i16, ptr %b
647 %ld.a.1 = load i16, ptr %addr.a.1
648 %ld.b.1 = load i16, ptr %addr.b.1
649 %sext.a.1 = sext i16 %ld.a.1 to i32
650 %sext.b.1 = sext i16 %ld.b.1 to i32
651 %sext.b.0 = sext i16 %ld.b.0 to i32
652 %addr.a.2 = getelementptr i16, ptr %a, i32 2
653 %addr.a.3 = getelementptr i16, ptr %a, i32 3
654 %ld.a.2 = load i16, ptr %addr.a.2
655 %ld.a.3 = load i16, ptr %addr.a.3
656 %sext.a.2 = sext i16 %ld.a.2 to i32
657 %sext.a.3 = sext i16 %ld.a.3 to i32
658 %mul.2 = mul i32 %sext.b.0, %sext.a.3
659 %mul.3 = mul i32 %sext.b.1, %sext.a.2
660 %mul.0 = mul i32 %sext.a.0, %sext.b.0
661 %mul.1 = mul i32 %sext.a.1, %sext.b.1
662 %add = add i32 %mul.0, %mul.1
663 %add.1 = add i32 %mul.2, %mul.3
664 %sext.add = sext i32 %add to i64
665 %sext.add.1 = sext i32 %add.1 to i64
666 %add.2 = add i64 %sext.add, %sext.add.1
667 %res = sub i64 %acc, %add.2
671 ; TODO: Why isn't smladx generated too?
; Input: two sums of i16 products that share the loads of b[0] and b[1].
;   %add   = a[0]*b[0] + a[1]*b[1]   (straight pairing)
;   %add.1 = b[0]*a[3] + b[1]*a[2]   (exchanged pairing)
;   %res   = %acc + (%add - %add.1)
; Expected output: only %add is replaced, by a call to @llvm.arm.smlad on the
; widened i32 loads of %a and %b with a zero accumulator. %add.1 is still
; formed from scalar multiplies (a[2] and a[3] keep their i16 loads) and is
; subtracted from the smlad result before %acc is added.
; NOTE(review): presumably %add.1 is left alone because it is the subtrahend
; of %sub rather than an operand of an add chain - confirm against the pass.
672 define i32 @exchange_multi_use_4(ptr %a, ptr %b, i32 %acc) {
673 ; CHECK-LABEL: @exchange_multi_use_4(
675 ; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
676 ; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
677 ; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
678 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2
679 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
680 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
681 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
682 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
683 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
684 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
685 ; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
686 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2
687 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
688 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP1]], i32 [[TMP8]], i32 0)
689 ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
690 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
691 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
692 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
693 ; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
694 ; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
695 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
696 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
697 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
698 ; CHECK-NEXT: [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
699 ; CHECK-NEXT: [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3
700 ; CHECK-NEXT: [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2
701 ; CHECK-NEXT: [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2
702 ; CHECK-NEXT: [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32
703 ; CHECK-NEXT: [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32
704 ; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[TMP11]], [[SEXT_A_3]]
705 ; CHECK-NEXT: [[MUL_3:%.*]] = mul i32 [[TMP14]], [[SEXT_A_2]]
706 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP11]]
707 ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP14]]
708 ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[MUL_2]], [[MUL_3]]
709 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
710 ; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[TMP10]], [[ADD_1]]
711 ; CHECK-NEXT: [[RES:%.*]] = add i32 [[ACC:%.*]], [[SUB]]
712 ; CHECK-NEXT: ret i32 [[RES]]
715 %addr.a.1 = getelementptr i16, ptr %a, i32 1
716 %addr.b.1 = getelementptr i16, ptr %b, i32 1
717 %ld.a.0 = load i16, ptr %a
718 %sext.a.0 = sext i16 %ld.a.0 to i32
719 %ld.b.0 = load i16, ptr %b
720 %ld.a.1 = load i16, ptr %addr.a.1
721 %ld.b.1 = load i16, ptr %addr.b.1
722 %sext.a.1 = sext i16 %ld.a.1 to i32
723 %sext.b.1 = sext i16 %ld.b.1 to i32
724 %sext.b.0 = sext i16 %ld.b.0 to i32
725 %addr.a.2 = getelementptr i16, ptr %a, i32 2
726 %addr.a.3 = getelementptr i16, ptr %a, i32 3
727 %ld.a.2 = load i16, ptr %addr.a.2
728 %ld.a.3 = load i16, ptr %addr.a.3
729 %sext.a.2 = sext i16 %ld.a.2 to i32
730 %sext.a.3 = sext i16 %ld.a.3 to i32
; Exchanged pairing: b[0]*a[3] and b[1]*a[2].
731 %mul.2 = mul i32 %sext.b.0, %sext.a.3
732 %mul.3 = mul i32 %sext.b.1, %sext.a.2
; Straight pairing: a[0]*b[0] and a[1]*b[1].
733 %mul.0 = mul i32 %sext.a.0, %sext.b.0
734 %mul.1 = mul i32 %sext.a.1, %sext.b.1
735 %add.1 = add i32 %mul.2, %mul.3
736 %add = add i32 %mul.0, %mul.1
; The exchanged sum is subtracted from the straight sum before accumulating.
737 %sub = sub i32 %add, %add.1
738 %res = add i32 %acc, %sub
; Input: %acc + (a[1]*b[0] + a[0]*b[1]), with the a[1]*b[0] product defined
; first and the a-side value as the left operand of each multiply.
; Expected output: a single @llvm.arm.smladx call, fed by i32 loads from %b
; (first operand) and %a (second operand) with %acc as accumulator, becomes
; the return value. The original scalar multiplies and adds still appear in
; the expected output even though the return no longer uses them.
742 define i32 @exchange_swap(ptr %a, ptr %b, i32 %acc) {
743 ; CHECK-LABEL: @exchange_swap(
745 ; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
746 ; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
747 ; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
748 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2
749 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
750 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
751 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
752 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
753 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
754 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
755 ; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
756 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2
757 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
758 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP8]], i32 [[TMP1]], i32 [[ACC:%.*]])
759 ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
760 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
761 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
762 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
763 ; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
764 ; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
765 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
766 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
767 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
768 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP6]], [[TMP11]]
769 ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP3]], [[TMP14]]
770 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
771 ; CHECK-NEXT: [[RES:%.*]] = add i32 [[ADD]], [[ACC]]
772 ; CHECK-NEXT: ret i32 [[TMP10]]
775 %addr.a.1 = getelementptr i16, ptr %a, i32 1
776 %addr.b.1 = getelementptr i16, ptr %b, i32 1
777 %ld.a.0 = load i16, ptr %a
778 %sext.a.0 = sext i16 %ld.a.0 to i32
779 %ld.b.0 = load i16, ptr %b
780 %ld.a.1 = load i16, ptr %addr.a.1
781 %ld.b.1 = load i16, ptr %addr.b.1
782 %sext.a.1 = sext i16 %ld.a.1 to i32
783 %sext.b.1 = sext i16 %ld.b.1 to i32
784 %sext.b.0 = sext i16 %ld.b.0 to i32
; Cross products: a[1]*b[0] first, then a[0]*b[1].
785 %mul.0 = mul i32 %sext.a.1, %sext.b.0
786 %mul.1 = mul i32 %sext.a.0, %sext.b.1
787 %add = add i32 %mul.0, %mul.1
788 %res = add i32 %add, %acc
; Input: %acc + (a[0]*b[1] + a[1]*b[0]). The cross products are defined in
; the order a[1]*b[0], a[0]*b[1], but %add takes them as (%mul.1, %mul.0).
; Expected output: a single @llvm.arm.smladx call, fed by i32 loads from %a
; (first operand) and %b (second operand) with %acc as accumulator, becomes
; the return value. The original scalar multiplies and adds still appear in
; the expected output even though the return no longer uses them.
792 define i32 @exchange_swap_2(ptr %a, ptr %b, i32 %acc) {
793 ; CHECK-LABEL: @exchange_swap_2(
795 ; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
796 ; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
797 ; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
798 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2
799 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
800 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
801 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
802 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
803 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
804 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
805 ; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
806 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2
807 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
808 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]])
809 ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
810 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
811 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
812 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
813 ; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
814 ; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
815 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
816 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
817 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
818 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP6]], [[TMP11]]
819 ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP3]], [[TMP14]]
820 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL_1]], [[MUL_0]]
821 ; CHECK-NEXT: [[RES:%.*]] = add i32 [[ADD]], [[ACC]]
822 ; CHECK-NEXT: ret i32 [[TMP10]]
825 %addr.a.1 = getelementptr i16, ptr %a, i32 1
826 %addr.b.1 = getelementptr i16, ptr %b, i32 1
827 %ld.a.0 = load i16, ptr %a
828 %sext.a.0 = sext i16 %ld.a.0 to i32
829 %ld.b.0 = load i16, ptr %b
830 %ld.a.1 = load i16, ptr %addr.a.1
831 %ld.b.1 = load i16, ptr %addr.b.1
832 %sext.a.1 = sext i16 %ld.a.1 to i32
833 %sext.b.1 = sext i16 %ld.b.1 to i32
834 %sext.b.0 = sext i16 %ld.b.0 to i32
; Cross products: a[1]*b[0] and a[0]*b[1].
835 %mul.0 = mul i32 %sext.a.1, %sext.b.0
836 %mul.1 = mul i32 %sext.a.0, %sext.b.1
; The add takes the later-defined product as its left operand.
837 %add = add i32 %mul.1, %mul.0
838 %res = add i32 %add, %acc
; Input: %acc + (b[1]*a[0] + b[0]*a[1]). Each multiply has the b-side value
; as its left operand, and %add takes the products as (%mul.1, %mul.0).
; Expected output: a single @llvm.arm.smladx call, fed by i32 loads from %a
; (first operand) and %b (second operand) with %acc as accumulator, becomes
; the return value. The original scalar multiplies and adds still appear in
; the expected output even though the return no longer uses them.
842 define i32 @exchange_swap_3(ptr %a, ptr %b, i32 %acc) {
843 ; CHECK-LABEL: @exchange_swap_3(
845 ; CHECK-NEXT: [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
846 ; CHECK-NEXT: [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
847 ; CHECK-NEXT: [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
848 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 2
849 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
850 ; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
851 ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
852 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
853 ; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
854 ; CHECK-NEXT: [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
855 ; CHECK-NEXT: [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
856 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 2
857 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
858 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]])
859 ; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
860 ; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
861 ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
862 ; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
863 ; CHECK-NEXT: [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
864 ; CHECK-NEXT: [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
865 ; CHECK-NEXT: [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
866 ; CHECK-NEXT: [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
867 ; CHECK-NEXT: [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
868 ; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[TMP11]], [[TMP6]]
869 ; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[TMP14]], [[TMP3]]
870 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL_1]], [[MUL_0]]
871 ; CHECK-NEXT: [[RES:%.*]] = add i32 [[ADD]], [[ACC]]
872 ; CHECK-NEXT: ret i32 [[TMP10]]
875 %addr.a.1 = getelementptr i16, ptr %a, i32 1
876 %addr.b.1 = getelementptr i16, ptr %b, i32 1
877 %ld.a.0 = load i16, ptr %a
878 %sext.a.0 = sext i16 %ld.a.0 to i32
879 %ld.b.0 = load i16, ptr %b
880 %ld.a.1 = load i16, ptr %addr.a.1
881 %ld.b.1 = load i16, ptr %addr.b.1
882 %sext.a.1 = sext i16 %ld.a.1 to i32
883 %sext.b.1 = sext i16 %ld.b.1 to i32
884 %sext.b.0 = sext i16 %ld.b.0 to i32
; Cross products with the b-side value on the left: b[0]*a[1] and b[1]*a[0].
885 %mul.0 = mul i32 %sext.b.0, %sext.a.1
886 %mul.1 = mul i32 %sext.b.1, %sext.a.0
; The add takes the later-defined product as its left operand.
887 %add = add i32 %mul.1, %mul.0
888 %res = add i32 %add, %acc