; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple aarch64 -o - | FileCheck %s
; Signed widening add: both <8 x i8> operands are sign-extended to i16 lanes
; and added. The expected code is a single saddl.
define <8 x i16> @extadds_v8i8_i16(<8 x i8> %s0, <8 x i8> %s1) {
; CHECK-LABEL: extadds_v8i8_i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    ret
entry:
  %s0s = sext <8 x i8> %s0 to <8 x i16>
  %s1s = sext <8 x i8> %s1 to <8 x i16>
  %m = add <8 x i16> %s0s, %s1s
  ret <8 x i16> %m
}
; Unsigned widening add: both <8 x i8> operands are zero-extended to i16 lanes
; and added. The expected code is a single uaddl.
define <8 x i16> @extaddu_v8i8_i16(<8 x i8> %s0, <8 x i8> %s1) {
; CHECK-LABEL: extaddu_v8i8_i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    ret
entry:
  %s0s = zext <8 x i8> %s0 to <8 x i16>
  %s1s = zext <8 x i8> %s1 to <8 x i16>
  %m = add <8 x i16> %s0s, %s1s
  ret <8 x i16> %m
}
; Signed widening add of <16 x i8> operands to <16 x i16>. The result spans two
; vector registers, so the expected code uses saddl2 for the high half and
; saddl for the low half, followed by a register move.
define <16 x i16> @extadds_v16i8_i16(<16 x i8> %s0, <16 x i8> %s1) {
; CHECK-LABEL: extadds_v16i8_i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl2 v2.8h, v0.16b, v1.16b
; CHECK-NEXT:    saddl v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    mov v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %s0s = sext <16 x i8> %s0 to <16 x i16>
  %s1s = sext <16 x i8> %s1 to <16 x i16>
  %m = add <16 x i16> %s0s, %s1s
  ret <16 x i16> %m
}
; Unsigned widening add of <16 x i8> operands to <16 x i16>. The result spans
; two vector registers, so the expected code uses uaddl2 for the high half and
; uaddl for the low half, followed by a register move.
define <16 x i16> @extaddu_v16i8_i16(<16 x i8> %s0, <16 x i8> %s1) {
; CHECK-LABEL: extaddu_v16i8_i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl2 v2.8h, v0.16b, v1.16b
; CHECK-NEXT:    uaddl v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    mov v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %s0s = zext <16 x i8> %s0 to <16 x i16>
  %s1s = zext <16 x i8> %s1 to <16 x i16>
  %m = add <16 x i16> %s0s, %s1s
  ret <16 x i16> %m
}
; Signed widening add of <32 x i8> operands to <32 x i16>. Each operand occupies
; two registers; the expected code is two saddl/saddl2 pairs plus the moves
; needed to place the four result registers.
define <32 x i16> @extadds_v32i8_i16(<32 x i8> %s0, <32 x i8> %s1) {
; CHECK-LABEL: extadds_v32i8_i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl2 v4.8h, v1.16b, v3.16b
; CHECK-NEXT:    saddl v5.8h, v0.8b, v2.8b
; CHECK-NEXT:    saddl2 v6.8h, v0.16b, v2.16b
; CHECK-NEXT:    saddl v2.8h, v1.8b, v3.8b
; CHECK-NEXT:    mov v0.16b, v5.16b
; CHECK-NEXT:    mov v1.16b, v6.16b
; CHECK-NEXT:    mov v3.16b, v4.16b
; CHECK-NEXT:    ret
entry:
  %s0s = sext <32 x i8> %s0 to <32 x i16>
  %s1s = sext <32 x i8> %s1 to <32 x i16>
  %m = add <32 x i16> %s0s, %s1s
  ret <32 x i16> %m
}
; Unsigned widening add of <32 x i8> operands to <32 x i16>. Each operand
; occupies two registers; the expected code is two uaddl/uaddl2 pairs plus the
; moves needed to place the four result registers.
define <32 x i16> @extaddu_v32i8_i16(<32 x i8> %s0, <32 x i8> %s1) {
; CHECK-LABEL: extaddu_v32i8_i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl2 v4.8h, v1.16b, v3.16b
; CHECK-NEXT:    uaddl v5.8h, v0.8b, v2.8b
; CHECK-NEXT:    uaddl2 v6.8h, v0.16b, v2.16b
; CHECK-NEXT:    uaddl v2.8h, v1.8b, v3.8b
; CHECK-NEXT:    mov v0.16b, v5.16b
; CHECK-NEXT:    mov v1.16b, v6.16b
; CHECK-NEXT:    mov v3.16b, v4.16b
; CHECK-NEXT:    ret
entry:
  %s0s = zext <32 x i8> %s0 to <32 x i16>
  %s1s = zext <32 x i8> %s1 to <32 x i16>
  %m = add <32 x i16> %s0s, %s1s
  ret <32 x i16> %m
}
; Signed i8 -> i32 extend-and-add on <8 x i8> operands. The expected code does
; the add at i16 width with saddl and then sign-extends the sum to i32 with
; sshll2/sshll.
define <8 x i32> @extadds_v8i8_i32(<8 x i8> %s0, <8 x i8> %s1) {
; CHECK-LABEL: extadds_v8i8_i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    sshll2 v1.4s, v0.8h, #0
; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-NEXT:    ret
entry:
  %s0s = sext <8 x i8> %s0 to <8 x i32>
  %s1s = sext <8 x i8> %s1 to <8 x i32>
  %m = add <8 x i32> %s0s, %s1s
  ret <8 x i32> %m
}
; Unsigned i8 -> i32 extend-and-add on <8 x i8> operands. The expected code does
; the add at i16 width with uaddl and then zero-extends the sum to i32 with
; ushll2/ushll.
define <8 x i32> @extaddu_v8i8_i32(<8 x i8> %s0, <8 x i8> %s1) {
; CHECK-LABEL: extaddu_v8i8_i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    ret
entry:
  %s0s = zext <8 x i8> %s0 to <8 x i32>
  %s1s = zext <8 x i8> %s1 to <8 x i32>
  %m = add <8 x i32> %s0s, %s1s
  ret <8 x i32> %m
}
; Signed i8 -> i32 extend-and-add on <16 x i8> operands. The expected code adds
; at i16 width (saddl/saddl2) and sign-extends the two sums into four i32
; result registers with sshll/sshll2.
define <16 x i32> @extadds_v16i8_i32(<16 x i8> %s0, <16 x i8> %s1) {
; CHECK-LABEL: extadds_v16i8_i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl v2.8h, v0.8b, v1.8b
; CHECK-NEXT:    saddl2 v4.8h, v0.16b, v1.16b
; CHECK-NEXT:    sshll v0.4s, v2.4h, #0
; CHECK-NEXT:    sshll2 v3.4s, v4.8h, #0
; CHECK-NEXT:    sshll2 v1.4s, v2.8h, #0
; CHECK-NEXT:    sshll v2.4s, v4.4h, #0
; CHECK-NEXT:    ret
entry:
  %s0s = sext <16 x i8> %s0 to <16 x i32>
  %s1s = sext <16 x i8> %s1 to <16 x i32>
  %m = add <16 x i32> %s0s, %s1s
  ret <16 x i32> %m
}
; Unsigned i8 -> i32 extend-and-add on <16 x i8> operands. The expected code
; adds at i16 width (uaddl/uaddl2) and zero-extends the two sums into four i32
; result registers with ushll/ushll2.
define <16 x i32> @extaddu_v16i8_i32(<16 x i8> %s0, <16 x i8> %s1) {
; CHECK-LABEL: extaddu_v16i8_i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl v2.8h, v0.8b, v1.8b
; CHECK-NEXT:    uaddl2 v4.8h, v0.16b, v1.16b
; CHECK-NEXT:    ushll v0.4s, v2.4h, #0
; CHECK-NEXT:    ushll2 v3.4s, v4.8h, #0
; CHECK-NEXT:    ushll2 v1.4s, v2.8h, #0
; CHECK-NEXT:    ushll v2.4s, v4.4h, #0
; CHECK-NEXT:    ret
entry:
  %s0s = zext <16 x i8> %s0 to <16 x i32>
  %s1s = zext <16 x i8> %s1 to <16 x i32>
  %m = add <16 x i32> %s0s, %s1s
  ret <16 x i32> %m
}
; Signed i8 -> i64 extend-and-add on <8 x i8> operands. The expected code adds
; once at i16 width with saddl, then sign-extends the sum in two steps
; (i16 -> i32 -> i64) with sshll/sshll2.
define <8 x i64> @extadds_v8i8_i64(<8 x i8> %s0, <8 x i8> %s1) {
; CHECK-LABEL: extadds_v8i8_i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    sshll v1.4s, v0.4h, #0
; CHECK-NEXT:    sshll2 v2.4s, v0.8h, #0
; CHECK-NEXT:    sshll v0.2d, v1.2s, #0
; CHECK-NEXT:    sshll2 v3.2d, v2.4s, #0
; CHECK-NEXT:    sshll2 v1.2d, v1.4s, #0
; CHECK-NEXT:    sshll v2.2d, v2.2s, #0
; CHECK-NEXT:    ret
entry:
  %s0s = sext <8 x i8> %s0 to <8 x i64>
  %s1s = sext <8 x i8> %s1 to <8 x i64>
  %m = add <8 x i64> %s0s, %s1s
  ret <8 x i64> %m
}
; Unsigned i8 -> i64 extend-and-add on <8 x i8> operands. The expected code adds
; once at i16 width with uaddl, then zero-extends the sum in two steps
; (i16 -> i32 -> i64) with ushll/ushll2.
define <8 x i64> @extaddu_v8i8_i64(<8 x i8> %s0, <8 x i8> %s1) {
; CHECK-LABEL: extaddu_v8i8_i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    ushll v1.4s, v0.4h, #0
; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
; CHECK-NEXT:    ushll v0.2d, v1.2s, #0
; CHECK-NEXT:    ushll2 v3.2d, v2.4s, #0
; CHECK-NEXT:    ushll2 v1.2d, v1.4s, #0
; CHECK-NEXT:    ushll v2.2d, v2.2s, #0
; CHECK-NEXT:    ret
entry:
  %s0s = zext <8 x i8> %s0 to <8 x i64>
  %s1s = zext <8 x i8> %s1 to <8 x i64>
  %m = add <8 x i64> %s0s, %s1s
  ret <8 x i64> %m
}
; Signed widening add: both <4 x i16> operands are sign-extended to i32 lanes
; and added. The expected code is a single saddl.
define <4 x i32> @extadds_v4i16_i32(<4 x i16> %s0, <4 x i16> %s1) {
; CHECK-LABEL: extadds_v4i16_i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    ret
entry:
  %s0s = sext <4 x i16> %s0 to <4 x i32>
  %s1s = sext <4 x i16> %s1 to <4 x i32>
  %m = add <4 x i32> %s0s, %s1s
  ret <4 x i32> %m
}
; Unsigned widening add: both <4 x i16> operands are zero-extended to i32 lanes
; and added. The expected code is a single uaddl.
define <4 x i32> @extaddu_v4i16_i32(<4 x i16> %s0, <4 x i16> %s1) {
; CHECK-LABEL: extaddu_v4i16_i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    ret
entry:
  %s0s = zext <4 x i16> %s0 to <4 x i32>
  %s1s = zext <4 x i16> %s1 to <4 x i32>
  %m = add <4 x i32> %s0s, %s1s
  ret <4 x i32> %m
}
; Signed widening add of <8 x i16> operands to <8 x i32>. The result spans two
; vector registers, so the expected code uses saddl2 for the high half and
; saddl for the low half, followed by a register move.
define <8 x i32> @extadds_v8i16_i32(<8 x i16> %s0, <8 x i16> %s1) {
; CHECK-LABEL: extadds_v8i16_i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl2 v2.4s, v0.8h, v1.8h
; CHECK-NEXT:    saddl v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    mov v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %s0s = sext <8 x i16> %s0 to <8 x i32>
  %s1s = sext <8 x i16> %s1 to <8 x i32>
  %m = add <8 x i32> %s0s, %s1s
  ret <8 x i32> %m
}
; Unsigned widening add of <8 x i16> operands to <8 x i32>. The result spans two
; vector registers, so the expected code uses uaddl2 for the high half and
; uaddl for the low half, followed by a register move.
define <8 x i32> @extaddu_v8i16_i32(<8 x i16> %s0, <8 x i16> %s1) {
; CHECK-LABEL: extaddu_v8i16_i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl2 v2.4s, v0.8h, v1.8h
; CHECK-NEXT:    uaddl v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    mov v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %s0s = zext <8 x i16> %s0 to <8 x i32>
  %s1s = zext <8 x i16> %s1 to <8 x i32>
  %m = add <8 x i32> %s0s, %s1s
  ret <8 x i32> %m
}
; Signed widening add of <16 x i16> operands to <16 x i32>. Each operand
; occupies two registers; the expected code is two saddl/saddl2 pairs plus the
; moves needed to place the four result registers.
define <16 x i32> @extadds_v16i16_i32(<16 x i16> %s0, <16 x i16> %s1) {
; CHECK-LABEL: extadds_v16i16_i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl2 v4.4s, v1.8h, v3.8h
; CHECK-NEXT:    saddl v5.4s, v0.4h, v2.4h
; CHECK-NEXT:    saddl2 v6.4s, v0.8h, v2.8h
; CHECK-NEXT:    saddl v2.4s, v1.4h, v3.4h
; CHECK-NEXT:    mov v0.16b, v5.16b
; CHECK-NEXT:    mov v1.16b, v6.16b
; CHECK-NEXT:    mov v3.16b, v4.16b
; CHECK-NEXT:    ret
entry:
  %s0s = sext <16 x i16> %s0 to <16 x i32>
  %s1s = sext <16 x i16> %s1 to <16 x i32>
  %m = add <16 x i32> %s0s, %s1s
  ret <16 x i32> %m
}
; Unsigned widening add of <16 x i16> operands to <16 x i32>. Each operand
; occupies two registers; the expected code is two uaddl/uaddl2 pairs plus the
; moves needed to place the four result registers.
define <16 x i32> @extaddu_v16i16_i32(<16 x i16> %s0, <16 x i16> %s1) {
; CHECK-LABEL: extaddu_v16i16_i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl2 v4.4s, v1.8h, v3.8h
; CHECK-NEXT:    uaddl v5.4s, v0.4h, v2.4h
; CHECK-NEXT:    uaddl2 v6.4s, v0.8h, v2.8h
; CHECK-NEXT:    uaddl v2.4s, v1.4h, v3.4h
; CHECK-NEXT:    mov v0.16b, v5.16b
; CHECK-NEXT:    mov v1.16b, v6.16b
; CHECK-NEXT:    mov v3.16b, v4.16b
; CHECK-NEXT:    ret
entry:
  %s0s = zext <16 x i16> %s0 to <16 x i32>
  %s1s = zext <16 x i16> %s1 to <16 x i32>
  %m = add <16 x i32> %s0s, %s1s
  ret <16 x i32> %m
}
; Signed i16 -> i64 extend-and-add on <4 x i16> operands. The expected code does
; the add at i32 width with saddl and then sign-extends the sum to i64 with
; sshll2/sshll.
define <4 x i64> @extadds_v4i16_i64(<4 x i16> %s0, <4 x i16> %s1) {
; CHECK-LABEL: extadds_v4i16_i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    sshll2 v1.2d, v0.4s, #0
; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-NEXT:    ret
entry:
  %s0s = sext <4 x i16> %s0 to <4 x i64>
  %s1s = sext <4 x i16> %s1 to <4 x i64>
  %m = add <4 x i64> %s0s, %s1s
  ret <4 x i64> %m
}
; Unsigned i16 -> i64 extend-and-add on <4 x i16> operands. The expected code
; does the add at i32 width with uaddl and then zero-extends the sum to i64
; with ushll2/ushll.
define <4 x i64> @extaddu_v4i16_i64(<4 x i16> %s0, <4 x i16> %s1) {
; CHECK-LABEL: extaddu_v4i16_i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    ushll2 v1.2d, v0.4s, #0
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ret
entry:
  %s0s = zext <4 x i16> %s0 to <4 x i64>
  %s1s = zext <4 x i16> %s1 to <4 x i64>
  %m = add <4 x i64> %s0s, %s1s
  ret <4 x i64> %m
}
; Signed i16 -> i64 extend-and-add on <8 x i16> operands. The expected code adds
; at i32 width (saddl/saddl2) and sign-extends the two sums into four i64
; result registers with sshll/sshll2.
define <8 x i64> @extadds_v8i16_i64(<8 x i16> %s0, <8 x i16> %s1) {
; CHECK-LABEL: extadds_v8i16_i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl v2.4s, v0.4h, v1.4h
; CHECK-NEXT:    saddl2 v4.4s, v0.8h, v1.8h
; CHECK-NEXT:    sshll v0.2d, v2.2s, #0
; CHECK-NEXT:    sshll2 v3.2d, v4.4s, #0
; CHECK-NEXT:    sshll2 v1.2d, v2.4s, #0
; CHECK-NEXT:    sshll v2.2d, v4.2s, #0
; CHECK-NEXT:    ret
entry:
  %s0s = sext <8 x i16> %s0 to <8 x i64>
  %s1s = sext <8 x i16> %s1 to <8 x i64>
  %m = add <8 x i64> %s0s, %s1s
  ret <8 x i64> %m
}
; Unsigned i16 -> i64 extend-and-add on <8 x i16> operands. The expected code
; adds at i32 width (uaddl/uaddl2) and zero-extends the two sums into four i64
; result registers with ushll/ushll2.
define <8 x i64> @extaddu_v8i16_i64(<8 x i16> %s0, <8 x i16> %s1) {
; CHECK-LABEL: extaddu_v8i16_i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl v2.4s, v0.4h, v1.4h
; CHECK-NEXT:    uaddl2 v4.4s, v0.8h, v1.8h
; CHECK-NEXT:    ushll v0.2d, v2.2s, #0
; CHECK-NEXT:    ushll2 v3.2d, v4.4s, #0
; CHECK-NEXT:    ushll2 v1.2d, v2.4s, #0
; CHECK-NEXT:    ushll v2.2d, v4.2s, #0
; CHECK-NEXT:    ret
entry:
  %s0s = zext <8 x i16> %s0 to <8 x i64>
  %s1s = zext <8 x i16> %s1 to <8 x i64>
  %m = add <8 x i64> %s0s, %s1s
  ret <8 x i64> %m
}
; Signed widening add: both <2 x i32> operands are sign-extended to i64 lanes
; and added. The expected code is a single saddl.
define <2 x i64> @extadds_v2i32_i64(<2 x i32> %s0, <2 x i32> %s1) {
; CHECK-LABEL: extadds_v2i32_i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    ret
entry:
  %s0s = sext <2 x i32> %s0 to <2 x i64>
  %s1s = sext <2 x i32> %s1 to <2 x i64>
  %m = add <2 x i64> %s0s, %s1s
  ret <2 x i64> %m
}
; Unsigned widening add: both <2 x i32> operands are zero-extended to i64 lanes
; and added. The expected code is a single uaddl.
define <2 x i64> @extaddu_v2i32_i64(<2 x i32> %s0, <2 x i32> %s1) {
; CHECK-LABEL: extaddu_v2i32_i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    ret
entry:
  %s0s = zext <2 x i32> %s0 to <2 x i64>
  %s1s = zext <2 x i32> %s1 to <2 x i64>
  %m = add <2 x i64> %s0s, %s1s
  ret <2 x i64> %m
}
; Signed widening add of <4 x i32> operands to <4 x i64>. The result spans two
; vector registers, so the expected code uses saddl2 for the high half and
; saddl for the low half, followed by a register move.
define <4 x i64> @extadds_v4i32_i64(<4 x i32> %s0, <4 x i32> %s1) {
; CHECK-LABEL: extadds_v4i32_i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl2 v2.2d, v0.4s, v1.4s
; CHECK-NEXT:    saddl v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    mov v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %s0s = sext <4 x i32> %s0 to <4 x i64>
  %s1s = sext <4 x i32> %s1 to <4 x i64>
  %m = add <4 x i64> %s0s, %s1s
  ret <4 x i64> %m
}
; Unsigned widening add of <4 x i32> operands to <4 x i64>. The result spans two
; vector registers, so the expected code uses uaddl2 for the high half and
; uaddl for the low half, followed by a register move.
define <4 x i64> @extaddu_v4i32_i64(<4 x i32> %s0, <4 x i32> %s1) {
; CHECK-LABEL: extaddu_v4i32_i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl2 v2.2d, v0.4s, v1.4s
; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    mov v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %s0s = zext <4 x i32> %s0 to <4 x i64>
  %s1s = zext <4 x i32> %s1 to <4 x i64>
  %m = add <4 x i64> %s0s, %s1s
  ret <4 x i64> %m
}
; Signed widening add of <8 x i32> operands to <8 x i64>. Each operand occupies
; two registers; the expected code is two saddl/saddl2 pairs plus the moves
; needed to place the four result registers.
define <8 x i64> @extadds_v8i32_i64(<8 x i32> %s0, <8 x i32> %s1) {
; CHECK-LABEL: extadds_v8i32_i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl2 v4.2d, v1.4s, v3.4s
; CHECK-NEXT:    saddl v5.2d, v0.2s, v2.2s
; CHECK-NEXT:    saddl2 v6.2d, v0.4s, v2.4s
; CHECK-NEXT:    saddl v2.2d, v1.2s, v3.2s
; CHECK-NEXT:    mov v0.16b, v5.16b
; CHECK-NEXT:    mov v1.16b, v6.16b
; CHECK-NEXT:    mov v3.16b, v4.16b
; CHECK-NEXT:    ret
entry:
  %s0s = sext <8 x i32> %s0 to <8 x i64>
  %s1s = sext <8 x i32> %s1 to <8 x i64>
  %m = add <8 x i64> %s0s, %s1s
  ret <8 x i64> %m
}
; Unsigned widening add of <8 x i32> operands to <8 x i64>. Each operand
; occupies two registers; the expected code is two uaddl/uaddl2 pairs plus the
; moves needed to place the four result registers.
define <8 x i64> @extaddu_v8i32_i64(<8 x i32> %s0, <8 x i32> %s1) {
; CHECK-LABEL: extaddu_v8i32_i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl2 v4.2d, v1.4s, v3.4s
; CHECK-NEXT:    uaddl v5.2d, v0.2s, v2.2s
; CHECK-NEXT:    uaddl2 v6.2d, v0.4s, v2.4s
; CHECK-NEXT:    uaddl v2.2d, v1.2s, v3.2s
; CHECK-NEXT:    mov v0.16b, v5.16b
; CHECK-NEXT:    mov v1.16b, v6.16b
; CHECK-NEXT:    mov v3.16b, v4.16b
; CHECK-NEXT:    ret
entry:
  %s0s = zext <8 x i32> %s0 to <8 x i64>
  %s1s = zext <8 x i32> %s1 to <8 x i64>
  %m = add <8 x i64> %s0s, %s1s
  ret <8 x i64> %m
}
; Mixed-signedness add: %s0 is zero-extended and %s1 is sign-extended from
; <16 x i8> to <16 x i32>. The expected code zero-extends %s0 to i16 with
; ushll/ushll2, folds the sign-extension of %s1 into saddw/saddw2, and then
; sign-extends the i16 sums to i32 with sshll/sshll2.
define <16 x i32> @add_zs(<16 x i8> %s0, <16 x i8> %s1) {
; CHECK-LABEL: add_zs:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ushll v2.8h, v0.8b, #0
; CHECK-NEXT:    ushll2 v0.8h, v0.16b, #0
; CHECK-NEXT:    saddw v2.8h, v2.8h, v1.8b
; CHECK-NEXT:    saddw2 v4.8h, v0.8h, v1.16b
; CHECK-NEXT:    sshll v0.4s, v2.4h, #0
; CHECK-NEXT:    sshll2 v3.4s, v4.8h, #0
; CHECK-NEXT:    sshll2 v1.4s, v2.8h, #0
; CHECK-NEXT:    sshll v2.4s, v4.4h, #0
; CHECK-NEXT:    ret
entry:
  %s0s = zext <16 x i8> %s0 to <16 x i32>
  %s1s = sext <16 x i8> %s1 to <16 x i32>
  %m = add <16 x i32> %s0s, %s1s
  ret <16 x i32> %m
}
; Non-power-of-two element count: <20 x i8> operands are zero-extended to
; <20 x i32> and added. The expected code assembles the byte vectors from the
; w0-w7 arguments and from stack slots, adds at i16 width with uaddl, widens
; the sums to i32 with ushll/ushll2, and stores the result through x8.
define <20 x i32> @v20(<20 x i8> %s0, <20 x i8> %s1) {
; CHECK-LABEL: v20:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov s0, w0
; CHECK-NEXT:    ldr b2, [sp, #160]
; CHECK-NEXT:    add x10, sp, #168
; CHECK-NEXT:    ldr b3, [sp]
; CHECK-NEXT:    add x11, sp, #8
; CHECK-NEXT:    ldr b1, [sp, #96]
; CHECK-NEXT:    ld1 { v2.b }[1], [x10]
; CHECK-NEXT:    add x9, sp, #104
; CHECK-NEXT:    add x10, sp, #176
; CHECK-NEXT:    mov v0.b[1], w1
; CHECK-NEXT:    ld1 { v3.b }[1], [x11]
; CHECK-NEXT:    ld1 { v1.b }[1], [x9]
; CHECK-NEXT:    add x12, sp, #16
; CHECK-NEXT:    add x9, sp, #112
; CHECK-NEXT:    add x13, sp, #184
; CHECK-NEXT:    ld1 { v2.b }[2], [x10]
; CHECK-NEXT:    add x11, sp, #120
; CHECK-NEXT:    add x14, sp, #32
; CHECK-NEXT:    ld1 { v3.b }[2], [x12]
; CHECK-NEXT:    ld1 { v1.b }[2], [x9]
; CHECK-NEXT:    ldr b5, [sp, #64]
; CHECK-NEXT:    mov v0.b[2], w2
; CHECK-NEXT:    ldr b4, [sp, #224]
; CHECK-NEXT:    add x10, sp, #128
; CHECK-NEXT:    ld1 { v2.b }[3], [x13]
; CHECK-NEXT:    add x13, sp, #24
; CHECK-NEXT:    add x12, sp, #136
; CHECK-NEXT:    ld1 { v3.b }[3], [x13]
; CHECK-NEXT:    ld1 { v1.b }[3], [x11]
; CHECK-NEXT:    add x11, sp, #192
; CHECK-NEXT:    add x13, sp, #200
; CHECK-NEXT:    add x15, sp, #80
; CHECK-NEXT:    add x9, sp, #144
; CHECK-NEXT:    mov v0.b[3], w3
; CHECK-NEXT:    ld1 { v2.b }[4], [x11]
; CHECK-NEXT:    add x11, sp, #232
; CHECK-NEXT:    ld1 { v3.b }[4], [x14]
; CHECK-NEXT:    add x14, sp, #72
; CHECK-NEXT:    ld1 { v4.b }[1], [x11]
; CHECK-NEXT:    ld1 { v5.b }[1], [x14]
; CHECK-NEXT:    add x14, sp, #40
; CHECK-NEXT:    ld1 { v1.b }[4], [x10]
; CHECK-NEXT:    ld1 { v2.b }[5], [x13]
; CHECK-NEXT:    add x11, sp, #208
; CHECK-NEXT:    add x13, sp, #48
; CHECK-NEXT:    mov v0.b[4], w4
; CHECK-NEXT:    ld1 { v3.b }[5], [x14]
; CHECK-NEXT:    add x14, sp, #240
; CHECK-NEXT:    ld1 { v4.b }[2], [x14]
; CHECK-NEXT:    ld1 { v5.b }[2], [x15]
; CHECK-NEXT:    ld1 { v1.b }[5], [x12]
; CHECK-NEXT:    ld1 { v2.b }[6], [x11]
; CHECK-NEXT:    add x10, sp, #216
; CHECK-NEXT:    add x11, sp, #56
; CHECK-NEXT:    ld1 { v3.b }[6], [x13]
; CHECK-NEXT:    add x12, sp, #248
; CHECK-NEXT:    add x13, sp, #88
; CHECK-NEXT:    mov v0.b[5], w5
; CHECK-NEXT:    ld1 { v4.b }[3], [x12]
; CHECK-NEXT:    ld1 { v5.b }[3], [x13]
; CHECK-NEXT:    ld1 { v1.b }[6], [x9]
; CHECK-NEXT:    ld1 { v2.b }[7], [x10]
; CHECK-NEXT:    add x9, sp, #152
; CHECK-NEXT:    ld1 { v3.b }[7], [x11]
; CHECK-NEXT:    uaddl v4.8h, v5.8b, v4.8b
; CHECK-NEXT:    mov v0.b[6], w6
; CHECK-NEXT:    ld1 { v1.b }[7], [x9]
; CHECK-NEXT:    uaddl v2.8h, v3.8b, v2.8b
; CHECK-NEXT:    mov v0.b[7], w7
; CHECK-NEXT:    ushll2 v3.4s, v2.8h, #0
; CHECK-NEXT:    ushll v2.4s, v2.4h, #0
; CHECK-NEXT:    uaddl v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    ushll v1.4s, v4.4h, #0
; CHECK-NEXT:    stp q3, q1, [x8, #48]
; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    stp q1, q2, [x8, #16]
; CHECK-NEXT:    str q0, [x8]
; CHECK-NEXT:    ret
entry:
  %s0s = zext <20 x i8> %s0 to <20 x i32>
  %s1s = zext <20 x i8> %s1 to <20 x i32>
  %m = add <20 x i32> %s0s, %s1s
  ret <20 x i32> %m
}
; Non-byte-sized element type: <16 x i12> operands are zero-extended to
; <16 x i32> and added. The expected code spills x19-x23, builds .4h vectors
; from the w0-w7 arguments and from stack slots, widens them with ushll, masks
; every lane with the constant built by movi (#15, msl #8, i.e. 0xfff), and
; adds at .4s width; no widening add instruction is expected here.
define <16 x i32> @i12(<16 x i12> %s0, <16 x i12> %s1) {
; CHECK-LABEL: i12:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x23, [sp, #-48]! // 8-byte Folded Spill
; CHECK-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    .cfi_offset w19, -8
; CHECK-NEXT:    .cfi_offset w20, -16
; CHECK-NEXT:    .cfi_offset w21, -24
; CHECK-NEXT:    .cfi_offset w22, -32
; CHECK-NEXT:    .cfi_offset w23, -48
; CHECK-NEXT:    ldr w12, [sp, #112]
; CHECK-NEXT:    ldr w14, [sp, #144]
; CHECK-NEXT:    fmov s2, w4
; CHECK-NEXT:    ldr w16, [sp, #176]
; CHECK-NEXT:    ldr w19, [sp, #208]
; CHECK-NEXT:    fmov s3, w0
; CHECK-NEXT:    ldr w20, [sp, #80]
; CHECK-NEXT:    ldr w21, [sp, #48]
; CHECK-NEXT:    fmov s5, w12
; CHECK-NEXT:    fmov s4, w19
; CHECK-NEXT:    fmov s6, w16
; CHECK-NEXT:    fmov s7, w14
; CHECK-NEXT:    fmov s0, w20
; CHECK-NEXT:    fmov s1, w21
; CHECK-NEXT:    ldr w10, [sp, #120]
; CHECK-NEXT:    ldr w11, [sp, #152]
; CHECK-NEXT:    ldr w13, [sp, #184]
; CHECK-NEXT:    ldr w15, [sp, #216]
; CHECK-NEXT:    ldr w22, [sp, #88]
; CHECK-NEXT:    ldr w23, [sp, #56]
; CHECK-NEXT:    mov v2.h[1], w5
; CHECK-NEXT:    mov v3.h[1], w1
; CHECK-NEXT:    mov v5.h[1], w10
; CHECK-NEXT:    mov v4.h[1], w15
; CHECK-NEXT:    mov v0.h[1], w22
; CHECK-NEXT:    mov v1.h[1], w23
; CHECK-NEXT:    mov v6.h[1], w13
; CHECK-NEXT:    mov v7.h[1], w11
; CHECK-NEXT:    ldr w8, [sp, #128]
; CHECK-NEXT:    ldr w9, [sp, #160]
; CHECK-NEXT:    ldr w17, [sp, #64]
; CHECK-NEXT:    ldr w18, [sp, #96]
; CHECK-NEXT:    ldr w10, [sp, #192]
; CHECK-NEXT:    ldr w11, [sp, #224]
; CHECK-NEXT:    mov v2.h[2], w6
; CHECK-NEXT:    mov v3.h[2], w2
; CHECK-NEXT:    mov v0.h[2], w18
; CHECK-NEXT:    mov v1.h[2], w17
; CHECK-NEXT:    mov v5.h[2], w8
; CHECK-NEXT:    mov v4.h[2], w11
; CHECK-NEXT:    mov v6.h[2], w10
; CHECK-NEXT:    mov v7.h[2], w9
; CHECK-NEXT:    ldr w12, [sp, #72]
; CHECK-NEXT:    ldr w13, [sp, #104]
; CHECK-NEXT:    ldr w8, [sp, #136]
; CHECK-NEXT:    ldr w9, [sp, #168]
; CHECK-NEXT:    ldr w10, [sp, #200]
; CHECK-NEXT:    ldr w11, [sp, #232]
; CHECK-NEXT:    mov v0.h[3], w13
; CHECK-NEXT:    mov v1.h[3], w12
; CHECK-NEXT:    mov v2.h[3], w7
; CHECK-NEXT:    mov v3.h[3], w3
; CHECK-NEXT:    mov v5.h[3], w8
; CHECK-NEXT:    mov v4.h[3], w11
; CHECK-NEXT:    mov v6.h[3], w10
; CHECK-NEXT:    mov v7.h[3], w9
; CHECK-NEXT:    movi v16.4s, #15, msl #8
; CHECK-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
; CHECK-NEXT:    ushll v2.4s, v2.4h, #0
; CHECK-NEXT:    ushll v3.4s, v3.4h, #0
; CHECK-NEXT:    ushll v5.4s, v5.4h, #0
; CHECK-NEXT:    ushll v4.4s, v4.4h, #0
; CHECK-NEXT:    ushll v6.4s, v6.4h, #0
; CHECK-NEXT:    ushll v7.4s, v7.4h, #0
; CHECK-NEXT:    and v17.16b, v0.16b, v16.16b
; CHECK-NEXT:    and v18.16b, v1.16b, v16.16b
; CHECK-NEXT:    and v1.16b, v2.16b, v16.16b
; CHECK-NEXT:    and v0.16b, v3.16b, v16.16b
; CHECK-NEXT:    and v2.16b, v5.16b, v16.16b
; CHECK-NEXT:    and v3.16b, v4.16b, v16.16b
; CHECK-NEXT:    and v4.16b, v6.16b, v16.16b
; CHECK-NEXT:    and v5.16b, v7.16b, v16.16b
; CHECK-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
; CHECK-NEXT:    add v3.4s, v17.4s, v3.4s
; CHECK-NEXT:    add v1.4s, v1.4s, v5.4s
; CHECK-NEXT:    add v2.4s, v18.4s, v4.4s
; CHECK-NEXT:    ldr x23, [sp], #48 // 8-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %s0s = zext <16 x i12> %s0 to <16 x i32>
  %s1s = zext <16 x i12> %s1 to <16 x i32>
  %m = add <16 x i32> %s0s, %s1s
  ret <16 x i32> %m
}
; Subtract of two zero-extended <16 x i8> operands at i32 width. The expected
; code subtracts at i16 width with usubl/usubl2 and then sign-extends the
; differences to i32 with sshll/sshll2.
define <16 x i32> @sub_zz(<16 x i8> %s0, <16 x i8> %s1) {
; CHECK-LABEL: sub_zz:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubl v2.8h, v0.8b, v1.8b
; CHECK-NEXT:    usubl2 v4.8h, v0.16b, v1.16b
; CHECK-NEXT:    sshll v0.4s, v2.4h, #0
; CHECK-NEXT:    sshll2 v3.4s, v4.8h, #0
; CHECK-NEXT:    sshll2 v1.4s, v2.8h, #0
; CHECK-NEXT:    sshll v2.4s, v4.4h, #0
; CHECK-NEXT:    ret
entry:
  %s0s = zext <16 x i8> %s0 to <16 x i32>
  %s1s = zext <16 x i8> %s1 to <16 x i32>
  %m = sub <16 x i32> %s0s, %s1s
  ret <16 x i32> %m
}
; Subtract of two sign-extended <16 x i8> operands at i32 width. The expected
; code subtracts at i16 width with ssubl/ssubl2 and then sign-extends the
; differences to i32 with sshll/sshll2.
define <16 x i32> @sub_ss(<16 x i8> %s0, <16 x i8> %s1) {
; CHECK-LABEL: sub_ss:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ssubl v2.8h, v0.8b, v1.8b
; CHECK-NEXT:    ssubl2 v4.8h, v0.16b, v1.16b
; CHECK-NEXT:    sshll v0.4s, v2.4h, #0
; CHECK-NEXT:    sshll2 v3.4s, v4.8h, #0
; CHECK-NEXT:    sshll2 v1.4s, v2.8h, #0
; CHECK-NEXT:    sshll v2.4s, v4.4h, #0
; CHECK-NEXT:    ret
entry:
  %s0s = sext <16 x i8> %s0 to <16 x i32>
  %s1s = sext <16 x i8> %s1 to <16 x i32>
  %m = sub <16 x i32> %s0s, %s1s
  ret <16 x i32> %m
}
; Mixed-signedness subtract: %s0 is zero-extended and %s1 is sign-extended from
; <16 x i8> to <16 x i32>. The expected code zero-extends %s0 to i16 with
; ushll/ushll2, folds the sign-extension of %s1 into ssubw/ssubw2, and then
; sign-extends the i16 differences to i32 with sshll/sshll2.
define <16 x i32> @sub_zs(<16 x i8> %s0, <16 x i8> %s1) {
; CHECK-LABEL: sub_zs:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ushll v2.8h, v0.8b, #0
; CHECK-NEXT:    ushll2 v0.8h, v0.16b, #0
; CHECK-NEXT:    ssubw v2.8h, v2.8h, v1.8b
; CHECK-NEXT:    ssubw2 v4.8h, v0.8h, v1.16b
; CHECK-NEXT:    sshll v0.4s, v2.4h, #0
; CHECK-NEXT:    sshll2 v3.4s, v4.8h, #0
; CHECK-NEXT:    sshll2 v1.4s, v2.8h, #0
; CHECK-NEXT:    sshll v2.4s, v4.4h, #0
; CHECK-NEXT:    ret
entry:
  %s0s = zext <16 x i8> %s0 to <16 x i32>
  %s1s = sext <16 x i8> %s1 to <16 x i32>
  %m = sub <16 x i32> %s0s, %s1s
  ret <16 x i32> %m
}