1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple aarch64 -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3 ; RUN: llc < %s -mtriple aarch64 -o - -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; extadds_v8i8_i16: sign-extends two <8 x i8> operands to <8 x i16> and adds
; them. Both llc runs share one expectation: a single widening add (saddl).
5 define <8 x i16> @extadds_v8i8_i16(<8 x i8> %s0, <8 x i8> %s1) {
6 ; CHECK-LABEL: extadds_v8i8_i16:
7 ; CHECK: // %bb.0: // %entry
8 ; CHECK-NEXT: saddl v0.8h, v0.8b, v1.8b
11 %s0s = sext <8 x i8> %s0 to <8 x i16>
12 %s1s = sext <8 x i8> %s1 to <8 x i16>
13 %m = add <8 x i16> %s0s, %s1s
; extaddu_v8i8_i16: zero-extends two <8 x i8> operands to <8 x i16> and adds
; them. Both llc runs share one expectation: a single widening add (uaddl).
17 define <8 x i16> @extaddu_v8i8_i16(<8 x i8> %s0, <8 x i8> %s1) {
18 ; CHECK-LABEL: extaddu_v8i8_i16:
19 ; CHECK: // %bb.0: // %entry
20 ; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b
23 %s0s = zext <8 x i8> %s0 to <8 x i16>
24 %s1s = zext <8 x i8> %s1 to <8 x i16>
25 %m = add <8 x i16> %s0s, %s1s
; extadds_v16i8_i16: sign-extends two <16 x i8> operands to <16 x i16> and adds
; them. Expected code is saddl on the low halves plus saddl2 on the high
; halves, followed by one register mov; the default and -global-isel runs
; differ only in instruction order and register assignment.
29 define <16 x i16> @extadds_v16i8_i16(<16 x i8> %s0, <16 x i8> %s1) {
30 ; CHECK-SD-LABEL: extadds_v16i8_i16:
31 ; CHECK-SD: // %bb.0: // %entry
32 ; CHECK-SD-NEXT: saddl2 v2.8h, v0.16b, v1.16b
33 ; CHECK-SD-NEXT: saddl v0.8h, v0.8b, v1.8b
34 ; CHECK-SD-NEXT: mov v1.16b, v2.16b
37 ; CHECK-GI-LABEL: extadds_v16i8_i16:
38 ; CHECK-GI: // %bb.0: // %entry
39 ; CHECK-GI-NEXT: saddl v2.8h, v0.8b, v1.8b
40 ; CHECK-GI-NEXT: saddl2 v1.8h, v0.16b, v1.16b
41 ; CHECK-GI-NEXT: mov v0.16b, v2.16b
44 %s0s = sext <16 x i8> %s0 to <16 x i16>
45 %s1s = sext <16 x i8> %s1 to <16 x i16>
46 %m = add <16 x i16> %s0s, %s1s
; extaddu_v16i8_i16: zero-extends two <16 x i8> operands to <16 x i16> and adds
; them. Expected code is uaddl on the low halves plus uaddl2 on the high
; halves, followed by one register mov; the default and -global-isel runs
; differ only in instruction order and register assignment.
50 define <16 x i16> @extaddu_v16i8_i16(<16 x i8> %s0, <16 x i8> %s1) {
51 ; CHECK-SD-LABEL: extaddu_v16i8_i16:
52 ; CHECK-SD: // %bb.0: // %entry
53 ; CHECK-SD-NEXT: uaddl2 v2.8h, v0.16b, v1.16b
54 ; CHECK-SD-NEXT: uaddl v0.8h, v0.8b, v1.8b
55 ; CHECK-SD-NEXT: mov v1.16b, v2.16b
58 ; CHECK-GI-LABEL: extaddu_v16i8_i16:
59 ; CHECK-GI: // %bb.0: // %entry
60 ; CHECK-GI-NEXT: uaddl v2.8h, v0.8b, v1.8b
61 ; CHECK-GI-NEXT: uaddl2 v1.8h, v0.16b, v1.16b
62 ; CHECK-GI-NEXT: mov v0.16b, v2.16b
65 %s0s = zext <16 x i8> %s0 to <16 x i16>
66 %s1s = zext <16 x i8> %s1 to <16 x i16>
67 %m = add <16 x i16> %s0s, %s1s
; extadds_v32i8_i16: sign-extends two <32 x i8> operands to <32 x i16> and adds
; them. Expected code is four widening adds (saddl/saddl2 pairing v0 with v2
; and v1 with v3) plus register movs; the default run needs three movs, the
; -global-isel run two.
71 define <32 x i16> @extadds_v32i8_i16(<32 x i8> %s0, <32 x i8> %s1) {
72 ; CHECK-SD-LABEL: extadds_v32i8_i16:
73 ; CHECK-SD: // %bb.0: // %entry
74 ; CHECK-SD-NEXT: saddl2 v4.8h, v1.16b, v3.16b
75 ; CHECK-SD-NEXT: saddl v5.8h, v0.8b, v2.8b
76 ; CHECK-SD-NEXT: saddl2 v6.8h, v0.16b, v2.16b
77 ; CHECK-SD-NEXT: saddl v2.8h, v1.8b, v3.8b
78 ; CHECK-SD-NEXT: mov v0.16b, v5.16b
79 ; CHECK-SD-NEXT: mov v1.16b, v6.16b
80 ; CHECK-SD-NEXT: mov v3.16b, v4.16b
83 ; CHECK-GI-LABEL: extadds_v32i8_i16:
84 ; CHECK-GI: // %bb.0: // %entry
85 ; CHECK-GI-NEXT: saddl v4.8h, v0.8b, v2.8b
86 ; CHECK-GI-NEXT: saddl2 v5.8h, v0.16b, v2.16b
87 ; CHECK-GI-NEXT: saddl v2.8h, v1.8b, v3.8b
88 ; CHECK-GI-NEXT: saddl2 v3.8h, v1.16b, v3.16b
89 ; CHECK-GI-NEXT: mov v0.16b, v4.16b
90 ; CHECK-GI-NEXT: mov v1.16b, v5.16b
93 %s0s = sext <32 x i8> %s0 to <32 x i16>
94 %s1s = sext <32 x i8> %s1 to <32 x i16>
95 %m = add <32 x i16> %s0s, %s1s
; extaddu_v32i8_i16: zero-extends two <32 x i8> operands to <32 x i16> and adds
; them. Expected code is four widening adds (uaddl/uaddl2 pairing v0 with v2
; and v1 with v3) plus register movs; the default run needs three movs, the
; -global-isel run two.
99 define <32 x i16> @extaddu_v32i8_i16(<32 x i8> %s0, <32 x i8> %s1) {
100 ; CHECK-SD-LABEL: extaddu_v32i8_i16:
101 ; CHECK-SD: // %bb.0: // %entry
102 ; CHECK-SD-NEXT: uaddl2 v4.8h, v1.16b, v3.16b
103 ; CHECK-SD-NEXT: uaddl v5.8h, v0.8b, v2.8b
104 ; CHECK-SD-NEXT: uaddl2 v6.8h, v0.16b, v2.16b
105 ; CHECK-SD-NEXT: uaddl v2.8h, v1.8b, v3.8b
106 ; CHECK-SD-NEXT: mov v0.16b, v5.16b
107 ; CHECK-SD-NEXT: mov v1.16b, v6.16b
108 ; CHECK-SD-NEXT: mov v3.16b, v4.16b
111 ; CHECK-GI-LABEL: extaddu_v32i8_i16:
112 ; CHECK-GI: // %bb.0: // %entry
113 ; CHECK-GI-NEXT: uaddl v4.8h, v0.8b, v2.8b
114 ; CHECK-GI-NEXT: uaddl2 v5.8h, v0.16b, v2.16b
115 ; CHECK-GI-NEXT: uaddl v2.8h, v1.8b, v3.8b
116 ; CHECK-GI-NEXT: uaddl2 v3.8h, v1.16b, v3.16b
117 ; CHECK-GI-NEXT: mov v0.16b, v4.16b
118 ; CHECK-GI-NEXT: mov v1.16b, v5.16b
121 %s0s = zext <32 x i8> %s0 to <32 x i16>
122 %s1s = zext <32 x i8> %s1 to <32 x i16>
123 %m = add <32 x i16> %s0s, %s1s
; extadds_v8i8_i32: sign-extends two <8 x i8> operands to <8 x i32> and adds
; them. Expected code performs the add at 16 bits (one saddl) and then widens
; the sum to 32 bits with sshll/sshll2, instead of extending each input to
; 32 bits first.
127 define <8 x i32> @extadds_v8i8_i32(<8 x i8> %s0, <8 x i8> %s1) {
128 ; CHECK-SD-LABEL: extadds_v8i8_i32:
129 ; CHECK-SD: // %bb.0: // %entry
130 ; CHECK-SD-NEXT: saddl v0.8h, v0.8b, v1.8b
131 ; CHECK-SD-NEXT: sshll2 v1.4s, v0.8h, #0
132 ; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
135 ; CHECK-GI-LABEL: extadds_v8i8_i32:
136 ; CHECK-GI: // %bb.0: // %entry
137 ; CHECK-GI-NEXT: saddl v1.8h, v0.8b, v1.8b
138 ; CHECK-GI-NEXT: sshll v0.4s, v1.4h, #0
139 ; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0
142 %s0s = sext <8 x i8> %s0 to <8 x i32>
143 %s1s = sext <8 x i8> %s1 to <8 x i32>
144 %m = add <8 x i32> %s0s, %s1s
; extaddu_v8i8_i32: zero-extends two <8 x i8> operands to <8 x i32> and adds
; them. Expected code performs the add at 16 bits (one uaddl) and then widens
; the sum to 32 bits with ushll/ushll2, instead of extending each input to
; 32 bits first.
148 define <8 x i32> @extaddu_v8i8_i32(<8 x i8> %s0, <8 x i8> %s1) {
149 ; CHECK-SD-LABEL: extaddu_v8i8_i32:
150 ; CHECK-SD: // %bb.0: // %entry
151 ; CHECK-SD-NEXT: uaddl v0.8h, v0.8b, v1.8b
152 ; CHECK-SD-NEXT: ushll2 v1.4s, v0.8h, #0
153 ; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
156 ; CHECK-GI-LABEL: extaddu_v8i8_i32:
157 ; CHECK-GI: // %bb.0: // %entry
158 ; CHECK-GI-NEXT: uaddl v1.8h, v0.8b, v1.8b
159 ; CHECK-GI-NEXT: ushll v0.4s, v1.4h, #0
160 ; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
163 %s0s = zext <8 x i8> %s0 to <8 x i32>
164 %s1s = zext <8 x i8> %s1 to <8 x i32>
165 %m = add <8 x i32> %s0s, %s1s
; extsubs_v8i8_i32: sign-extends two <8 x i8> operands to <8 x i32> and
; subtracts the second from the first. Expected code performs the subtract at
; 16 bits (one ssubl) and then widens the difference with sshll/sshll2.
169 define <8 x i32> @extsubs_v8i8_i32(<8 x i8> %s0, <8 x i8> %s1) {
170 ; CHECK-SD-LABEL: extsubs_v8i8_i32:
171 ; CHECK-SD: // %bb.0: // %entry
172 ; CHECK-SD-NEXT: ssubl v0.8h, v0.8b, v1.8b
173 ; CHECK-SD-NEXT: sshll2 v1.4s, v0.8h, #0
174 ; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
177 ; CHECK-GI-LABEL: extsubs_v8i8_i32:
178 ; CHECK-GI: // %bb.0: // %entry
179 ; CHECK-GI-NEXT: ssubl v1.8h, v0.8b, v1.8b
180 ; CHECK-GI-NEXT: sshll v0.4s, v1.4h, #0
181 ; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0
184 %s0s = sext <8 x i8> %s0 to <8 x i32>
185 %s1s = sext <8 x i8> %s1 to <8 x i32>
186 %m = sub <8 x i32> %s0s, %s1s
; extsubu_v8i8_i32: zero-extends two <8 x i8> operands to <8 x i32> and
; subtracts the second from the first. Expected code performs the subtract at
; 16 bits (one usubl) and then widens with the *signed* sshll/sshll2: the
; difference of two zero-extended values can be negative, so the 16-bit
; result has to be sign-extended.
190 define <8 x i32> @extsubu_v8i8_i32(<8 x i8> %s0, <8 x i8> %s1) {
191 ; CHECK-SD-LABEL: extsubu_v8i8_i32:
192 ; CHECK-SD: // %bb.0: // %entry
193 ; CHECK-SD-NEXT: usubl v0.8h, v0.8b, v1.8b
194 ; CHECK-SD-NEXT: sshll2 v1.4s, v0.8h, #0
195 ; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
198 ; CHECK-GI-LABEL: extsubu_v8i8_i32:
199 ; CHECK-GI: // %bb.0: // %entry
200 ; CHECK-GI-NEXT: usubl v1.8h, v0.8b, v1.8b
201 ; CHECK-GI-NEXT: sshll v0.4s, v1.4h, #0
202 ; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0
205 %s0s = zext <8 x i8> %s0 to <8 x i32>
206 %s1s = zext <8 x i8> %s1 to <8 x i32>
207 %m = sub <8 x i32> %s0s, %s1s
; extadds_v16i8_i32: sign-extends two <16 x i8> operands to <16 x i32> and adds
; them. Expected code is two 16-bit widening adds (saddl/saddl2) followed by
; four sshll/sshll2 that widen the sums to 32 bits; the two llc runs differ
; only in scheduling and register assignment.
211 define <16 x i32> @extadds_v16i8_i32(<16 x i8> %s0, <16 x i8> %s1) {
212 ; CHECK-SD-LABEL: extadds_v16i8_i32:
213 ; CHECK-SD: // %bb.0: // %entry
214 ; CHECK-SD-NEXT: saddl v2.8h, v0.8b, v1.8b
215 ; CHECK-SD-NEXT: saddl2 v4.8h, v0.16b, v1.16b
216 ; CHECK-SD-NEXT: sshll v0.4s, v2.4h, #0
217 ; CHECK-SD-NEXT: sshll2 v3.4s, v4.8h, #0
218 ; CHECK-SD-NEXT: sshll2 v1.4s, v2.8h, #0
219 ; CHECK-SD-NEXT: sshll v2.4s, v4.4h, #0
222 ; CHECK-GI-LABEL: extadds_v16i8_i32:
223 ; CHECK-GI: // %bb.0: // %entry
224 ; CHECK-GI-NEXT: saddl v2.8h, v0.8b, v1.8b
225 ; CHECK-GI-NEXT: saddl2 v3.8h, v0.16b, v1.16b
226 ; CHECK-GI-NEXT: sshll v0.4s, v2.4h, #0
227 ; CHECK-GI-NEXT: sshll2 v1.4s, v2.8h, #0
228 ; CHECK-GI-NEXT: sshll v2.4s, v3.4h, #0
229 ; CHECK-GI-NEXT: sshll2 v3.4s, v3.8h, #0
232 %s0s = sext <16 x i8> %s0 to <16 x i32>
233 %s1s = sext <16 x i8> %s1 to <16 x i32>
234 %m = add <16 x i32> %s0s, %s1s
; extaddu_v16i8_i32: zero-extends two <16 x i8> operands to <16 x i32> and adds
; them. Expected code is two 16-bit widening adds (uaddl/uaddl2) followed by
; four ushll/ushll2 that widen the sums to 32 bits; the two llc runs differ
; only in scheduling and register assignment.
238 define <16 x i32> @extaddu_v16i8_i32(<16 x i8> %s0, <16 x i8> %s1) {
239 ; CHECK-SD-LABEL: extaddu_v16i8_i32:
240 ; CHECK-SD: // %bb.0: // %entry
241 ; CHECK-SD-NEXT: uaddl v2.8h, v0.8b, v1.8b
242 ; CHECK-SD-NEXT: uaddl2 v4.8h, v0.16b, v1.16b
243 ; CHECK-SD-NEXT: ushll v0.4s, v2.4h, #0
244 ; CHECK-SD-NEXT: ushll2 v3.4s, v4.8h, #0
245 ; CHECK-SD-NEXT: ushll2 v1.4s, v2.8h, #0
246 ; CHECK-SD-NEXT: ushll v2.4s, v4.4h, #0
249 ; CHECK-GI-LABEL: extaddu_v16i8_i32:
250 ; CHECK-GI: // %bb.0: // %entry
251 ; CHECK-GI-NEXT: uaddl v2.8h, v0.8b, v1.8b
252 ; CHECK-GI-NEXT: uaddl2 v3.8h, v0.16b, v1.16b
253 ; CHECK-GI-NEXT: ushll v0.4s, v2.4h, #0
254 ; CHECK-GI-NEXT: ushll2 v1.4s, v2.8h, #0
255 ; CHECK-GI-NEXT: ushll v2.4s, v3.4h, #0
256 ; CHECK-GI-NEXT: ushll2 v3.4s, v3.8h, #0
259 %s0s = zext <16 x i8> %s0 to <16 x i32>
260 %s1s = zext <16 x i8> %s1 to <16 x i32>
261 %m = add <16 x i32> %s0s, %s1s
; extsubs_v16i8_i32: sign-extends two <16 x i8> operands to <16 x i32> and
; subtracts the second from the first. Expected code is two 16-bit widening
; subtracts (ssubl/ssubl2) followed by four sshll/sshll2 to reach 32 bits.
265 define <16 x i32> @extsubs_v16i8_i32(<16 x i8> %s0, <16 x i8> %s1) {
266 ; CHECK-SD-LABEL: extsubs_v16i8_i32:
267 ; CHECK-SD: // %bb.0: // %entry
268 ; CHECK-SD-NEXT: ssubl v2.8h, v0.8b, v1.8b
269 ; CHECK-SD-NEXT: ssubl2 v4.8h, v0.16b, v1.16b
270 ; CHECK-SD-NEXT: sshll v0.4s, v2.4h, #0
271 ; CHECK-SD-NEXT: sshll2 v3.4s, v4.8h, #0
272 ; CHECK-SD-NEXT: sshll2 v1.4s, v2.8h, #0
273 ; CHECK-SD-NEXT: sshll v2.4s, v4.4h, #0
276 ; CHECK-GI-LABEL: extsubs_v16i8_i32:
277 ; CHECK-GI: // %bb.0: // %entry
278 ; CHECK-GI-NEXT: ssubl v2.8h, v0.8b, v1.8b
279 ; CHECK-GI-NEXT: ssubl2 v3.8h, v0.16b, v1.16b
280 ; CHECK-GI-NEXT: sshll v0.4s, v2.4h, #0
281 ; CHECK-GI-NEXT: sshll2 v1.4s, v2.8h, #0
282 ; CHECK-GI-NEXT: sshll v2.4s, v3.4h, #0
283 ; CHECK-GI-NEXT: sshll2 v3.4s, v3.8h, #0
286 %s0s = sext <16 x i8> %s0 to <16 x i32>
287 %s1s = sext <16 x i8> %s1 to <16 x i32>
288 %m = sub <16 x i32> %s0s, %s1s
; extsubu_v16i8_i32: zero-extends two <16 x i8> operands to <16 x i32> and
; subtracts the second from the first. Expected code is two 16-bit widening
; subtracts (usubl/usubl2) followed by four *signed* sshll/sshll2: the
; difference of zero-extended values can be negative, so the 16-bit result is
; sign-extended to 32 bits.
292 define <16 x i32> @extsubu_v16i8_i32(<16 x i8> %s0, <16 x i8> %s1) {
293 ; CHECK-SD-LABEL: extsubu_v16i8_i32:
294 ; CHECK-SD: // %bb.0: // %entry
295 ; CHECK-SD-NEXT: usubl v2.8h, v0.8b, v1.8b
296 ; CHECK-SD-NEXT: usubl2 v4.8h, v0.16b, v1.16b
297 ; CHECK-SD-NEXT: sshll v0.4s, v2.4h, #0
298 ; CHECK-SD-NEXT: sshll2 v3.4s, v4.8h, #0
299 ; CHECK-SD-NEXT: sshll2 v1.4s, v2.8h, #0
300 ; CHECK-SD-NEXT: sshll v2.4s, v4.4h, #0
303 ; CHECK-GI-LABEL: extsubu_v16i8_i32:
304 ; CHECK-GI: // %bb.0: // %entry
305 ; CHECK-GI-NEXT: usubl v2.8h, v0.8b, v1.8b
306 ; CHECK-GI-NEXT: usubl2 v3.8h, v0.16b, v1.16b
307 ; CHECK-GI-NEXT: sshll v0.4s, v2.4h, #0
308 ; CHECK-GI-NEXT: sshll2 v1.4s, v2.8h, #0
309 ; CHECK-GI-NEXT: sshll v2.4s, v3.4h, #0
310 ; CHECK-GI-NEXT: sshll2 v3.4s, v3.8h, #0
313 %s0s = zext <16 x i8> %s0 to <16 x i32>
314 %s1s = zext <16 x i8> %s1 to <16 x i32>
315 %m = sub <16 x i32> %s0s, %s1s
; extadds_v8i8_i64: sign-extends two <8 x i8> operands to <8 x i64> and adds
; them. Expected code does the add at 16 bits (one saddl), then widens in two
; stages with sshll/sshll2: 16 -> 32 bits (two instructions) and 32 -> 64 bits
; (four instructions).
319 define <8 x i64> @extadds_v8i8_i64(<8 x i8> %s0, <8 x i8> %s1) {
320 ; CHECK-SD-LABEL: extadds_v8i8_i64:
321 ; CHECK-SD: // %bb.0: // %entry
322 ; CHECK-SD-NEXT: saddl v0.8h, v0.8b, v1.8b
323 ; CHECK-SD-NEXT: sshll v1.4s, v0.4h, #0
324 ; CHECK-SD-NEXT: sshll2 v2.4s, v0.8h, #0
325 ; CHECK-SD-NEXT: sshll v0.2d, v1.2s, #0
326 ; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0
327 ; CHECK-SD-NEXT: sshll2 v1.2d, v1.4s, #0
328 ; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0
331 ; CHECK-GI-LABEL: extadds_v8i8_i64:
332 ; CHECK-GI: // %bb.0: // %entry
333 ; CHECK-GI-NEXT: saddl v0.8h, v0.8b, v1.8b
334 ; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0
335 ; CHECK-GI-NEXT: sshll2 v3.4s, v0.8h, #0
336 ; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0
337 ; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0
338 ; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0
339 ; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0
342 %s0s = sext <8 x i8> %s0 to <8 x i64>
343 %s1s = sext <8 x i8> %s1 to <8 x i64>
344 %m = add <8 x i64> %s0s, %s1s
; extaddu_v8i8_i64: zero-extends two <8 x i8> operands to <8 x i64> and adds
; them. Expected code does the add at 16 bits (one uaddl), then widens in two
; stages with ushll/ushll2: 16 -> 32 bits (two instructions) and 32 -> 64 bits
; (four instructions).
348 define <8 x i64> @extaddu_v8i8_i64(<8 x i8> %s0, <8 x i8> %s1) {
349 ; CHECK-SD-LABEL: extaddu_v8i8_i64:
350 ; CHECK-SD: // %bb.0: // %entry
351 ; CHECK-SD-NEXT: uaddl v0.8h, v0.8b, v1.8b
352 ; CHECK-SD-NEXT: ushll v1.4s, v0.4h, #0
353 ; CHECK-SD-NEXT: ushll2 v2.4s, v0.8h, #0
354 ; CHECK-SD-NEXT: ushll v0.2d, v1.2s, #0
355 ; CHECK-SD-NEXT: ushll2 v3.2d, v2.4s, #0
356 ; CHECK-SD-NEXT: ushll2 v1.2d, v1.4s, #0
357 ; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
360 ; CHECK-GI-LABEL: extaddu_v8i8_i64:
361 ; CHECK-GI: // %bb.0: // %entry
362 ; CHECK-GI-NEXT: uaddl v0.8h, v0.8b, v1.8b
363 ; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0
364 ; CHECK-GI-NEXT: ushll2 v3.4s, v0.8h, #0
365 ; CHECK-GI-NEXT: ushll v0.2d, v1.2s, #0
366 ; CHECK-GI-NEXT: ushll2 v1.2d, v1.4s, #0
367 ; CHECK-GI-NEXT: ushll v2.2d, v3.2s, #0
368 ; CHECK-GI-NEXT: ushll2 v3.2d, v3.4s, #0
371 %s0s = zext <8 x i8> %s0 to <8 x i64>
372 %s1s = zext <8 x i8> %s1 to <8 x i64>
373 %m = add <8 x i64> %s0s, %s1s
; extsubs_v8i8_i64: sign-extends two <8 x i8> operands to <8 x i64> and
; subtracts the second from the first. Expected code does the subtract at
; 16 bits (one ssubl), then widens with sshll/sshll2 in two stages
; (16 -> 32 bits, then 32 -> 64 bits).
377 define <8 x i64> @extsubs_v8i8_i64(<8 x i8> %s0, <8 x i8> %s1) {
378 ; CHECK-SD-LABEL: extsubs_v8i8_i64:
379 ; CHECK-SD: // %bb.0: // %entry
380 ; CHECK-SD-NEXT: ssubl v0.8h, v0.8b, v1.8b
381 ; CHECK-SD-NEXT: sshll v1.4s, v0.4h, #0
382 ; CHECK-SD-NEXT: sshll2 v2.4s, v0.8h, #0
383 ; CHECK-SD-NEXT: sshll v0.2d, v1.2s, #0
384 ; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0
385 ; CHECK-SD-NEXT: sshll2 v1.2d, v1.4s, #0
386 ; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0
389 ; CHECK-GI-LABEL: extsubs_v8i8_i64:
390 ; CHECK-GI: // %bb.0: // %entry
391 ; CHECK-GI-NEXT: ssubl v0.8h, v0.8b, v1.8b
392 ; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0
393 ; CHECK-GI-NEXT: sshll2 v3.4s, v0.8h, #0
394 ; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0
395 ; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0
396 ; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0
397 ; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0
400 %s0s = sext <8 x i8> %s0 to <8 x i64>
401 %s1s = sext <8 x i8> %s1 to <8 x i64>
402 %m = sub <8 x i64> %s0s, %s1s
; extsubu_v8i8_i64: zero-extends two <8 x i8> operands to <8 x i64> and
; subtracts the second from the first. Expected code does the subtract at
; 16 bits (one usubl), then widens with the *signed* sshll/sshll2 in two
; stages: the difference of zero-extended values can be negative, so it is
; sign-extended on the way to 64 bits.
406 define <8 x i64> @extsubu_v8i8_i64(<8 x i8> %s0, <8 x i8> %s1) {
407 ; CHECK-SD-LABEL: extsubu_v8i8_i64:
408 ; CHECK-SD: // %bb.0: // %entry
409 ; CHECK-SD-NEXT: usubl v0.8h, v0.8b, v1.8b
410 ; CHECK-SD-NEXT: sshll v1.4s, v0.4h, #0
411 ; CHECK-SD-NEXT: sshll2 v2.4s, v0.8h, #0
412 ; CHECK-SD-NEXT: sshll v0.2d, v1.2s, #0
413 ; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0
414 ; CHECK-SD-NEXT: sshll2 v1.2d, v1.4s, #0
415 ; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0
418 ; CHECK-GI-LABEL: extsubu_v8i8_i64:
419 ; CHECK-GI: // %bb.0: // %entry
420 ; CHECK-GI-NEXT: usubl v0.8h, v0.8b, v1.8b
421 ; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0
422 ; CHECK-GI-NEXT: sshll2 v3.4s, v0.8h, #0
423 ; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0
424 ; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0
425 ; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0
426 ; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0
429 %s0s = zext <8 x i8> %s0 to <8 x i64>
430 %s1s = zext <8 x i8> %s1 to <8 x i64>
431 %m = sub <8 x i64> %s0s, %s1s
; extaddu_v16i8_i64: zero-extends two <16 x i8> operands to <16 x i64> and adds
; them. Expected code is two 16-bit widening adds (uaddl/uaddl2), four
; ushll/ushll2 to reach 32 bits and eight more to reach 64 bits; the two llc
; runs differ only in scheduling and register assignment.
435 define <16 x i64> @extaddu_v16i8_i64(<16 x i8> %a, <16 x i8> %b) {
436 ; CHECK-SD-LABEL: extaddu_v16i8_i64:
437 ; CHECK-SD: // %bb.0:
438 ; CHECK-SD-NEXT: uaddl v2.8h, v0.8b, v1.8b
439 ; CHECK-SD-NEXT: uaddl2 v0.8h, v0.16b, v1.16b
440 ; CHECK-SD-NEXT: ushll v3.4s, v2.4h, #0
441 ; CHECK-SD-NEXT: ushll2 v2.4s, v2.8h, #0
442 ; CHECK-SD-NEXT: ushll v5.4s, v0.4h, #0
443 ; CHECK-SD-NEXT: ushll2 v6.4s, v0.8h, #0
444 ; CHECK-SD-NEXT: ushll2 v1.2d, v3.4s, #0
445 ; CHECK-SD-NEXT: ushll v0.2d, v3.2s, #0
446 ; CHECK-SD-NEXT: ushll2 v3.2d, v2.4s, #0
447 ; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
448 ; CHECK-SD-NEXT: ushll v4.2d, v5.2s, #0
449 ; CHECK-SD-NEXT: ushll2 v7.2d, v6.4s, #0
450 ; CHECK-SD-NEXT: ushll2 v5.2d, v5.4s, #0
451 ; CHECK-SD-NEXT: ushll v6.2d, v6.2s, #0
454 ; CHECK-GI-LABEL: extaddu_v16i8_i64:
455 ; CHECK-GI: // %bb.0:
456 ; CHECK-GI-NEXT: uaddl v2.8h, v0.8b, v1.8b
457 ; CHECK-GI-NEXT: uaddl2 v0.8h, v0.16b, v1.16b
458 ; CHECK-GI-NEXT: ushll v1.4s, v2.4h, #0
459 ; CHECK-GI-NEXT: ushll2 v3.4s, v2.8h, #0
460 ; CHECK-GI-NEXT: ushll v5.4s, v0.4h, #0
461 ; CHECK-GI-NEXT: ushll2 v7.4s, v0.8h, #0
462 ; CHECK-GI-NEXT: ushll v0.2d, v1.2s, #0
463 ; CHECK-GI-NEXT: ushll2 v1.2d, v1.4s, #0
464 ; CHECK-GI-NEXT: ushll v2.2d, v3.2s, #0
465 ; CHECK-GI-NEXT: ushll2 v3.2d, v3.4s, #0
466 ; CHECK-GI-NEXT: ushll v4.2d, v5.2s, #0
467 ; CHECK-GI-NEXT: ushll2 v5.2d, v5.4s, #0
468 ; CHECK-GI-NEXT: ushll v6.2d, v7.2s, #0
469 ; CHECK-GI-NEXT: ushll2 v7.2d, v7.4s, #0
471 %c = zext <16 x i8> %a to <16 x i64>
472 %d = zext <16 x i8> %b to <16 x i64>
473 %e = add <16 x i64> %c, %d
; extadds_v16i8_i64: sign-extends two <16 x i8> operands to <16 x i64> and adds
; them. Expected code is two 16-bit widening adds (saddl/saddl2), four
; sshll/sshll2 to reach 32 bits and eight more to reach 64 bits; the two llc
; runs differ only in scheduling and register assignment.
477 define <16 x i64> @extadds_v16i8_i64(<16 x i8> %a, <16 x i8> %b) {
478 ; CHECK-SD-LABEL: extadds_v16i8_i64:
479 ; CHECK-SD: // %bb.0:
480 ; CHECK-SD-NEXT: saddl v2.8h, v0.8b, v1.8b
481 ; CHECK-SD-NEXT: saddl2 v0.8h, v0.16b, v1.16b
482 ; CHECK-SD-NEXT: sshll v3.4s, v2.4h, #0
483 ; CHECK-SD-NEXT: sshll2 v2.4s, v2.8h, #0
484 ; CHECK-SD-NEXT: sshll v5.4s, v0.4h, #0
485 ; CHECK-SD-NEXT: sshll2 v6.4s, v0.8h, #0
486 ; CHECK-SD-NEXT: sshll2 v1.2d, v3.4s, #0
487 ; CHECK-SD-NEXT: sshll v0.2d, v3.2s, #0
488 ; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0
489 ; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0
490 ; CHECK-SD-NEXT: sshll v4.2d, v5.2s, #0
491 ; CHECK-SD-NEXT: sshll2 v7.2d, v6.4s, #0
492 ; CHECK-SD-NEXT: sshll2 v5.2d, v5.4s, #0
493 ; CHECK-SD-NEXT: sshll v6.2d, v6.2s, #0
496 ; CHECK-GI-LABEL: extadds_v16i8_i64:
497 ; CHECK-GI: // %bb.0:
498 ; CHECK-GI-NEXT: saddl v2.8h, v0.8b, v1.8b
499 ; CHECK-GI-NEXT: saddl2 v0.8h, v0.16b, v1.16b
500 ; CHECK-GI-NEXT: sshll v1.4s, v2.4h, #0
501 ; CHECK-GI-NEXT: sshll2 v3.4s, v2.8h, #0
502 ; CHECK-GI-NEXT: sshll v5.4s, v0.4h, #0
503 ; CHECK-GI-NEXT: sshll2 v7.4s, v0.8h, #0
504 ; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0
505 ; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0
506 ; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0
507 ; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0
508 ; CHECK-GI-NEXT: sshll v4.2d, v5.2s, #0
509 ; CHECK-GI-NEXT: sshll2 v5.2d, v5.4s, #0
510 ; CHECK-GI-NEXT: sshll v6.2d, v7.2s, #0
511 ; CHECK-GI-NEXT: sshll2 v7.2d, v7.4s, #0
513 %c = sext <16 x i8> %a to <16 x i64>
514 %d = sext <16 x i8> %b to <16 x i64>
515 %e = add <16 x i64> %c, %d
; extsubu_v16i8_i64: zero-extends two <16 x i8> operands to <16 x i64> and
; subtracts the second from the first. Expected code is two 16-bit widening
; subtracts (usubl/usubl2) followed by twelve *signed* sshll/sshll2 (four to
; 32 bits, eight to 64 bits): the difference of zero-extended values can be
; negative, so it is sign-extended.
519 define <16 x i64> @extsubu_v16i8_i64(<16 x i8> %a, <16 x i8> %b) {
520 ; CHECK-SD-LABEL: extsubu_v16i8_i64:
521 ; CHECK-SD: // %bb.0:
522 ; CHECK-SD-NEXT: usubl v2.8h, v0.8b, v1.8b
523 ; CHECK-SD-NEXT: usubl2 v0.8h, v0.16b, v1.16b
524 ; CHECK-SD-NEXT: sshll v3.4s, v2.4h, #0
525 ; CHECK-SD-NEXT: sshll2 v2.4s, v2.8h, #0
526 ; CHECK-SD-NEXT: sshll v5.4s, v0.4h, #0
527 ; CHECK-SD-NEXT: sshll2 v6.4s, v0.8h, #0
528 ; CHECK-SD-NEXT: sshll2 v1.2d, v3.4s, #0
529 ; CHECK-SD-NEXT: sshll v0.2d, v3.2s, #0
530 ; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0
531 ; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0
532 ; CHECK-SD-NEXT: sshll v4.2d, v5.2s, #0
533 ; CHECK-SD-NEXT: sshll2 v7.2d, v6.4s, #0
534 ; CHECK-SD-NEXT: sshll2 v5.2d, v5.4s, #0
535 ; CHECK-SD-NEXT: sshll v6.2d, v6.2s, #0
538 ; CHECK-GI-LABEL: extsubu_v16i8_i64:
539 ; CHECK-GI: // %bb.0:
540 ; CHECK-GI-NEXT: usubl v2.8h, v0.8b, v1.8b
541 ; CHECK-GI-NEXT: usubl2 v0.8h, v0.16b, v1.16b
542 ; CHECK-GI-NEXT: sshll v1.4s, v2.4h, #0
543 ; CHECK-GI-NEXT: sshll2 v3.4s, v2.8h, #0
544 ; CHECK-GI-NEXT: sshll v5.4s, v0.4h, #0
545 ; CHECK-GI-NEXT: sshll2 v7.4s, v0.8h, #0
546 ; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0
547 ; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0
548 ; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0
549 ; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0
550 ; CHECK-GI-NEXT: sshll v4.2d, v5.2s, #0
551 ; CHECK-GI-NEXT: sshll2 v5.2d, v5.4s, #0
552 ; CHECK-GI-NEXT: sshll v6.2d, v7.2s, #0
553 ; CHECK-GI-NEXT: sshll2 v7.2d, v7.4s, #0
555 %c = zext <16 x i8> %a to <16 x i64>
556 %d = zext <16 x i8> %b to <16 x i64>
557 %e = sub <16 x i64> %c, %d
; extsubs_v16i8_i64: sign-extends two <16 x i8> operands to <16 x i64> and
; subtracts the second from the first. Expected code is two 16-bit widening
; subtracts (ssubl/ssubl2) followed by twelve sshll/sshll2 (four to 32 bits,
; eight to 64 bits).
561 define <16 x i64> @extsubs_v16i8_i64(<16 x i8> %a, <16 x i8> %b) {
562 ; CHECK-SD-LABEL: extsubs_v16i8_i64:
563 ; CHECK-SD: // %bb.0:
564 ; CHECK-SD-NEXT: ssubl v2.8h, v0.8b, v1.8b
565 ; CHECK-SD-NEXT: ssubl2 v0.8h, v0.16b, v1.16b
566 ; CHECK-SD-NEXT: sshll v3.4s, v2.4h, #0
567 ; CHECK-SD-NEXT: sshll2 v2.4s, v2.8h, #0
568 ; CHECK-SD-NEXT: sshll v5.4s, v0.4h, #0
569 ; CHECK-SD-NEXT: sshll2 v6.4s, v0.8h, #0
570 ; CHECK-SD-NEXT: sshll2 v1.2d, v3.4s, #0
571 ; CHECK-SD-NEXT: sshll v0.2d, v3.2s, #0
572 ; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0
573 ; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0
574 ; CHECK-SD-NEXT: sshll v4.2d, v5.2s, #0
575 ; CHECK-SD-NEXT: sshll2 v7.2d, v6.4s, #0
576 ; CHECK-SD-NEXT: sshll2 v5.2d, v5.4s, #0
577 ; CHECK-SD-NEXT: sshll v6.2d, v6.2s, #0
580 ; CHECK-GI-LABEL: extsubs_v16i8_i64:
581 ; CHECK-GI: // %bb.0:
582 ; CHECK-GI-NEXT: ssubl v2.8h, v0.8b, v1.8b
583 ; CHECK-GI-NEXT: ssubl2 v0.8h, v0.16b, v1.16b
584 ; CHECK-GI-NEXT: sshll v1.4s, v2.4h, #0
585 ; CHECK-GI-NEXT: sshll2 v3.4s, v2.8h, #0
586 ; CHECK-GI-NEXT: sshll v5.4s, v0.4h, #0
587 ; CHECK-GI-NEXT: sshll2 v7.4s, v0.8h, #0
588 ; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0
589 ; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0
590 ; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0
591 ; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0
592 ; CHECK-GI-NEXT: sshll v4.2d, v5.2s, #0
593 ; CHECK-GI-NEXT: sshll2 v5.2d, v5.4s, #0
594 ; CHECK-GI-NEXT: sshll v6.2d, v7.2s, #0
595 ; CHECK-GI-NEXT: sshll2 v7.2d, v7.4s, #0
597 %c = sext <16 x i8> %a to <16 x i64>
598 %d = sext <16 x i8> %b to <16 x i64>
599 %e = sub <16 x i64> %c, %d
; extaddu_v16i16_i64: zero-extends two <16 x i16> operands to <16 x i64> and
; adds them. Expected code is four 32-bit widening adds (uaddl/uaddl2 pairing
; v0 with v2 and v1 with v3) followed by eight ushll/ushll2 that widen the
; sums to 64 bits.
603 define <16 x i64> @extaddu_v16i16_i64(<16 x i16> %a, <16 x i16> %b) {
604 ; CHECK-SD-LABEL: extaddu_v16i16_i64:
605 ; CHECK-SD: // %bb.0:
606 ; CHECK-SD-NEXT: uaddl v5.4s, v1.4h, v3.4h
607 ; CHECK-SD-NEXT: uaddl v4.4s, v0.4h, v2.4h
608 ; CHECK-SD-NEXT: uaddl2 v2.4s, v0.8h, v2.8h
609 ; CHECK-SD-NEXT: uaddl2 v6.4s, v1.8h, v3.8h
610 ; CHECK-SD-NEXT: ushll2 v1.2d, v4.4s, #0
611 ; CHECK-SD-NEXT: ushll v0.2d, v4.2s, #0
612 ; CHECK-SD-NEXT: ushll2 v3.2d, v2.4s, #0
613 ; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
614 ; CHECK-SD-NEXT: ushll v4.2d, v5.2s, #0
615 ; CHECK-SD-NEXT: ushll2 v7.2d, v6.4s, #0
616 ; CHECK-SD-NEXT: ushll2 v5.2d, v5.4s, #0
617 ; CHECK-SD-NEXT: ushll v6.2d, v6.2s, #0
620 ; CHECK-GI-LABEL: extaddu_v16i16_i64:
621 ; CHECK-GI: // %bb.0:
622 ; CHECK-GI-NEXT: uaddl v4.4s, v0.4h, v2.4h
623 ; CHECK-GI-NEXT: uaddl2 v5.4s, v0.8h, v2.8h
624 ; CHECK-GI-NEXT: uaddl v6.4s, v1.4h, v3.4h
625 ; CHECK-GI-NEXT: uaddl2 v7.4s, v1.8h, v3.8h
626 ; CHECK-GI-NEXT: ushll v0.2d, v4.2s, #0
627 ; CHECK-GI-NEXT: ushll2 v1.2d, v4.4s, #0
628 ; CHECK-GI-NEXT: ushll v2.2d, v5.2s, #0
629 ; CHECK-GI-NEXT: ushll2 v3.2d, v5.4s, #0
630 ; CHECK-GI-NEXT: ushll v4.2d, v6.2s, #0
631 ; CHECK-GI-NEXT: ushll2 v5.2d, v6.4s, #0
632 ; CHECK-GI-NEXT: ushll v6.2d, v7.2s, #0
633 ; CHECK-GI-NEXT: ushll2 v7.2d, v7.4s, #0
635 %c = zext <16 x i16> %a to <16 x i64>
636 %d = zext <16 x i16> %b to <16 x i64>
637 %e = add <16 x i64> %c, %d
; extadds_v16i16_i64: sign-extends two <16 x i16> operands to <16 x i64> and
; adds them. Expected code is four 32-bit widening adds (saddl/saddl2 pairing
; v0 with v2 and v1 with v3) followed by eight sshll/sshll2 that widen the
; sums to 64 bits.
641 define <16 x i64> @extadds_v16i16_i64(<16 x i16> %a, <16 x i16> %b) {
642 ; CHECK-SD-LABEL: extadds_v16i16_i64:
643 ; CHECK-SD: // %bb.0:
644 ; CHECK-SD-NEXT: saddl v5.4s, v1.4h, v3.4h
645 ; CHECK-SD-NEXT: saddl v4.4s, v0.4h, v2.4h
646 ; CHECK-SD-NEXT: saddl2 v2.4s, v0.8h, v2.8h
647 ; CHECK-SD-NEXT: saddl2 v6.4s, v1.8h, v3.8h
648 ; CHECK-SD-NEXT: sshll2 v1.2d, v4.4s, #0
649 ; CHECK-SD-NEXT: sshll v0.2d, v4.2s, #0
650 ; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0
651 ; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0
652 ; CHECK-SD-NEXT: sshll v4.2d, v5.2s, #0
653 ; CHECK-SD-NEXT: sshll2 v7.2d, v6.4s, #0
654 ; CHECK-SD-NEXT: sshll2 v5.2d, v5.4s, #0
655 ; CHECK-SD-NEXT: sshll v6.2d, v6.2s, #0
658 ; CHECK-GI-LABEL: extadds_v16i16_i64:
659 ; CHECK-GI: // %bb.0:
660 ; CHECK-GI-NEXT: saddl v4.4s, v0.4h, v2.4h
661 ; CHECK-GI-NEXT: saddl2 v5.4s, v0.8h, v2.8h
662 ; CHECK-GI-NEXT: saddl v6.4s, v1.4h, v3.4h
663 ; CHECK-GI-NEXT: saddl2 v7.4s, v1.8h, v3.8h
664 ; CHECK-GI-NEXT: sshll v0.2d, v4.2s, #0
665 ; CHECK-GI-NEXT: sshll2 v1.2d, v4.4s, #0
666 ; CHECK-GI-NEXT: sshll v2.2d, v5.2s, #0
667 ; CHECK-GI-NEXT: sshll2 v3.2d, v5.4s, #0
668 ; CHECK-GI-NEXT: sshll v4.2d, v6.2s, #0
669 ; CHECK-GI-NEXT: sshll2 v5.2d, v6.4s, #0
670 ; CHECK-GI-NEXT: sshll v6.2d, v7.2s, #0
671 ; CHECK-GI-NEXT: sshll2 v7.2d, v7.4s, #0
673 %c = sext <16 x i16> %a to <16 x i64>
674 %d = sext <16 x i16> %b to <16 x i64>
675 %e = add <16 x i64> %c, %d
; extsubu_v16i16_i64: zero-extends two <16 x i16> operands to <16 x i64> and
; subtracts the second from the first. Expected code is four 32-bit widening
; subtracts (usubl/usubl2) followed by eight *signed* sshll/sshll2: the
; difference of zero-extended values can be negative, so the 32-bit result is
; sign-extended to 64 bits.
679 define <16 x i64> @extsubu_v16i16_i64(<16 x i16> %a, <16 x i16> %b) {
680 ; CHECK-SD-LABEL: extsubu_v16i16_i64:
681 ; CHECK-SD: // %bb.0:
682 ; CHECK-SD-NEXT: usubl v5.4s, v1.4h, v3.4h
683 ; CHECK-SD-NEXT: usubl v4.4s, v0.4h, v2.4h
684 ; CHECK-SD-NEXT: usubl2 v2.4s, v0.8h, v2.8h
685 ; CHECK-SD-NEXT: usubl2 v6.4s, v1.8h, v3.8h
686 ; CHECK-SD-NEXT: sshll2 v1.2d, v4.4s, #0
687 ; CHECK-SD-NEXT: sshll v0.2d, v4.2s, #0
688 ; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0
689 ; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0
690 ; CHECK-SD-NEXT: sshll v4.2d, v5.2s, #0
691 ; CHECK-SD-NEXT: sshll2 v7.2d, v6.4s, #0
692 ; CHECK-SD-NEXT: sshll2 v5.2d, v5.4s, #0
693 ; CHECK-SD-NEXT: sshll v6.2d, v6.2s, #0
696 ; CHECK-GI-LABEL: extsubu_v16i16_i64:
697 ; CHECK-GI: // %bb.0:
698 ; CHECK-GI-NEXT: usubl v4.4s, v0.4h, v2.4h
699 ; CHECK-GI-NEXT: usubl2 v5.4s, v0.8h, v2.8h
700 ; CHECK-GI-NEXT: usubl v6.4s, v1.4h, v3.4h
701 ; CHECK-GI-NEXT: usubl2 v7.4s, v1.8h, v3.8h
702 ; CHECK-GI-NEXT: sshll v0.2d, v4.2s, #0
703 ; CHECK-GI-NEXT: sshll2 v1.2d, v4.4s, #0
704 ; CHECK-GI-NEXT: sshll v2.2d, v5.2s, #0
705 ; CHECK-GI-NEXT: sshll2 v3.2d, v5.4s, #0
706 ; CHECK-GI-NEXT: sshll v4.2d, v6.2s, #0
707 ; CHECK-GI-NEXT: sshll2 v5.2d, v6.4s, #0
708 ; CHECK-GI-NEXT: sshll v6.2d, v7.2s, #0
709 ; CHECK-GI-NEXT: sshll2 v7.2d, v7.4s, #0
711 %c = zext <16 x i16> %a to <16 x i64>
712 %d = zext <16 x i16> %b to <16 x i64>
713 %e = sub <16 x i64> %c, %d
; extsubs_v16i16_i64: sign-extends two <16 x i16> operands to <16 x i64> and
; subtracts the second from the first. Expected code is four 32-bit widening
; subtracts (ssubl/ssubl2) followed by eight sshll/sshll2 that widen the
; differences to 64 bits.
717 define <16 x i64> @extsubs_v16i16_i64(<16 x i16> %a, <16 x i16> %b) {
718 ; CHECK-SD-LABEL: extsubs_v16i16_i64:
719 ; CHECK-SD: // %bb.0:
720 ; CHECK-SD-NEXT: ssubl v5.4s, v1.4h, v3.4h
721 ; CHECK-SD-NEXT: ssubl v4.4s, v0.4h, v2.4h
722 ; CHECK-SD-NEXT: ssubl2 v2.4s, v0.8h, v2.8h
723 ; CHECK-SD-NEXT: ssubl2 v6.4s, v1.8h, v3.8h
724 ; CHECK-SD-NEXT: sshll2 v1.2d, v4.4s, #0
725 ; CHECK-SD-NEXT: sshll v0.2d, v4.2s, #0
726 ; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0
727 ; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0
728 ; CHECK-SD-NEXT: sshll v4.2d, v5.2s, #0
729 ; CHECK-SD-NEXT: sshll2 v7.2d, v6.4s, #0
730 ; CHECK-SD-NEXT: sshll2 v5.2d, v5.4s, #0
731 ; CHECK-SD-NEXT: sshll v6.2d, v6.2s, #0
734 ; CHECK-GI-LABEL: extsubs_v16i16_i64:
735 ; CHECK-GI: // %bb.0:
736 ; CHECK-GI-NEXT: ssubl v4.4s, v0.4h, v2.4h
737 ; CHECK-GI-NEXT: ssubl2 v5.4s, v0.8h, v2.8h
738 ; CHECK-GI-NEXT: ssubl v6.4s, v1.4h, v3.4h
739 ; CHECK-GI-NEXT: ssubl2 v7.4s, v1.8h, v3.8h
740 ; CHECK-GI-NEXT: sshll v0.2d, v4.2s, #0
741 ; CHECK-GI-NEXT: sshll2 v1.2d, v4.4s, #0
742 ; CHECK-GI-NEXT: sshll v2.2d, v5.2s, #0
743 ; CHECK-GI-NEXT: sshll2 v3.2d, v5.4s, #0
744 ; CHECK-GI-NEXT: sshll v4.2d, v6.2s, #0
745 ; CHECK-GI-NEXT: sshll2 v5.2d, v6.4s, #0
746 ; CHECK-GI-NEXT: sshll v6.2d, v7.2s, #0
747 ; CHECK-GI-NEXT: sshll2 v7.2d, v7.4s, #0
749 %c = sext <16 x i16> %a to <16 x i64>
750 %d = sext <16 x i16> %b to <16 x i64>
751 %e = sub <16 x i64> %c, %d
; extadds_v4i16_i32: sign-extends two <4 x i16> operands to <4 x i32> and adds
; them. Both llc runs share one expectation: a single widening add (saddl).
755 define <4 x i32> @extadds_v4i16_i32(<4 x i16> %s0, <4 x i16> %s1) {
756 ; CHECK-LABEL: extadds_v4i16_i32:
757 ; CHECK: // %bb.0: // %entry
758 ; CHECK-NEXT: saddl v0.4s, v0.4h, v1.4h
761 %s0s = sext <4 x i16> %s0 to <4 x i32>
762 %s1s = sext <4 x i16> %s1 to <4 x i32>
763 %m = add <4 x i32> %s0s, %s1s
; extaddu_v4i16_i32: zero-extends two <4 x i16> operands to <4 x i32> and adds
; them. Both llc runs share one expectation: a single widening add (uaddl).
767 define <4 x i32> @extaddu_v4i16_i32(<4 x i16> %s0, <4 x i16> %s1) {
768 ; CHECK-LABEL: extaddu_v4i16_i32:
769 ; CHECK: // %bb.0: // %entry
770 ; CHECK-NEXT: uaddl v0.4s, v0.4h, v1.4h
773 %s0s = zext <4 x i16> %s0 to <4 x i32>
774 %s1s = zext <4 x i16> %s1 to <4 x i32>
775 %m = add <4 x i32> %s0s, %s1s
; extadds_v8i16_i32: sign-extends two <8 x i16> operands to <8 x i32> and adds
; them. Expected code is saddl on the low halves plus saddl2 on the high
; halves, followed by one register mov; the default and -global-isel runs
; differ only in instruction order and register assignment.
779 define <8 x i32> @extadds_v8i16_i32(<8 x i16> %s0, <8 x i16> %s1) {
780 ; CHECK-SD-LABEL: extadds_v8i16_i32:
781 ; CHECK-SD: // %bb.0: // %entry
782 ; CHECK-SD-NEXT: saddl2 v2.4s, v0.8h, v1.8h
783 ; CHECK-SD-NEXT: saddl v0.4s, v0.4h, v1.4h
784 ; CHECK-SD-NEXT: mov v1.16b, v2.16b
787 ; CHECK-GI-LABEL: extadds_v8i16_i32:
788 ; CHECK-GI: // %bb.0: // %entry
789 ; CHECK-GI-NEXT: saddl v2.4s, v0.4h, v1.4h
790 ; CHECK-GI-NEXT: saddl2 v1.4s, v0.8h, v1.8h
791 ; CHECK-GI-NEXT: mov v0.16b, v2.16b
794 %s0s = sext <8 x i16> %s0 to <8 x i32>
795 %s1s = sext <8 x i16> %s1 to <8 x i32>
796 %m = add <8 x i32> %s0s, %s1s
; extaddu_v8i16_i32: zero-extends two <8 x i16> operands to <8 x i32> and adds
; them. Expected code is uaddl on the low halves plus uaddl2 on the high
; halves, followed by one register mov; the default and -global-isel runs
; differ only in instruction order and register assignment.
800 define <8 x i32> @extaddu_v8i16_i32(<8 x i16> %s0, <8 x i16> %s1) {
801 ; CHECK-SD-LABEL: extaddu_v8i16_i32:
802 ; CHECK-SD: // %bb.0: // %entry
803 ; CHECK-SD-NEXT: uaddl2 v2.4s, v0.8h, v1.8h
804 ; CHECK-SD-NEXT: uaddl v0.4s, v0.4h, v1.4h
805 ; CHECK-SD-NEXT: mov v1.16b, v2.16b
808 ; CHECK-GI-LABEL: extaddu_v8i16_i32:
809 ; CHECK-GI: // %bb.0: // %entry
810 ; CHECK-GI-NEXT: uaddl v2.4s, v0.4h, v1.4h
811 ; CHECK-GI-NEXT: uaddl2 v1.4s, v0.8h, v1.8h
812 ; CHECK-GI-NEXT: mov v0.16b, v2.16b
815 %s0s = zext <8 x i16> %s0 to <8 x i32>
816 %s1s = zext <8 x i16> %s1 to <8 x i32>
817 %m = add <8 x i32> %s0s, %s1s
; extadds_v16i16_i32: sign-extends two <16 x i16> operands to <16 x i32> and
; adds them. Expected code is four widening adds (saddl/saddl2 pairing v0 with
; v2 and v1 with v3) plus register movs; the default run needs three movs,
; the -global-isel run two.
821 define <16 x i32> @extadds_v16i16_i32(<16 x i16> %s0, <16 x i16> %s1) {
822 ; CHECK-SD-LABEL: extadds_v16i16_i32:
823 ; CHECK-SD: // %bb.0: // %entry
824 ; CHECK-SD-NEXT: saddl2 v4.4s, v1.8h, v3.8h
825 ; CHECK-SD-NEXT: saddl v5.4s, v0.4h, v2.4h
826 ; CHECK-SD-NEXT: saddl2 v6.4s, v0.8h, v2.8h
827 ; CHECK-SD-NEXT: saddl v2.4s, v1.4h, v3.4h
828 ; CHECK-SD-NEXT: mov v0.16b, v5.16b
829 ; CHECK-SD-NEXT: mov v1.16b, v6.16b
830 ; CHECK-SD-NEXT: mov v3.16b, v4.16b
833 ; CHECK-GI-LABEL: extadds_v16i16_i32:
834 ; CHECK-GI: // %bb.0: // %entry
835 ; CHECK-GI-NEXT: saddl v4.4s, v0.4h, v2.4h
836 ; CHECK-GI-NEXT: saddl2 v5.4s, v0.8h, v2.8h
837 ; CHECK-GI-NEXT: saddl v2.4s, v1.4h, v3.4h
838 ; CHECK-GI-NEXT: saddl2 v3.4s, v1.8h, v3.8h
839 ; CHECK-GI-NEXT: mov v0.16b, v4.16b
840 ; CHECK-GI-NEXT: mov v1.16b, v5.16b
843 %s0s = sext <16 x i16> %s0 to <16 x i32>
844 %s1s = sext <16 x i16> %s1 to <16 x i32>
845 %m = add <16 x i32> %s0s, %s1s
; extaddu_v16i16_i32: zero-extends two <16 x i16> operands to <16 x i32> and
; adds them. Expected code is four widening adds (uaddl/uaddl2 pairing v0 with
; v2 and v1 with v3) plus register movs; the default run needs three movs,
; the -global-isel run two.
849 define <16 x i32> @extaddu_v16i16_i32(<16 x i16> %s0, <16 x i16> %s1) {
850 ; CHECK-SD-LABEL: extaddu_v16i16_i32:
851 ; CHECK-SD: // %bb.0: // %entry
852 ; CHECK-SD-NEXT: uaddl2 v4.4s, v1.8h, v3.8h
853 ; CHECK-SD-NEXT: uaddl v5.4s, v0.4h, v2.4h
854 ; CHECK-SD-NEXT: uaddl2 v6.4s, v0.8h, v2.8h
855 ; CHECK-SD-NEXT: uaddl v2.4s, v1.4h, v3.4h
856 ; CHECK-SD-NEXT: mov v0.16b, v5.16b
857 ; CHECK-SD-NEXT: mov v1.16b, v6.16b
858 ; CHECK-SD-NEXT: mov v3.16b, v4.16b
861 ; CHECK-GI-LABEL: extaddu_v16i16_i32:
862 ; CHECK-GI: // %bb.0: // %entry
863 ; CHECK-GI-NEXT: uaddl v4.4s, v0.4h, v2.4h
864 ; CHECK-GI-NEXT: uaddl2 v5.4s, v0.8h, v2.8h
865 ; CHECK-GI-NEXT: uaddl v2.4s, v1.4h, v3.4h
866 ; CHECK-GI-NEXT: uaddl2 v3.4s, v1.8h, v3.8h
867 ; CHECK-GI-NEXT: mov v0.16b, v4.16b
868 ; CHECK-GI-NEXT: mov v1.16b, v5.16b
871 %s0s = zext <16 x i16> %s0 to <16 x i32>
872 %s1s = zext <16 x i16> %s1 to <16 x i32>
873 %m = add <16 x i32> %s0s, %s1s
; extadds_v4i16_i64: sign-extends two <4 x i16> operands to <4 x i64> and adds
; them. Expected code performs the add at 32 bits (one saddl) and then widens
; the sum to 64 bits with sshll/sshll2.
877 define <4 x i64> @extadds_v4i16_i64(<4 x i16> %s0, <4 x i16> %s1) {
878 ; CHECK-SD-LABEL: extadds_v4i16_i64:
879 ; CHECK-SD: // %bb.0: // %entry
880 ; CHECK-SD-NEXT: saddl v0.4s, v0.4h, v1.4h
881 ; CHECK-SD-NEXT: sshll2 v1.2d, v0.4s, #0
882 ; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0
885 ; CHECK-GI-LABEL: extadds_v4i16_i64:
886 ; CHECK-GI: // %bb.0: // %entry
887 ; CHECK-GI-NEXT: saddl v1.4s, v0.4h, v1.4h
888 ; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0
889 ; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0
892 %s0s = sext <4 x i16> %s0 to <4 x i64>
893 %s1s = sext <4 x i16> %s1 to <4 x i64>
894 %m = add <4 x i64> %s0s, %s1s
; extaddu_v4i16_i64: zero-extends two <4 x i16> operands to <4 x i64> and adds
; them. Expected code performs the add at 32 bits (one uaddl) and then widens
; the sum to 64 bits with ushll/ushll2.
898 define <4 x i64> @extaddu_v4i16_i64(<4 x i16> %s0, <4 x i16> %s1) {
899 ; CHECK-SD-LABEL: extaddu_v4i16_i64:
900 ; CHECK-SD: // %bb.0: // %entry
901 ; CHECK-SD-NEXT: uaddl v0.4s, v0.4h, v1.4h
902 ; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0
903 ; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
906 ; CHECK-GI-LABEL: extaddu_v4i16_i64:
907 ; CHECK-GI: // %bb.0: // %entry
908 ; CHECK-GI-NEXT: uaddl v1.4s, v0.4h, v1.4h
909 ; CHECK-GI-NEXT: ushll v0.2d, v1.2s, #0
910 ; CHECK-GI-NEXT: ushll2 v1.2d, v1.4s, #0
913 %s0s = zext <4 x i16> %s0 to <4 x i64>
914 %s1s = zext <4 x i16> %s1 to <4 x i64>
915 %m = add <4 x i64> %s0s, %s1s
; extadds_v8i16_i64: sign-extends two <8 x i16> operands to <8 x i64> and adds
; them. Expected code is two 32-bit widening adds (saddl/saddl2) followed by
; four sshll/sshll2 that widen the sums to 64 bits; the two llc runs differ
; only in scheduling and register assignment.
919 define <8 x i64> @extadds_v8i16_i64(<8 x i16> %s0, <8 x i16> %s1) {
920 ; CHECK-SD-LABEL: extadds_v8i16_i64:
921 ; CHECK-SD: // %bb.0: // %entry
922 ; CHECK-SD-NEXT: saddl v2.4s, v0.4h, v1.4h
923 ; CHECK-SD-NEXT: saddl2 v4.4s, v0.8h, v1.8h
924 ; CHECK-SD-NEXT: sshll v0.2d, v2.2s, #0
925 ; CHECK-SD-NEXT: sshll2 v3.2d, v4.4s, #0
926 ; CHECK-SD-NEXT: sshll2 v1.2d, v2.4s, #0
927 ; CHECK-SD-NEXT: sshll v2.2d, v4.2s, #0
930 ; CHECK-GI-LABEL: extadds_v8i16_i64:
931 ; CHECK-GI: // %bb.0: // %entry
932 ; CHECK-GI-NEXT: saddl v2.4s, v0.4h, v1.4h
933 ; CHECK-GI-NEXT: saddl2 v3.4s, v0.8h, v1.8h
934 ; CHECK-GI-NEXT: sshll v0.2d, v2.2s, #0
935 ; CHECK-GI-NEXT: sshll2 v1.2d, v2.4s, #0
936 ; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0
937 ; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0
940 %s0s = sext <8 x i16> %s0 to <8 x i64>
941 %s1s = sext <8 x i16> %s1 to <8 x i64>
942 %m = add <8 x i64> %s0s, %s1s
; extaddu_v8i16_i64: zero-extends two <8 x i16> operands to <8 x i64> and adds
; them. Expected code is two 32-bit widening adds (uaddl/uaddl2) followed by
; four ushll/ushll2 that widen the sums to 64 bits; the two llc runs differ
; only in scheduling and register assignment.
946 define <8 x i64> @extaddu_v8i16_i64(<8 x i16> %s0, <8 x i16> %s1) {
947 ; CHECK-SD-LABEL: extaddu_v8i16_i64:
948 ; CHECK-SD: // %bb.0: // %entry
949 ; CHECK-SD-NEXT: uaddl v2.4s, v0.4h, v1.4h
950 ; CHECK-SD-NEXT: uaddl2 v4.4s, v0.8h, v1.8h
951 ; CHECK-SD-NEXT: ushll v0.2d, v2.2s, #0
952 ; CHECK-SD-NEXT: ushll2 v3.2d, v4.4s, #0
953 ; CHECK-SD-NEXT: ushll2 v1.2d, v2.4s, #0
954 ; CHECK-SD-NEXT: ushll v2.2d, v4.2s, #0
957 ; CHECK-GI-LABEL: extaddu_v8i16_i64:
958 ; CHECK-GI: // %bb.0: // %entry
959 ; CHECK-GI-NEXT: uaddl v2.4s, v0.4h, v1.4h
960 ; CHECK-GI-NEXT: uaddl2 v3.4s, v0.8h, v1.8h
961 ; CHECK-GI-NEXT: ushll v0.2d, v2.2s, #0
962 ; CHECK-GI-NEXT: ushll2 v1.2d, v2.4s, #0
963 ; CHECK-GI-NEXT: ushll v2.2d, v3.2s, #0
964 ; CHECK-GI-NEXT: ushll2 v3.2d, v3.4s, #0
967 %s0s = zext <8 x i16> %s0 to <8 x i64>
968 %s1s = zext <8 x i16> %s1 to <8 x i64>
969 %m = add <8 x i64> %s0s, %s1s
; extsubs_v8i16_i64: sign-extends both <8 x i16> operands to <8 x i64> and
; subtracts %s1 from %s0. Both runs expect ssubl/ssubl2 to form the
; differences on 32-bit lanes, then sshll/sshll2 #0 to sign-extend each
; partial difference to 64-bit lanes in v0-v3.
973 define <8 x i64> @extsubs_v8i16_i64(<8 x i16> %s0, <8 x i16> %s1) {
974 ; CHECK-SD-LABEL: extsubs_v8i16_i64:
975 ; CHECK-SD: // %bb.0: // %entry
976 ; CHECK-SD-NEXT: ssubl v2.4s, v0.4h, v1.4h
977 ; CHECK-SD-NEXT: ssubl2 v4.4s, v0.8h, v1.8h
978 ; CHECK-SD-NEXT: sshll v0.2d, v2.2s, #0
979 ; CHECK-SD-NEXT: sshll2 v3.2d, v4.4s, #0
980 ; CHECK-SD-NEXT: sshll2 v1.2d, v2.4s, #0
981 ; CHECK-SD-NEXT: sshll v2.2d, v4.2s, #0
984 ; CHECK-GI-LABEL: extsubs_v8i16_i64:
985 ; CHECK-GI: // %bb.0: // %entry
986 ; CHECK-GI-NEXT: ssubl v2.4s, v0.4h, v1.4h
987 ; CHECK-GI-NEXT: ssubl2 v3.4s, v0.8h, v1.8h
988 ; CHECK-GI-NEXT: sshll v0.2d, v2.2s, #0
989 ; CHECK-GI-NEXT: sshll2 v1.2d, v2.4s, #0
990 ; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0
991 ; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0
994 %s0s = sext <8 x i16> %s0 to <8 x i64>
995 %s1s = sext <8 x i16> %s1 to <8 x i64>
996 %m = sub <8 x i64> %s0s, %s1s
; extsubu_v8i16_i64: zero-extends both <8 x i16> operands to <8 x i64> and
; subtracts %s1 from %s0. Both runs expect usubl/usubl2 to form the
; differences on 32-bit lanes. Note that the widening to 64-bit lanes is
; expected to be signed (sshll/sshll2), not unsigned: the difference of two
; zero-extended values can be negative, so its sign must be propagated.
1000 define <8 x i64> @extsubu_v8i16_i64(<8 x i16> %s0, <8 x i16> %s1) {
1001 ; CHECK-SD-LABEL: extsubu_v8i16_i64:
1002 ; CHECK-SD: // %bb.0: // %entry
1003 ; CHECK-SD-NEXT: usubl v2.4s, v0.4h, v1.4h
1004 ; CHECK-SD-NEXT: usubl2 v4.4s, v0.8h, v1.8h
1005 ; CHECK-SD-NEXT: sshll v0.2d, v2.2s, #0
1006 ; CHECK-SD-NEXT: sshll2 v3.2d, v4.4s, #0
1007 ; CHECK-SD-NEXT: sshll2 v1.2d, v2.4s, #0
1008 ; CHECK-SD-NEXT: sshll v2.2d, v4.2s, #0
1009 ; CHECK-SD-NEXT: ret
1011 ; CHECK-GI-LABEL: extsubu_v8i16_i64:
1012 ; CHECK-GI: // %bb.0: // %entry
1013 ; CHECK-GI-NEXT: usubl v2.4s, v0.4h, v1.4h
1014 ; CHECK-GI-NEXT: usubl2 v3.4s, v0.8h, v1.8h
1015 ; CHECK-GI-NEXT: sshll v0.2d, v2.2s, #0
1016 ; CHECK-GI-NEXT: sshll2 v1.2d, v2.4s, #0
1017 ; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0
1018 ; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0
1019 ; CHECK-GI-NEXT: ret
1021 %s0s = zext <8 x i16> %s0 to <8 x i64>
1022 %s1s = zext <8 x i16> %s1 to <8 x i64>
1023 %m = sub <8 x i64> %s0s, %s1s
; extadds_v2i32_i64: sign-extends both <2 x i32> operands to <2 x i64> and
; adds them. The expectation uses the prefix shared by both runs: the whole
; function body is expected to be a single saddl from .2s to .2d lanes.
1027 define <2 x i64> @extadds_v2i32_i64(<2 x i32> %s0, <2 x i32> %s1) {
1028 ; CHECK-LABEL: extadds_v2i32_i64:
1029 ; CHECK: // %bb.0: // %entry
1030 ; CHECK-NEXT: saddl v0.2d, v0.2s, v1.2s
1033 %s0s = sext <2 x i32> %s0 to <2 x i64>
1034 %s1s = sext <2 x i32> %s1 to <2 x i64>
1035 %m = add <2 x i64> %s0s, %s1s
; extaddu_v2i32_i64: zero-extends both <2 x i32> operands to <2 x i64> and
; adds them. The expectation uses the prefix shared by both runs: the whole
; function body is expected to be a single uaddl from .2s to .2d lanes.
1039 define <2 x i64> @extaddu_v2i32_i64(<2 x i32> %s0, <2 x i32> %s1) {
1040 ; CHECK-LABEL: extaddu_v2i32_i64:
1041 ; CHECK: // %bb.0: // %entry
1042 ; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s
1045 %s0s = zext <2 x i32> %s0 to <2 x i64>
1046 %s1s = zext <2 x i32> %s1 to <2 x i64>
1047 %m = add <2 x i64> %s0s, %s1s
; extadds_v4i32_i64: sign-extends both <4 x i32> operands to <4 x i64> and
; adds them. Both runs expect one saddl (low half) and one saddl2 (high half)
; plus a single register move, because the inputs live in v0/v1 and the
; result must also end up in v0/v1. The SD run computes the high half into
; v2 first and moves it to v1; the GI run computes the low half into v2
; first and moves it to v0.
1051 define <4 x i64> @extadds_v4i32_i64(<4 x i32> %s0, <4 x i32> %s1) {
1052 ; CHECK-SD-LABEL: extadds_v4i32_i64:
1053 ; CHECK-SD: // %bb.0: // %entry
1054 ; CHECK-SD-NEXT: saddl2 v2.2d, v0.4s, v1.4s
1055 ; CHECK-SD-NEXT: saddl v0.2d, v0.2s, v1.2s
1056 ; CHECK-SD-NEXT: mov v1.16b, v2.16b
1057 ; CHECK-SD-NEXT: ret
1059 ; CHECK-GI-LABEL: extadds_v4i32_i64:
1060 ; CHECK-GI: // %bb.0: // %entry
1061 ; CHECK-GI-NEXT: saddl v2.2d, v0.2s, v1.2s
1062 ; CHECK-GI-NEXT: saddl2 v1.2d, v0.4s, v1.4s
1063 ; CHECK-GI-NEXT: mov v0.16b, v2.16b
1064 ; CHECK-GI-NEXT: ret
1066 %s0s = sext <4 x i32> %s0 to <4 x i64>
1067 %s1s = sext <4 x i32> %s1 to <4 x i64>
1068 %m = add <4 x i64> %s0s, %s1s
; extaddu_v4i32_i64: zero-extends both <4 x i32> operands to <4 x i64> and
; adds them. Both runs expect one uaddl (low half) and one uaddl2 (high half)
; plus a single register move to place the result in v0/v1. The SD run
; computes the high half into v2 first and moves it to v1; the GI run
; computes the low half into v2 first and moves it to v0.
1072 define <4 x i64> @extaddu_v4i32_i64(<4 x i32> %s0, <4 x i32> %s1) {
1073 ; CHECK-SD-LABEL: extaddu_v4i32_i64:
1074 ; CHECK-SD: // %bb.0: // %entry
1075 ; CHECK-SD-NEXT: uaddl2 v2.2d, v0.4s, v1.4s
1076 ; CHECK-SD-NEXT: uaddl v0.2d, v0.2s, v1.2s
1077 ; CHECK-SD-NEXT: mov v1.16b, v2.16b
1078 ; CHECK-SD-NEXT: ret
1080 ; CHECK-GI-LABEL: extaddu_v4i32_i64:
1081 ; CHECK-GI: // %bb.0: // %entry
1082 ; CHECK-GI-NEXT: uaddl v2.2d, v0.2s, v1.2s
1083 ; CHECK-GI-NEXT: uaddl2 v1.2d, v0.4s, v1.4s
1084 ; CHECK-GI-NEXT: mov v0.16b, v2.16b
1085 ; CHECK-GI-NEXT: ret
1087 %s0s = zext <4 x i32> %s0 to <4 x i64>
1088 %s1s = zext <4 x i32> %s1 to <4 x i64>
1089 %m = add <4 x i64> %s0s, %s1s
; extadds_v8i32_i64: sign-extends both <8 x i32> operands to <8 x i64> and
; adds them. %s0 arrives in v0/v1 and %s1 in v2/v3 (as read by the expected
; instructions), and the result occupies v0-v3. Both runs expect four
; saddl/saddl2 instructions and no separate extends; because inputs and
; outputs overlap, temporaries are needed: the SD run expects three register
; moves (via v4-v6), the GI run expects two (via v4/v5).
1093 define <8 x i64> @extadds_v8i32_i64(<8 x i32> %s0, <8 x i32> %s1) {
1094 ; CHECK-SD-LABEL: extadds_v8i32_i64:
1095 ; CHECK-SD: // %bb.0: // %entry
1096 ; CHECK-SD-NEXT: saddl2 v4.2d, v1.4s, v3.4s
1097 ; CHECK-SD-NEXT: saddl v5.2d, v0.2s, v2.2s
1098 ; CHECK-SD-NEXT: saddl2 v6.2d, v0.4s, v2.4s
1099 ; CHECK-SD-NEXT: saddl v2.2d, v1.2s, v3.2s
1100 ; CHECK-SD-NEXT: mov v0.16b, v5.16b
1101 ; CHECK-SD-NEXT: mov v1.16b, v6.16b
1102 ; CHECK-SD-NEXT: mov v3.16b, v4.16b
1103 ; CHECK-SD-NEXT: ret
1105 ; CHECK-GI-LABEL: extadds_v8i32_i64:
1106 ; CHECK-GI: // %bb.0: // %entry
1107 ; CHECK-GI-NEXT: saddl v4.2d, v0.2s, v2.2s
1108 ; CHECK-GI-NEXT: saddl2 v5.2d, v0.4s, v2.4s
1109 ; CHECK-GI-NEXT: saddl v2.2d, v1.2s, v3.2s
1110 ; CHECK-GI-NEXT: saddl2 v3.2d, v1.4s, v3.4s
1111 ; CHECK-GI-NEXT: mov v0.16b, v4.16b
1112 ; CHECK-GI-NEXT: mov v1.16b, v5.16b
1113 ; CHECK-GI-NEXT: ret
1115 %s0s = sext <8 x i32> %s0 to <8 x i64>
1116 %s1s = sext <8 x i32> %s1 to <8 x i64>
1117 %m = add <8 x i64> %s0s, %s1s
; extaddu_v8i32_i64: zero-extends both <8 x i32> operands to <8 x i64> and
; adds them. %s0 arrives in v0/v1 and %s1 in v2/v3 (as read by the expected
; instructions), and the result occupies v0-v3. Both runs expect four
; uaddl/uaddl2 instructions and no separate extends; the SD run expects
; three register moves (via v4-v6), the GI run expects two (via v4/v5).
1121 define <8 x i64> @extaddu_v8i32_i64(<8 x i32> %s0, <8 x i32> %s1) {
1122 ; CHECK-SD-LABEL: extaddu_v8i32_i64:
1123 ; CHECK-SD: // %bb.0: // %entry
1124 ; CHECK-SD-NEXT: uaddl2 v4.2d, v1.4s, v3.4s
1125 ; CHECK-SD-NEXT: uaddl v5.2d, v0.2s, v2.2s
1126 ; CHECK-SD-NEXT: uaddl2 v6.2d, v0.4s, v2.4s
1127 ; CHECK-SD-NEXT: uaddl v2.2d, v1.2s, v3.2s
1128 ; CHECK-SD-NEXT: mov v0.16b, v5.16b
1129 ; CHECK-SD-NEXT: mov v1.16b, v6.16b
1130 ; CHECK-SD-NEXT: mov v3.16b, v4.16b
1131 ; CHECK-SD-NEXT: ret
1133 ; CHECK-GI-LABEL: extaddu_v8i32_i64:
1134 ; CHECK-GI: // %bb.0: // %entry
1135 ; CHECK-GI-NEXT: uaddl v4.2d, v0.2s, v2.2s
1136 ; CHECK-GI-NEXT: uaddl2 v5.2d, v0.4s, v2.4s
1137 ; CHECK-GI-NEXT: uaddl v2.2d, v1.2s, v3.2s
1138 ; CHECK-GI-NEXT: uaddl2 v3.2d, v1.4s, v3.4s
1139 ; CHECK-GI-NEXT: mov v0.16b, v4.16b
1140 ; CHECK-GI-NEXT: mov v1.16b, v5.16b
1141 ; CHECK-GI-NEXT: ret
1143 %s0s = zext <8 x i32> %s0 to <8 x i64>
1144 %s1s = zext <8 x i32> %s1 to <8 x i64>
1145 %m = add <8 x i64> %s0s, %s1s
; add_zs: mixed-signedness add. %s0 is zero-extended and %s1 is sign-extended
; from <16 x i8> to <16 x i32> before the add.
;  - SD run: %s0 is zero-extended to 16-bit lanes (ushll/ushll2), %s1 is
;    folded in with the signed widening adds saddw/saddw2, and the 16-bit sums
;    are then sign-extended to 32-bit lanes (sshll/sshll2).
;  - GI run: %s1 is sign-extended all the way to 32-bit lanes, %s0 is
;    zero-extended to 16-bit lanes, and the add is done at 32 bits with the
;    unsigned widening adds uaddw/uaddw2.
1149 define <16 x i32> @add_zs(<16 x i8> %s0, <16 x i8> %s1) {
1150 ; CHECK-SD-LABEL: add_zs:
1151 ; CHECK-SD: // %bb.0: // %entry
1152 ; CHECK-SD-NEXT: ushll v2.8h, v0.8b, #0
1153 ; CHECK-SD-NEXT: ushll2 v0.8h, v0.16b, #0
1154 ; CHECK-SD-NEXT: saddw v2.8h, v2.8h, v1.8b
1155 ; CHECK-SD-NEXT: saddw2 v4.8h, v0.8h, v1.16b
1156 ; CHECK-SD-NEXT: sshll v0.4s, v2.4h, #0
1157 ; CHECK-SD-NEXT: sshll2 v3.4s, v4.8h, #0
1158 ; CHECK-SD-NEXT: sshll2 v1.4s, v2.8h, #0
1159 ; CHECK-SD-NEXT: sshll v2.4s, v4.4h, #0
1160 ; CHECK-SD-NEXT: ret
1162 ; CHECK-GI-LABEL: add_zs:
1163 ; CHECK-GI: // %bb.0: // %entry
1164 ; CHECK-GI-NEXT: sshll v2.8h, v1.8b, #0
1165 ; CHECK-GI-NEXT: sshll2 v1.8h, v1.16b, #0
1166 ; CHECK-GI-NEXT: ushll v3.8h, v0.8b, #0
1167 ; CHECK-GI-NEXT: ushll2 v4.8h, v0.16b, #0
1168 ; CHECK-GI-NEXT: sshll v0.4s, v2.4h, #0
1169 ; CHECK-GI-NEXT: sshll2 v2.4s, v2.8h, #0
1170 ; CHECK-GI-NEXT: sshll v5.4s, v1.4h, #0
1171 ; CHECK-GI-NEXT: sshll2 v6.4s, v1.8h, #0
1172 ; CHECK-GI-NEXT: uaddw v0.4s, v0.4s, v3.4h
1173 ; CHECK-GI-NEXT: uaddw2 v1.4s, v2.4s, v3.8h
1174 ; CHECK-GI-NEXT: uaddw v2.4s, v5.4s, v4.4h
1175 ; CHECK-GI-NEXT: uaddw2 v3.4s, v6.4s, v4.8h
1176 ; CHECK-GI-NEXT: ret
1178 %s0s = zext <16 x i8> %s0 to <16 x i32>
1179 %s1s = sext <16 x i8> %s1 to <16 x i32>
1180 %m = add <16 x i32> %s0s, %s1s
; v20: zero-extending add on a non-power-of-two vector type. Both <20 x i8>
; operands are zero-extended to <20 x i32> and added. In the expected code the
; elements arrive individually in w0-w7 and in stack slots addressed off sp,
; and the result is written as five 128-bit stores to the memory addressed
; by x8.
;  - SD run: the byte vectors are assembled with lane inserts (ld1 {.b}[n]
;    from the stack, mov v0.b[n] from w registers), added with three uaddl
;    (8-bit to 16-bit lanes), then widened to 32-bit lanes with ushll/ushll2.
;  - GI run: the first 16 elements of each operand are assembled directly in
;    16-bit lanes (register arguments masked with 0xff, stack arguments loaded
;    with ldrb) and added with plain 8h adds; the last 4 elements are built
;    as bytes, widened with ushll and added as 4h; all sums are then widened
;    to 32-bit lanes with ushll/ushll2.
1184 define <20 x i32> @v20(<20 x i8> %s0, <20 x i8> %s1) {
1185 ; CHECK-SD-LABEL: v20:
1186 ; CHECK-SD: // %bb.0: // %entry
1187 ; CHECK-SD-NEXT: fmov s0, w0
1188 ; CHECK-SD-NEXT: ldr b2, [sp, #160]
1189 ; CHECK-SD-NEXT: add x10, sp, #168
1190 ; CHECK-SD-NEXT: ldr b3, [sp]
1191 ; CHECK-SD-NEXT: add x11, sp, #8
1192 ; CHECK-SD-NEXT: ldr b1, [sp, #96]
1193 ; CHECK-SD-NEXT: ld1 { v2.b }[1], [x10]
1194 ; CHECK-SD-NEXT: add x9, sp, #104
1195 ; CHECK-SD-NEXT: add x10, sp, #176
1196 ; CHECK-SD-NEXT: mov v0.b[1], w1
1197 ; CHECK-SD-NEXT: ld1 { v3.b }[1], [x11]
1198 ; CHECK-SD-NEXT: ld1 { v1.b }[1], [x9]
1199 ; CHECK-SD-NEXT: add x11, sp, #16
1200 ; CHECK-SD-NEXT: add x9, sp, #112
1201 ; CHECK-SD-NEXT: add x13, sp, #184
1202 ; CHECK-SD-NEXT: ld1 { v2.b }[2], [x10]
1203 ; CHECK-SD-NEXT: add x12, sp, #120
1204 ; CHECK-SD-NEXT: add x14, sp, #32
1205 ; CHECK-SD-NEXT: ld1 { v3.b }[2], [x11]
1206 ; CHECK-SD-NEXT: ld1 { v1.b }[2], [x9]
1207 ; CHECK-SD-NEXT: ldr b5, [sp, #64]
1208 ; CHECK-SD-NEXT: mov v0.b[2], w2
1209 ; CHECK-SD-NEXT: ldr b4, [sp, #224]
1210 ; CHECK-SD-NEXT: add x11, sp, #128
1211 ; CHECK-SD-NEXT: ld1 { v2.b }[3], [x13]
1212 ; CHECK-SD-NEXT: add x13, sp, #24
1213 ; CHECK-SD-NEXT: add x10, sp, #136
1214 ; CHECK-SD-NEXT: ld1 { v3.b }[3], [x13]
1215 ; CHECK-SD-NEXT: ld1 { v1.b }[3], [x12]
1216 ; CHECK-SD-NEXT: add x12, sp, #192
1217 ; CHECK-SD-NEXT: add x13, sp, #200
1218 ; CHECK-SD-NEXT: add x15, sp, #80
1219 ; CHECK-SD-NEXT: add x9, sp, #144
1220 ; CHECK-SD-NEXT: mov v0.b[3], w3
1221 ; CHECK-SD-NEXT: ld1 { v2.b }[4], [x12]
1222 ; CHECK-SD-NEXT: add x12, sp, #232
1223 ; CHECK-SD-NEXT: ld1 { v3.b }[4], [x14]
1224 ; CHECK-SD-NEXT: add x14, sp, #72
1225 ; CHECK-SD-NEXT: ld1 { v4.b }[1], [x12]
1226 ; CHECK-SD-NEXT: ld1 { v5.b }[1], [x14]
1227 ; CHECK-SD-NEXT: add x14, sp, #40
1228 ; CHECK-SD-NEXT: ld1 { v1.b }[4], [x11]
1229 ; CHECK-SD-NEXT: ld1 { v2.b }[5], [x13]
1230 ; CHECK-SD-NEXT: add x12, sp, #208
1231 ; CHECK-SD-NEXT: add x13, sp, #48
1232 ; CHECK-SD-NEXT: mov v0.b[4], w4
1233 ; CHECK-SD-NEXT: ld1 { v3.b }[5], [x14]
1234 ; CHECK-SD-NEXT: add x14, sp, #240
1235 ; CHECK-SD-NEXT: ld1 { v4.b }[2], [x14]
1236 ; CHECK-SD-NEXT: ld1 { v5.b }[2], [x15]
1237 ; CHECK-SD-NEXT: ld1 { v1.b }[5], [x10]
1238 ; CHECK-SD-NEXT: ld1 { v2.b }[6], [x12]
1239 ; CHECK-SD-NEXT: add x11, sp, #216
1240 ; CHECK-SD-NEXT: add x10, sp, #56
1241 ; CHECK-SD-NEXT: ld1 { v3.b }[6], [x13]
1242 ; CHECK-SD-NEXT: add x12, sp, #248
1243 ; CHECK-SD-NEXT: add x13, sp, #88
1244 ; CHECK-SD-NEXT: mov v0.b[5], w5
1245 ; CHECK-SD-NEXT: ld1 { v4.b }[3], [x12]
1246 ; CHECK-SD-NEXT: ld1 { v5.b }[3], [x13]
1247 ; CHECK-SD-NEXT: ld1 { v1.b }[6], [x9]
1248 ; CHECK-SD-NEXT: ld1 { v2.b }[7], [x11]
1249 ; CHECK-SD-NEXT: add x9, sp, #152
1250 ; CHECK-SD-NEXT: ld1 { v3.b }[7], [x10]
1251 ; CHECK-SD-NEXT: uaddl v4.8h, v5.8b, v4.8b
1252 ; CHECK-SD-NEXT: mov v0.b[6], w6
1253 ; CHECK-SD-NEXT: ld1 { v1.b }[7], [x9]
1254 ; CHECK-SD-NEXT: uaddl v2.8h, v3.8b, v2.8b
1255 ; CHECK-SD-NEXT: ushll v3.4s, v4.4h, #0
1256 ; CHECK-SD-NEXT: mov v0.b[7], w7
1257 ; CHECK-SD-NEXT: uaddl v0.8h, v0.8b, v1.8b
1258 ; CHECK-SD-NEXT: ushll2 v1.4s, v2.8h, #0
1259 ; CHECK-SD-NEXT: ushll v2.4s, v2.4h, #0
1260 ; CHECK-SD-NEXT: stp q1, q3, [x8, #48]
1261 ; CHECK-SD-NEXT: ushll2 v3.4s, v0.8h, #0
1262 ; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
1263 ; CHECK-SD-NEXT: stp q3, q2, [x8, #16]
1264 ; CHECK-SD-NEXT: str q0, [x8]
1265 ; CHECK-SD-NEXT: ret
1267 ; CHECK-GI-LABEL: v20:
1268 ; CHECK-GI: // %bb.0: // %entry
1269 ; CHECK-GI-NEXT: and w9, w0, #0xff
1270 ; CHECK-GI-NEXT: ldrb w10, [sp, #96]
1271 ; CHECK-GI-NEXT: and w11, w1, #0xff
1272 ; CHECK-GI-NEXT: fmov s0, w9
1273 ; CHECK-GI-NEXT: ldrb w9, [sp]
1274 ; CHECK-GI-NEXT: ldrb w12, [sp, #104]
1275 ; CHECK-GI-NEXT: fmov s2, w10
1276 ; CHECK-GI-NEXT: ldrb w10, [sp, #160]
1277 ; CHECK-GI-NEXT: fmov s1, w9
1278 ; CHECK-GI-NEXT: ldrb w9, [sp, #168]
1279 ; CHECK-GI-NEXT: mov v0.h[1], w11
1280 ; CHECK-GI-NEXT: ldrb w11, [sp, #8]
1281 ; CHECK-GI-NEXT: fmov s3, w10
1282 ; CHECK-GI-NEXT: mov v2.h[1], w12
1283 ; CHECK-GI-NEXT: and w10, w2, #0xff
1284 ; CHECK-GI-NEXT: and w12, w5, #0xff
1285 ; CHECK-GI-NEXT: mov v1.h[1], w11
1286 ; CHECK-GI-NEXT: and w11, w4, #0xff
1287 ; CHECK-GI-NEXT: mov v3.h[1], w9
1288 ; CHECK-GI-NEXT: ldrb w9, [sp, #112]
1289 ; CHECK-GI-NEXT: mov v0.h[2], w10
1290 ; CHECK-GI-NEXT: ldrb w10, [sp, #16]
1291 ; CHECK-GI-NEXT: mov v2.h[2], w9
1292 ; CHECK-GI-NEXT: ldrb w9, [sp, #176]
1293 ; CHECK-GI-NEXT: mov v1.h[2], w10
1294 ; CHECK-GI-NEXT: and w10, w3, #0xff
1295 ; CHECK-GI-NEXT: mov v3.h[2], w9
1296 ; CHECK-GI-NEXT: ldrb w9, [sp, #120]
1297 ; CHECK-GI-NEXT: mov v0.h[3], w10
1298 ; CHECK-GI-NEXT: ldrb w10, [sp, #24]
1299 ; CHECK-GI-NEXT: mov v2.h[3], w9
1300 ; CHECK-GI-NEXT: ldrb w9, [sp, #184]
1301 ; CHECK-GI-NEXT: mov v1.h[3], w10
1302 ; CHECK-GI-NEXT: ldr w10, [sp, #64]
1303 ; CHECK-GI-NEXT: mov v3.h[3], w9
1304 ; CHECK-GI-NEXT: ldrb w9, [sp, #128]
1305 ; CHECK-GI-NEXT: mov v0.h[4], w11
1306 ; CHECK-GI-NEXT: ldrb w11, [sp, #32]
1307 ; CHECK-GI-NEXT: fmov s4, w10
1308 ; CHECK-GI-NEXT: ldrb w10, [sp, #192]
1309 ; CHECK-GI-NEXT: mov v2.h[4], w9
1310 ; CHECK-GI-NEXT: ldr w9, [sp, #72]
1311 ; CHECK-GI-NEXT: mov v1.h[4], w11
1312 ; CHECK-GI-NEXT: ldr w11, [sp, #224]
1313 ; CHECK-GI-NEXT: mov v3.h[4], w10
1314 ; CHECK-GI-NEXT: ldrb w10, [sp, #136]
1315 ; CHECK-GI-NEXT: mov v4.b[1], w9
1316 ; CHECK-GI-NEXT: fmov s5, w11
1317 ; CHECK-GI-NEXT: ldr w11, [sp, #232]
1318 ; CHECK-GI-NEXT: mov v0.h[5], w12
1319 ; CHECK-GI-NEXT: ldrb w12, [sp, #40]
1320 ; CHECK-GI-NEXT: mov v2.h[5], w10
1321 ; CHECK-GI-NEXT: ldrb w10, [sp, #200]
1322 ; CHECK-GI-NEXT: ldrb w9, [sp, #144]
1323 ; CHECK-GI-NEXT: mov v5.b[1], w11
1324 ; CHECK-GI-NEXT: mov v1.h[5], w12
1325 ; CHECK-GI-NEXT: mov v3.h[5], w10
1326 ; CHECK-GI-NEXT: ldr w10, [sp, #80]
1327 ; CHECK-GI-NEXT: ldr w12, [sp, #240]
1328 ; CHECK-GI-NEXT: and w11, w6, #0xff
1329 ; CHECK-GI-NEXT: mov v0.h[6], w11
1330 ; CHECK-GI-NEXT: ldrb w11, [sp, #48]
1331 ; CHECK-GI-NEXT: mov v2.h[6], w9
1332 ; CHECK-GI-NEXT: ldrb w9, [sp, #208]
1333 ; CHECK-GI-NEXT: mov v4.b[2], w10
1334 ; CHECK-GI-NEXT: ldrb w10, [sp, #152]
1335 ; CHECK-GI-NEXT: mov v5.b[2], w12
1336 ; CHECK-GI-NEXT: mov v1.h[6], w11
1337 ; CHECK-GI-NEXT: ldr w11, [sp, #248]
1338 ; CHECK-GI-NEXT: mov v3.h[6], w9
1339 ; CHECK-GI-NEXT: ldr w9, [sp, #88]
1340 ; CHECK-GI-NEXT: and w12, w7, #0xff
1341 ; CHECK-GI-NEXT: mov v0.h[7], w12
1342 ; CHECK-GI-NEXT: mov v2.h[7], w10
1343 ; CHECK-GI-NEXT: ldrb w12, [sp, #56]
1344 ; CHECK-GI-NEXT: mov v4.b[3], w9
1345 ; CHECK-GI-NEXT: ldrb w10, [sp, #216]
1346 ; CHECK-GI-NEXT: mov v5.b[3], w11
1347 ; CHECK-GI-NEXT: mov v1.h[7], w12
1348 ; CHECK-GI-NEXT: mov v3.h[7], w10
1349 ; CHECK-GI-NEXT: add v0.8h, v0.8h, v2.8h
1350 ; CHECK-GI-NEXT: ushll v2.8h, v4.8b, #0
1351 ; CHECK-GI-NEXT: ushll v4.8h, v5.8b, #0
1352 ; CHECK-GI-NEXT: add v1.8h, v1.8h, v3.8h
1353 ; CHECK-GI-NEXT: ushll v3.4s, v0.4h, #0
1354 ; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
1355 ; CHECK-GI-NEXT: add v2.4h, v2.4h, v4.4h
1356 ; CHECK-GI-NEXT: ushll v4.4s, v1.4h, #0
1357 ; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
1358 ; CHECK-GI-NEXT: stp q3, q0, [x8]
1359 ; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0
1360 ; CHECK-GI-NEXT: stp q4, q1, [x8, #32]
1361 ; CHECK-GI-NEXT: str q2, [x8, #64]
1362 ; CHECK-GI-NEXT: ret
1364 %s0s = zext <20 x i8> %s0 to <20 x i32>
1365 %s1s = zext <20 x i8> %s1 to <20 x i32>
1366 %m = add <20 x i32> %s0s, %s1s
; i12: zero-extending add on a non-byte-sized element type. Both <16 x i12>
; operands are zero-extended to <16 x i32> and added. In the expected code the
; elements arrive individually in w0-w7 and in stack slots addressed off sp;
; they are inserted into 16-bit lanes, widened to 32-bit lanes with ushll,
; masked with v16 (movi #15, msl #8, i.e. 0xfff per 32-bit lane) to clear the
; bits above the 12-bit value, and then added as four 4s adds into v0-v3.
;  - SD run: additionally spills and reloads x19-x23 around the body (with
;    matching .cfi directives), so its stack-argument offsets start at
;    [sp, #48].
;  - GI run: no spills; stack arguments are read starting at [sp].
1370 define <16 x i32> @i12(<16 x i12> %s0, <16 x i12> %s1) {
1371 ; CHECK-SD-LABEL: i12:
1372 ; CHECK-SD: // %bb.0: // %entry
1373 ; CHECK-SD-NEXT: str x23, [sp, #-48]! // 8-byte Folded Spill
1374 ; CHECK-SD-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
1375 ; CHECK-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
1376 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
1377 ; CHECK-SD-NEXT: .cfi_offset w19, -8
1378 ; CHECK-SD-NEXT: .cfi_offset w20, -16
1379 ; CHECK-SD-NEXT: .cfi_offset w21, -24
1380 ; CHECK-SD-NEXT: .cfi_offset w22, -32
1381 ; CHECK-SD-NEXT: .cfi_offset w23, -48
1382 ; CHECK-SD-NEXT: ldr w13, [sp, #112]
1383 ; CHECK-SD-NEXT: ldr w14, [sp, #144]
1384 ; CHECK-SD-NEXT: fmov s2, w4
1385 ; CHECK-SD-NEXT: ldr w17, [sp, #176]
1386 ; CHECK-SD-NEXT: ldr w19, [sp, #208]
1387 ; CHECK-SD-NEXT: fmov s3, w0
1388 ; CHECK-SD-NEXT: ldr w20, [sp, #80]
1389 ; CHECK-SD-NEXT: ldr w21, [sp, #48]
1390 ; CHECK-SD-NEXT: fmov s5, w13
1391 ; CHECK-SD-NEXT: fmov s4, w19
1392 ; CHECK-SD-NEXT: fmov s6, w17
1393 ; CHECK-SD-NEXT: fmov s7, w14
1394 ; CHECK-SD-NEXT: fmov s0, w20
1395 ; CHECK-SD-NEXT: fmov s1, w21
1396 ; CHECK-SD-NEXT: ldr w10, [sp, #120]
1397 ; CHECK-SD-NEXT: ldr w11, [sp, #152]
1398 ; CHECK-SD-NEXT: ldr w12, [sp, #184]
1399 ; CHECK-SD-NEXT: ldr w15, [sp, #216]
1400 ; CHECK-SD-NEXT: ldr w22, [sp, #88]
1401 ; CHECK-SD-NEXT: ldr w23, [sp, #56]
1402 ; CHECK-SD-NEXT: mov v2.h[1], w5
1403 ; CHECK-SD-NEXT: mov v3.h[1], w1
1404 ; CHECK-SD-NEXT: mov v5.h[1], w10
1405 ; CHECK-SD-NEXT: mov v4.h[1], w15
1406 ; CHECK-SD-NEXT: mov v0.h[1], w22
1407 ; CHECK-SD-NEXT: mov v1.h[1], w23
1408 ; CHECK-SD-NEXT: mov v6.h[1], w12
1409 ; CHECK-SD-NEXT: mov v7.h[1], w11
1410 ; CHECK-SD-NEXT: ldr w8, [sp, #128]
1411 ; CHECK-SD-NEXT: ldr w9, [sp, #160]
1412 ; CHECK-SD-NEXT: ldr w16, [sp, #64]
1413 ; CHECK-SD-NEXT: ldr w18, [sp, #96]
1414 ; CHECK-SD-NEXT: ldr w10, [sp, #192]
1415 ; CHECK-SD-NEXT: ldr w11, [sp, #224]
1416 ; CHECK-SD-NEXT: mov v2.h[2], w6
1417 ; CHECK-SD-NEXT: mov v3.h[2], w2
1418 ; CHECK-SD-NEXT: mov v0.h[2], w18
1419 ; CHECK-SD-NEXT: mov v1.h[2], w16
1420 ; CHECK-SD-NEXT: mov v5.h[2], w8
1421 ; CHECK-SD-NEXT: mov v4.h[2], w11
1422 ; CHECK-SD-NEXT: mov v6.h[2], w10
1423 ; CHECK-SD-NEXT: mov v7.h[2], w9
1424 ; CHECK-SD-NEXT: ldr w12, [sp, #72]
1425 ; CHECK-SD-NEXT: ldr w13, [sp, #104]
1426 ; CHECK-SD-NEXT: ldr w8, [sp, #136]
1427 ; CHECK-SD-NEXT: ldr w9, [sp, #168]
1428 ; CHECK-SD-NEXT: ldr w10, [sp, #200]
1429 ; CHECK-SD-NEXT: ldr w11, [sp, #232]
1430 ; CHECK-SD-NEXT: mov v0.h[3], w13
1431 ; CHECK-SD-NEXT: mov v1.h[3], w12
1432 ; CHECK-SD-NEXT: mov v2.h[3], w7
1433 ; CHECK-SD-NEXT: mov v3.h[3], w3
1434 ; CHECK-SD-NEXT: mov v5.h[3], w8
1435 ; CHECK-SD-NEXT: mov v4.h[3], w11
1436 ; CHECK-SD-NEXT: mov v6.h[3], w10
1437 ; CHECK-SD-NEXT: mov v7.h[3], w9
1438 ; CHECK-SD-NEXT: movi v16.4s, #15, msl #8
1439 ; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
1440 ; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
1441 ; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0
1442 ; CHECK-SD-NEXT: ushll v2.4s, v2.4h, #0
1443 ; CHECK-SD-NEXT: ushll v3.4s, v3.4h, #0
1444 ; CHECK-SD-NEXT: ushll v5.4s, v5.4h, #0
1445 ; CHECK-SD-NEXT: ushll v4.4s, v4.4h, #0
1446 ; CHECK-SD-NEXT: ushll v6.4s, v6.4h, #0
1447 ; CHECK-SD-NEXT: ushll v7.4s, v7.4h, #0
1448 ; CHECK-SD-NEXT: and v17.16b, v0.16b, v16.16b
1449 ; CHECK-SD-NEXT: and v18.16b, v1.16b, v16.16b
1450 ; CHECK-SD-NEXT: and v1.16b, v2.16b, v16.16b
1451 ; CHECK-SD-NEXT: and v0.16b, v3.16b, v16.16b
1452 ; CHECK-SD-NEXT: and v2.16b, v5.16b, v16.16b
1453 ; CHECK-SD-NEXT: and v3.16b, v4.16b, v16.16b
1454 ; CHECK-SD-NEXT: and v4.16b, v6.16b, v16.16b
1455 ; CHECK-SD-NEXT: and v5.16b, v7.16b, v16.16b
1456 ; CHECK-SD-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
1457 ; CHECK-SD-NEXT: add v0.4s, v0.4s, v2.4s
1458 ; CHECK-SD-NEXT: add v3.4s, v17.4s, v3.4s
1459 ; CHECK-SD-NEXT: add v1.4s, v1.4s, v5.4s
1460 ; CHECK-SD-NEXT: add v2.4s, v18.4s, v4.4s
1461 ; CHECK-SD-NEXT: ldr x23, [sp], #48 // 8-byte Folded Reload
1462 ; CHECK-SD-NEXT: ret
1464 ; CHECK-GI-LABEL: i12:
1465 ; CHECK-GI: // %bb.0: // %entry
1466 ; CHECK-GI-NEXT: ldr w12, [sp]
1467 ; CHECK-GI-NEXT: ldr w14, [sp, #32]
1468 ; CHECK-GI-NEXT: fmov s0, w0
1469 ; CHECK-GI-NEXT: ldr w16, [sp, #128]
1470 ; CHECK-GI-NEXT: ldr w17, [sp, #160]
1471 ; CHECK-GI-NEXT: fmov s1, w4
1472 ; CHECK-GI-NEXT: fmov s2, w12
1473 ; CHECK-GI-NEXT: fmov s3, w14
1474 ; CHECK-GI-NEXT: ldr w12, [sp, #64]
1475 ; CHECK-GI-NEXT: ldr w14, [sp, #96]
1476 ; CHECK-GI-NEXT: ldr w13, [sp, #8]
1477 ; CHECK-GI-NEXT: ldr w15, [sp, #40]
1478 ; CHECK-GI-NEXT: fmov s4, w12
1479 ; CHECK-GI-NEXT: fmov s6, w16
1480 ; CHECK-GI-NEXT: fmov s7, w17
1481 ; CHECK-GI-NEXT: fmov s5, w14
1482 ; CHECK-GI-NEXT: mov v2.h[1], w13
1483 ; CHECK-GI-NEXT: mov v3.h[1], w15
1484 ; CHECK-GI-NEXT: ldr w13, [sp, #72]
1485 ; CHECK-GI-NEXT: ldr w15, [sp, #104]
1486 ; CHECK-GI-NEXT: ldr w12, [sp, #136]
1487 ; CHECK-GI-NEXT: ldr w18, [sp, #168]
1488 ; CHECK-GI-NEXT: mov v0.h[1], w1
1489 ; CHECK-GI-NEXT: mov v1.h[1], w5
1490 ; CHECK-GI-NEXT: mov v4.h[1], w13
1491 ; CHECK-GI-NEXT: mov v5.h[1], w15
1492 ; CHECK-GI-NEXT: mov v6.h[1], w12
1493 ; CHECK-GI-NEXT: mov v7.h[1], w18
1494 ; CHECK-GI-NEXT: ldr w10, [sp, #16]
1495 ; CHECK-GI-NEXT: ldr w11, [sp, #48]
1496 ; CHECK-GI-NEXT: ldr w12, [sp, #80]
1497 ; CHECK-GI-NEXT: ldr w13, [sp, #112]
1498 ; CHECK-GI-NEXT: ldr w14, [sp, #144]
1499 ; CHECK-GI-NEXT: ldr w15, [sp, #176]
1500 ; CHECK-GI-NEXT: mov v0.h[2], w2
1501 ; CHECK-GI-NEXT: mov v1.h[2], w6
1502 ; CHECK-GI-NEXT: mov v2.h[2], w10
1503 ; CHECK-GI-NEXT: mov v3.h[2], w11
1504 ; CHECK-GI-NEXT: mov v4.h[2], w12
1505 ; CHECK-GI-NEXT: mov v5.h[2], w13
1506 ; CHECK-GI-NEXT: mov v6.h[2], w14
1507 ; CHECK-GI-NEXT: mov v7.h[2], w15
1508 ; CHECK-GI-NEXT: ldr w8, [sp, #24]
1509 ; CHECK-GI-NEXT: ldr w9, [sp, #56]
1510 ; CHECK-GI-NEXT: ldr w10, [sp, #88]
1511 ; CHECK-GI-NEXT: ldr w11, [sp, #120]
1512 ; CHECK-GI-NEXT: ldr w12, [sp, #152]
1513 ; CHECK-GI-NEXT: ldr w13, [sp, #184]
1514 ; CHECK-GI-NEXT: mov v0.h[3], w3
1515 ; CHECK-GI-NEXT: mov v1.h[3], w7
1516 ; CHECK-GI-NEXT: mov v2.h[3], w8
1517 ; CHECK-GI-NEXT: mov v3.h[3], w9
1518 ; CHECK-GI-NEXT: mov v4.h[3], w10
1519 ; CHECK-GI-NEXT: mov v5.h[3], w11
1520 ; CHECK-GI-NEXT: mov v6.h[3], w12
1521 ; CHECK-GI-NEXT: mov v7.h[3], w13
1522 ; CHECK-GI-NEXT: movi v16.4s, #15, msl #8
1523 ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
1524 ; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
1525 ; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0
1526 ; CHECK-GI-NEXT: ushll v3.4s, v3.4h, #0
1527 ; CHECK-GI-NEXT: ushll v4.4s, v4.4h, #0
1528 ; CHECK-GI-NEXT: ushll v5.4s, v5.4h, #0
1529 ; CHECK-GI-NEXT: ushll v6.4s, v6.4h, #0
1530 ; CHECK-GI-NEXT: ushll v7.4s, v7.4h, #0
1531 ; CHECK-GI-NEXT: and v0.16b, v0.16b, v16.16b
1532 ; CHECK-GI-NEXT: and v1.16b, v1.16b, v16.16b
1533 ; CHECK-GI-NEXT: and v2.16b, v2.16b, v16.16b
1534 ; CHECK-GI-NEXT: and v3.16b, v3.16b, v16.16b
1535 ; CHECK-GI-NEXT: and v4.16b, v4.16b, v16.16b
1536 ; CHECK-GI-NEXT: and v5.16b, v5.16b, v16.16b
1537 ; CHECK-GI-NEXT: and v6.16b, v6.16b, v16.16b
1538 ; CHECK-GI-NEXT: and v7.16b, v7.16b, v16.16b
1539 ; CHECK-GI-NEXT: add v0.4s, v0.4s, v4.4s
1540 ; CHECK-GI-NEXT: add v1.4s, v1.4s, v5.4s
1541 ; CHECK-GI-NEXT: add v2.4s, v2.4s, v6.4s
1542 ; CHECK-GI-NEXT: add v3.4s, v3.4s, v7.4s
1543 ; CHECK-GI-NEXT: ret
1545 %s0s = zext <16 x i12> %s0 to <16 x i32>
1546 %s1s = zext <16 x i12> %s1 to <16 x i32>
1547 %m = add <16 x i32> %s0s, %s1s
; sub_zz: zero-extends both <16 x i8> operands to <16 x i32> and subtracts
; %s1 from %s0. Both runs expect usubl/usubl2 to form the differences on
; 16-bit lanes, then a signed widening (sshll/sshll2) to 32-bit lanes: the
; difference of two zero-extended values can be negative, so the second
; extension step is signed even though the inputs are unsigned.
1551 define <16 x i32> @sub_zz(<16 x i8> %s0, <16 x i8> %s1) {
1552 ; CHECK-SD-LABEL: sub_zz:
1553 ; CHECK-SD: // %bb.0: // %entry
1554 ; CHECK-SD-NEXT: usubl v2.8h, v0.8b, v1.8b
1555 ; CHECK-SD-NEXT: usubl2 v4.8h, v0.16b, v1.16b
1556 ; CHECK-SD-NEXT: sshll v0.4s, v2.4h, #0
1557 ; CHECK-SD-NEXT: sshll2 v3.4s, v4.8h, #0
1558 ; CHECK-SD-NEXT: sshll2 v1.4s, v2.8h, #0
1559 ; CHECK-SD-NEXT: sshll v2.4s, v4.4h, #0
1560 ; CHECK-SD-NEXT: ret
1562 ; CHECK-GI-LABEL: sub_zz:
1563 ; CHECK-GI: // %bb.0: // %entry
1564 ; CHECK-GI-NEXT: usubl v2.8h, v0.8b, v1.8b
1565 ; CHECK-GI-NEXT: usubl2 v3.8h, v0.16b, v1.16b
1566 ; CHECK-GI-NEXT: sshll v0.4s, v2.4h, #0
1567 ; CHECK-GI-NEXT: sshll2 v1.4s, v2.8h, #0
1568 ; CHECK-GI-NEXT: sshll v2.4s, v3.4h, #0
1569 ; CHECK-GI-NEXT: sshll2 v3.4s, v3.8h, #0
1570 ; CHECK-GI-NEXT: ret
1572 %s0s = zext <16 x i8> %s0 to <16 x i32>
1573 %s1s = zext <16 x i8> %s1 to <16 x i32>
1574 %m = sub <16 x i32> %s0s, %s1s
; sub_ss: sign-extends both <16 x i8> operands to <16 x i32> and subtracts
; %s1 from %s0. Both runs expect ssubl/ssubl2 to form the differences on
; 16-bit lanes, then sshll/sshll2 #0 to sign-extend them to 32-bit lanes in
; v0-v3. The SD run uses v4 as a temporary for the high-half difference; the
; GI run keeps it in v3.
1578 define <16 x i32> @sub_ss(<16 x i8> %s0, <16 x i8> %s1) {
1579 ; CHECK-SD-LABEL: sub_ss:
1580 ; CHECK-SD: // %bb.0: // %entry
1581 ; CHECK-SD-NEXT: ssubl v2.8h, v0.8b, v1.8b
1582 ; CHECK-SD-NEXT: ssubl2 v4.8h, v0.16b, v1.16b
1583 ; CHECK-SD-NEXT: sshll v0.4s, v2.4h, #0
1584 ; CHECK-SD-NEXT: sshll2 v3.4s, v4.8h, #0
1585 ; CHECK-SD-NEXT: sshll2 v1.4s, v2.8h, #0
1586 ; CHECK-SD-NEXT: sshll v2.4s, v4.4h, #0
1587 ; CHECK-SD-NEXT: ret
1589 ; CHECK-GI-LABEL: sub_ss:
1590 ; CHECK-GI: // %bb.0: // %entry
1591 ; CHECK-GI-NEXT: ssubl v2.8h, v0.8b, v1.8b
1592 ; CHECK-GI-NEXT: ssubl2 v3.8h, v0.16b, v1.16b
1593 ; CHECK-GI-NEXT: sshll v0.4s, v2.4h, #0
1594 ; CHECK-GI-NEXT: sshll2 v1.4s, v2.8h, #0
1595 ; CHECK-GI-NEXT: sshll v2.4s, v3.4h, #0
1596 ; CHECK-GI-NEXT: sshll2 v3.4s, v3.8h, #0
1597 ; CHECK-GI-NEXT: ret
1599 %s0s = sext <16 x i8> %s0 to <16 x i32>
1600 %s1s = sext <16 x i8> %s1 to <16 x i32>
1601 %m = sub <16 x i32> %s0s, %s1s
; sub_zs: mixed-signedness subtract. %s0 is zero-extended and %s1 is
; sign-extended from <16 x i8> to <16 x i32>, then %s1 is subtracted from %s0.
;  - SD run: %s0 is zero-extended to 16-bit lanes (ushll/ushll2), %s1 is
;    subtracted with the signed widening subtracts ssubw/ssubw2, and the
;    16-bit differences are sign-extended to 32-bit lanes (sshll/sshll2).
;  - GI run: %s0 is zero-extended all the way to 32-bit lanes, %s1 is
;    sign-extended to 16-bit lanes, and the subtract is done at 32 bits with
;    ssubw/ssubw2.
1605 define <16 x i32> @sub_zs(<16 x i8> %s0, <16 x i8> %s1) {
1606 ; CHECK-SD-LABEL: sub_zs:
1607 ; CHECK-SD: // %bb.0: // %entry
1608 ; CHECK-SD-NEXT: ushll v2.8h, v0.8b, #0
1609 ; CHECK-SD-NEXT: ushll2 v0.8h, v0.16b, #0
1610 ; CHECK-SD-NEXT: ssubw v2.8h, v2.8h, v1.8b
1611 ; CHECK-SD-NEXT: ssubw2 v4.8h, v0.8h, v1.16b
1612 ; CHECK-SD-NEXT: sshll v0.4s, v2.4h, #0
1613 ; CHECK-SD-NEXT: sshll2 v3.4s, v4.8h, #0
1614 ; CHECK-SD-NEXT: sshll2 v1.4s, v2.8h, #0
1615 ; CHECK-SD-NEXT: sshll v2.4s, v4.4h, #0
1616 ; CHECK-SD-NEXT: ret
1618 ; CHECK-GI-LABEL: sub_zs:
1619 ; CHECK-GI: // %bb.0: // %entry
1620 ; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0
1621 ; CHECK-GI-NEXT: ushll2 v0.8h, v0.16b, #0
1622 ; CHECK-GI-NEXT: sshll v3.8h, v1.8b, #0
1623 ; CHECK-GI-NEXT: sshll2 v4.8h, v1.16b, #0
1624 ; CHECK-GI-NEXT: ushll v1.4s, v2.4h, #0
1625 ; CHECK-GI-NEXT: ushll2 v2.4s, v2.8h, #0
1626 ; CHECK-GI-NEXT: ushll v5.4s, v0.4h, #0
1627 ; CHECK-GI-NEXT: ushll2 v6.4s, v0.8h, #0
1628 ; CHECK-GI-NEXT: ssubw v0.4s, v1.4s, v3.4h
1629 ; CHECK-GI-NEXT: ssubw2 v1.4s, v2.4s, v3.8h
1630 ; CHECK-GI-NEXT: ssubw v2.4s, v5.4s, v4.4h
1631 ; CHECK-GI-NEXT: ssubw2 v3.4s, v6.4s, v4.8h
1632 ; CHECK-GI-NEXT: ret
1634 %s0s = zext <16 x i8> %s0 to <16 x i32>
1635 %s1s = sext <16 x i8> %s1 to <16 x i32>
1636 %m = sub <16 x i32> %s0s, %s1s