1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -enable-misched=false | FileCheck %s
; sqshl on <8 x i8> with a variable shift vector: expected codegen is two d-register loads and a single sqshl.8b.
4 define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind {
5 ; CHECK-LABEL: sqshl8b:
7 ; CHECK-NEXT: ldr d0, [x0]
8 ; CHECK-NEXT: ldr d1, [x1]
9 ; CHECK-NEXT: sqshl.8b v0, v0, v1
11 %tmp1 = load <8 x i8>, ptr %A
12 %tmp2 = load <8 x i8>, ptr %B
13 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; sqshl on <4 x i16> with a variable shift vector: expected codegen is two d-register loads and a single sqshl.4h.
17 define <4 x i16> @sqshl4h(ptr %A, ptr %B) nounwind {
18 ; CHECK-LABEL: sqshl4h:
20 ; CHECK-NEXT: ldr d0, [x0]
21 ; CHECK-NEXT: ldr d1, [x1]
22 ; CHECK-NEXT: sqshl.4h v0, v0, v1
24 %tmp1 = load <4 x i16>, ptr %A
25 %tmp2 = load <4 x i16>, ptr %B
26 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; sqshl on <2 x i32> with a variable shift vector: expected codegen is two d-register loads and a single sqshl.2s.
30 define <2 x i32> @sqshl2s(ptr %A, ptr %B) nounwind {
31 ; CHECK-LABEL: sqshl2s:
33 ; CHECK-NEXT: ldr d0, [x0]
34 ; CHECK-NEXT: ldr d1, [x1]
35 ; CHECK-NEXT: sqshl.2s v0, v0, v1
37 %tmp1 = load <2 x i32>, ptr %A
38 %tmp2 = load <2 x i32>, ptr %B
39 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; sqshl on <1 x i64> with a variable shift: expected codegen uses the d-register form (sqshl d0, d0, d1).
43 define <1 x i64> @sqshl1d(ptr %A, ptr %B) nounwind {
44 ; CHECK-LABEL: sqshl1d:
46 ; CHECK-NEXT: ldr d0, [x0]
47 ; CHECK-NEXT: ldr d1, [x1]
48 ; CHECK-NEXT: sqshl d0, d0, d1
50 %tmp1 = load <1 x i64>, ptr %A
51 %tmp2 = load <1 x i64>, ptr %B
52 %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
; sqshl on <1 x i64> with the constant shift <i64 1>: expected codegen uses the immediate form (sqshl d0, d0, #1).
56 define <1 x i64> @sqshl1d_constant(ptr %A) nounwind {
57 ; CHECK-LABEL: sqshl1d_constant:
59 ; CHECK-NEXT: ldr d0, [x0]
60 ; CHECK-NEXT: sqshl d0, d0, #1
62 %tmp1 = load <1 x i64>, ptr %A
63 %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; sqshl on plain i64 operands: expected codegen loads both into x registers, copies them to d0/d1 with fmov, shifts, then moves d0 back to x0.
67 define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind {
68 ; CHECK-LABEL: sqshl_scalar:
70 ; CHECK-NEXT: ldr x8, [x0]
71 ; CHECK-NEXT: ldr x9, [x1]
72 ; CHECK-NEXT: fmov d0, x8
73 ; CHECK-NEXT: fmov d1, x9
74 ; CHECK-NEXT: sqshl d0, d0, d1
75 ; CHECK-NEXT: fmov x0, d0
77 %tmp1 = load i64, ptr %A
78 %tmp2 = load i64, ptr %B
79 %tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 %tmp2)
; sqshl on i64 with the constant shift 1: expected codegen loads straight into d0, uses the immediate form (#1), then moves d0 to x0.
83 define i64 @sqshl_scalar_constant(ptr %A) nounwind {
84 ; CHECK-LABEL: sqshl_scalar_constant:
86 ; CHECK-NEXT: ldr d0, [x0]
87 ; CHECK-NEXT: sqshl d0, d0, #1
88 ; CHECK-NEXT: fmov x0, d0
90 %tmp1 = load i64, ptr %A
91 %tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 1)
; uqshl on <8 x i8> with a variable shift vector: expected codegen is two d-register loads and a single uqshl.8b.
95 define <8 x i8> @uqshl8b(ptr %A, ptr %B) nounwind {
96 ; CHECK-LABEL: uqshl8b:
98 ; CHECK-NEXT: ldr d0, [x0]
99 ; CHECK-NEXT: ldr d1, [x1]
100 ; CHECK-NEXT: uqshl.8b v0, v0, v1
102 %tmp1 = load <8 x i8>, ptr %A
103 %tmp2 = load <8 x i8>, ptr %B
104 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; uqshl on <4 x i16> with a variable shift vector: expected codegen is two d-register loads and a single uqshl.4h.
108 define <4 x i16> @uqshl4h(ptr %A, ptr %B) nounwind {
109 ; CHECK-LABEL: uqshl4h:
111 ; CHECK-NEXT: ldr d0, [x0]
112 ; CHECK-NEXT: ldr d1, [x1]
113 ; CHECK-NEXT: uqshl.4h v0, v0, v1
115 %tmp1 = load <4 x i16>, ptr %A
116 %tmp2 = load <4 x i16>, ptr %B
117 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; uqshl on <2 x i32> with a variable shift vector: expected codegen is two d-register loads and a single uqshl.2s.
121 define <2 x i32> @uqshl2s(ptr %A, ptr %B) nounwind {
122 ; CHECK-LABEL: uqshl2s:
124 ; CHECK-NEXT: ldr d0, [x0]
125 ; CHECK-NEXT: ldr d1, [x1]
126 ; CHECK-NEXT: uqshl.2s v0, v0, v1
128 %tmp1 = load <2 x i32>, ptr %A
129 %tmp2 = load <2 x i32>, ptr %B
130 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; sqshl on <16 x i8> with a variable shift vector: expected codegen is two q-register loads and a single sqshl.16b.
134 define <16 x i8> @sqshl16b(ptr %A, ptr %B) nounwind {
135 ; CHECK-LABEL: sqshl16b:
137 ; CHECK-NEXT: ldr q0, [x0]
138 ; CHECK-NEXT: ldr q1, [x1]
139 ; CHECK-NEXT: sqshl.16b v0, v0, v1
141 %tmp1 = load <16 x i8>, ptr %A
142 %tmp2 = load <16 x i8>, ptr %B
143 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; sqshl on <8 x i16> with a variable shift vector: expected codegen is two q-register loads and a single sqshl.8h.
147 define <8 x i16> @sqshl8h(ptr %A, ptr %B) nounwind {
148 ; CHECK-LABEL: sqshl8h:
150 ; CHECK-NEXT: ldr q0, [x0]
151 ; CHECK-NEXT: ldr q1, [x1]
152 ; CHECK-NEXT: sqshl.8h v0, v0, v1
154 %tmp1 = load <8 x i16>, ptr %A
155 %tmp2 = load <8 x i16>, ptr %B
156 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; sqshl on <4 x i32> with a variable shift vector: expected codegen is two q-register loads and a single sqshl.4s.
160 define <4 x i32> @sqshl4s(ptr %A, ptr %B) nounwind {
161 ; CHECK-LABEL: sqshl4s:
163 ; CHECK-NEXT: ldr q0, [x0]
164 ; CHECK-NEXT: ldr q1, [x1]
165 ; CHECK-NEXT: sqshl.4s v0, v0, v1
167 %tmp1 = load <4 x i32>, ptr %A
168 %tmp2 = load <4 x i32>, ptr %B
169 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; sqshl on <2 x i64> with a variable shift vector: expected codegen is two q-register loads and a single sqshl.2d.
173 define <2 x i64> @sqshl2d(ptr %A, ptr %B) nounwind {
174 ; CHECK-LABEL: sqshl2d:
176 ; CHECK-NEXT: ldr q0, [x0]
177 ; CHECK-NEXT: ldr q1, [x1]
178 ; CHECK-NEXT: sqshl.2d v0, v0, v1
180 %tmp1 = load <2 x i64>, ptr %A
181 %tmp2 = load <2 x i64>, ptr %B
182 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; uqshl on <16 x i8> with a variable shift vector: expected codegen is two q-register loads and a single uqshl.16b.
186 define <16 x i8> @uqshl16b(ptr %A, ptr %B) nounwind {
187 ; CHECK-LABEL: uqshl16b:
189 ; CHECK-NEXT: ldr q0, [x0]
190 ; CHECK-NEXT: ldr q1, [x1]
191 ; CHECK-NEXT: uqshl.16b v0, v0, v1
193 %tmp1 = load <16 x i8>, ptr %A
194 %tmp2 = load <16 x i8>, ptr %B
195 %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; uqshl on <8 x i16> with a variable shift vector: expected codegen is two q-register loads and a single uqshl.8h.
199 define <8 x i16> @uqshl8h(ptr %A, ptr %B) nounwind {
200 ; CHECK-LABEL: uqshl8h:
202 ; CHECK-NEXT: ldr q0, [x0]
203 ; CHECK-NEXT: ldr q1, [x1]
204 ; CHECK-NEXT: uqshl.8h v0, v0, v1
206 %tmp1 = load <8 x i16>, ptr %A
207 %tmp2 = load <8 x i16>, ptr %B
208 %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; uqshl on <4 x i32> with a variable shift vector: expected codegen is two q-register loads and a single uqshl.4s.
212 define <4 x i32> @uqshl4s(ptr %A, ptr %B) nounwind {
213 ; CHECK-LABEL: uqshl4s:
215 ; CHECK-NEXT: ldr q0, [x0]
216 ; CHECK-NEXT: ldr q1, [x1]
217 ; CHECK-NEXT: uqshl.4s v0, v0, v1
219 %tmp1 = load <4 x i32>, ptr %A
220 %tmp2 = load <4 x i32>, ptr %B
221 %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; uqshl on <2 x i64> with a variable shift vector: expected codegen is two q-register loads and a single uqshl.2d.
225 define <2 x i64> @uqshl2d(ptr %A, ptr %B) nounwind {
226 ; CHECK-LABEL: uqshl2d:
228 ; CHECK-NEXT: ldr q0, [x0]
229 ; CHECK-NEXT: ldr q1, [x1]
230 ; CHECK-NEXT: uqshl.2d v0, v0, v1
232 %tmp1 = load <2 x i64>, ptr %A
233 %tmp2 = load <2 x i64>, ptr %B
234 %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; uqshl on <1 x i64> with a variable shift: expected codegen uses the d-register form (uqshl d0, d0, d1).
238 define <1 x i64> @uqshl1d(ptr %A, ptr %B) nounwind {
239 ; CHECK-LABEL: uqshl1d:
241 ; CHECK-NEXT: ldr d0, [x0]
242 ; CHECK-NEXT: ldr d1, [x1]
243 ; CHECK-NEXT: uqshl d0, d0, d1
245 %tmp1 = load <1 x i64>, ptr %A
246 %tmp2 = load <1 x i64>, ptr %B
247 %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
; uqshl on <1 x i64> with the constant shift <i64 1>: expected codegen uses the immediate form (uqshl d0, d0, #1).
251 define <1 x i64> @uqshl1d_constant(ptr %A) nounwind {
252 ; CHECK-LABEL: uqshl1d_constant:
254 ; CHECK-NEXT: ldr d0, [x0]
255 ; CHECK-NEXT: uqshl d0, d0, #1
257 %tmp1 = load <1 x i64>, ptr %A
258 %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; uqshl on plain i64 operands: expected codegen loads both into x registers, copies them to d0/d1 with fmov, shifts, then moves d0 back to x0.
262 define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind {
263 ; CHECK-LABEL: uqshl_scalar:
265 ; CHECK-NEXT: ldr x8, [x0]
266 ; CHECK-NEXT: ldr x9, [x1]
267 ; CHECK-NEXT: fmov d0, x8
268 ; CHECK-NEXT: fmov d1, x9
269 ; CHECK-NEXT: uqshl d0, d0, d1
270 ; CHECK-NEXT: fmov x0, d0
272 %tmp1 = load i64, ptr %A
273 %tmp2 = load i64, ptr %B
274 %tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 %tmp2)
; uqshl on i64 with the constant shift 1: expected codegen loads straight into d0, uses the immediate form (#1), then moves d0 to x0.
278 define i64 @uqshl_scalar_constant(ptr %A) nounwind {
279 ; CHECK-LABEL: uqshl_scalar_constant:
281 ; CHECK-NEXT: ldr d0, [x0]
282 ; CHECK-NEXT: uqshl d0, d0, #1
283 ; CHECK-NEXT: fmov x0, d0
285 %tmp1 = load i64, ptr %A
286 %tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 1)
; Declarations of the llvm.aarch64.neon.sqshl and llvm.aarch64.neon.uqshl overloads
; (v8i8, v4i16, v2i32, v1i64, scalar i64, v16i8, v8i16, v4i32, v2i64) called by test functions in this file.
290 declare <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
291 declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
292 declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
293 declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
294 declare i64 @llvm.aarch64.neon.sqshl.i64(i64, i64) nounwind readnone
297 declare <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
298 declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
299 declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
300 declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
301 declare i64 @llvm.aarch64.neon.uqshl.i64(i64, i64) nounwind readnone
303 declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
304 declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
305 declare <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
306 declare <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
308 declare <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
309 declare <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
310 declare <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
311 declare <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; srshl on <8 x i8> with a variable shift vector: expected codegen is two d-register loads and a single srshl.8b.
313 define <8 x i8> @srshl8b(ptr %A, ptr %B) nounwind {
314 ; CHECK-LABEL: srshl8b:
316 ; CHECK-NEXT: ldr d0, [x0]
317 ; CHECK-NEXT: ldr d1, [x1]
318 ; CHECK-NEXT: srshl.8b v0, v0, v1
320 %tmp1 = load <8 x i8>, ptr %A
321 %tmp2 = load <8 x i8>, ptr %B
322 %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; srshl on <4 x i16> with a variable shift vector: expected codegen is two d-register loads and a single srshl.4h.
326 define <4 x i16> @srshl4h(ptr %A, ptr %B) nounwind {
327 ; CHECK-LABEL: srshl4h:
329 ; CHECK-NEXT: ldr d0, [x0]
330 ; CHECK-NEXT: ldr d1, [x1]
331 ; CHECK-NEXT: srshl.4h v0, v0, v1
333 %tmp1 = load <4 x i16>, ptr %A
334 %tmp2 = load <4 x i16>, ptr %B
335 %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; srshl on <2 x i32> with a variable shift vector: expected codegen is two d-register loads and a single srshl.2s.
339 define <2 x i32> @srshl2s(ptr %A, ptr %B) nounwind {
340 ; CHECK-LABEL: srshl2s:
342 ; CHECK-NEXT: ldr d0, [x0]
343 ; CHECK-NEXT: ldr d1, [x1]
344 ; CHECK-NEXT: srshl.2s v0, v0, v1
346 %tmp1 = load <2 x i32>, ptr %A
347 %tmp2 = load <2 x i32>, ptr %B
348 %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; srshl on <1 x i64> with a variable shift: expected codegen uses the d-register form (srshl d0, d0, d1).
352 define <1 x i64> @srshl1d(ptr %A, ptr %B) nounwind {
353 ; CHECK-LABEL: srshl1d:
355 ; CHECK-NEXT: ldr d0, [x0]
356 ; CHECK-NEXT: ldr d1, [x1]
357 ; CHECK-NEXT: srshl d0, d0, d1
359 %tmp1 = load <1 x i64>, ptr %A
360 %tmp2 = load <1 x i64>, ptr %B
361 %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
; srshl on <1 x i64> with the constant shift <i64 1>: expected codegen materializes the 1 via mov + fmov into d1
; and uses the register form of srshl; no immediate form appears in the expected output.
365 define <1 x i64> @srshl1d_constant(ptr %A) nounwind {
366 ; CHECK-LABEL: srshl1d_constant:
368 ; CHECK-NEXT: mov w8, #1 // =0x1
369 ; CHECK-NEXT: ldr d0, [x0]
370 ; CHECK-NEXT: fmov d1, x8
371 ; CHECK-NEXT: srshl d0, d0, d1
373 %tmp1 = load <1 x i64>, ptr %A
374 %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; srshl on plain i64 operands: expected codegen loads both into x registers, copies them to d0/d1 with fmov, shifts, then moves d0 back to x0.
378 define i64 @srshl_scalar(ptr %A, ptr %B) nounwind {
379 ; CHECK-LABEL: srshl_scalar:
381 ; CHECK-NEXT: ldr x8, [x0]
382 ; CHECK-NEXT: ldr x9, [x1]
383 ; CHECK-NEXT: fmov d0, x8
384 ; CHECK-NEXT: fmov d1, x9
385 ; CHECK-NEXT: srshl d0, d0, d1
386 ; CHECK-NEXT: fmov x0, d0
388 %tmp1 = load i64, ptr %A
389 %tmp2 = load i64, ptr %B
390 %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 %tmp2)
; srshl on i64 with the constant shift 1: expected codegen materializes the shift amount via mov + fmov into d1
; and uses the register form of srshl, then moves d0 to x0.
394 define i64 @srshl_scalar_constant(ptr %A) nounwind {
395 ; CHECK-LABEL: srshl_scalar_constant:
397 ; CHECK-NEXT: ldr x8, [x0]
398 ; CHECK-NEXT: mov w9, #1 // =0x1
399 ; CHECK-NEXT: fmov d1, x9
400 ; CHECK-NEXT: fmov d0, x8
401 ; CHECK-NEXT: srshl d0, d0, d1
402 ; CHECK-NEXT: fmov x0, d0
404 %tmp1 = load i64, ptr %A
405 %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 1)
; urshl on <8 x i8> with a variable shift vector: expected codegen is two d-register loads and a single urshl.8b.
409 define <8 x i8> @urshl8b(ptr %A, ptr %B) nounwind {
410 ; CHECK-LABEL: urshl8b:
412 ; CHECK-NEXT: ldr d0, [x0]
413 ; CHECK-NEXT: ldr d1, [x1]
414 ; CHECK-NEXT: urshl.8b v0, v0, v1
416 %tmp1 = load <8 x i8>, ptr %A
417 %tmp2 = load <8 x i8>, ptr %B
418 %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; urshl on <4 x i16> with a variable shift vector: expected codegen is two d-register loads and a single urshl.4h.
422 define <4 x i16> @urshl4h(ptr %A, ptr %B) nounwind {
423 ; CHECK-LABEL: urshl4h:
425 ; CHECK-NEXT: ldr d0, [x0]
426 ; CHECK-NEXT: ldr d1, [x1]
427 ; CHECK-NEXT: urshl.4h v0, v0, v1
429 %tmp1 = load <4 x i16>, ptr %A
430 %tmp2 = load <4 x i16>, ptr %B
431 %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; urshl on <2 x i32> with a variable shift vector: expected codegen is two d-register loads and a single urshl.2s.
435 define <2 x i32> @urshl2s(ptr %A, ptr %B) nounwind {
436 ; CHECK-LABEL: urshl2s:
438 ; CHECK-NEXT: ldr d0, [x0]
439 ; CHECK-NEXT: ldr d1, [x1]
440 ; CHECK-NEXT: urshl.2s v0, v0, v1
442 %tmp1 = load <2 x i32>, ptr %A
443 %tmp2 = load <2 x i32>, ptr %B
444 %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; urshl on <1 x i64> with a variable shift: expected codegen uses the d-register form (urshl d0, d0, d1).
448 define <1 x i64> @urshl1d(ptr %A, ptr %B) nounwind {
449 ; CHECK-LABEL: urshl1d:
451 ; CHECK-NEXT: ldr d0, [x0]
452 ; CHECK-NEXT: ldr d1, [x1]
453 ; CHECK-NEXT: urshl d0, d0, d1
455 %tmp1 = load <1 x i64>, ptr %A
456 %tmp2 = load <1 x i64>, ptr %B
457 %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
; urshl on <1 x i64> with the constant shift <i64 1>: expected codegen materializes the 1 via mov + fmov into d1
; and uses the register form of urshl; no immediate form appears in the expected output.
461 define <1 x i64> @urshl1d_constant(ptr %A) nounwind {
462 ; CHECK-LABEL: urshl1d_constant:
464 ; CHECK-NEXT: mov w8, #1 // =0x1
465 ; CHECK-NEXT: ldr d0, [x0]
466 ; CHECK-NEXT: fmov d1, x8
467 ; CHECK-NEXT: urshl d0, d0, d1
469 %tmp1 = load <1 x i64>, ptr %A
470 %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; urshl on plain i64 operands: expected codegen loads both into x registers, copies them to d0/d1 with fmov, shifts, then moves d0 back to x0.
474 define i64 @urshl_scalar(ptr %A, ptr %B) nounwind {
475 ; CHECK-LABEL: urshl_scalar:
477 ; CHECK-NEXT: ldr x8, [x0]
478 ; CHECK-NEXT: ldr x9, [x1]
479 ; CHECK-NEXT: fmov d0, x8
480 ; CHECK-NEXT: fmov d1, x9
481 ; CHECK-NEXT: urshl d0, d0, d1
482 ; CHECK-NEXT: fmov x0, d0
484 %tmp1 = load i64, ptr %A
485 %tmp2 = load i64, ptr %B
486 %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 %tmp2)
; urshl on i64 with the constant shift 1: expected codegen materializes the shift amount via mov + fmov into d1
; and uses the register form of urshl, then moves d0 to x0.
490 define i64 @urshl_scalar_constant(ptr %A) nounwind {
491 ; CHECK-LABEL: urshl_scalar_constant:
493 ; CHECK-NEXT: ldr x8, [x0]
494 ; CHECK-NEXT: mov w9, #1 // =0x1
495 ; CHECK-NEXT: fmov d1, x9
496 ; CHECK-NEXT: fmov d0, x8
497 ; CHECK-NEXT: urshl d0, d0, d1
498 ; CHECK-NEXT: fmov x0, d0
500 %tmp1 = load i64, ptr %A
501 %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 1)
; srshl on <16 x i8> with a variable shift vector: expected codegen is two q-register loads and a single srshl.16b.
505 define <16 x i8> @srshl16b(ptr %A, ptr %B) nounwind {
506 ; CHECK-LABEL: srshl16b:
508 ; CHECK-NEXT: ldr q0, [x0]
509 ; CHECK-NEXT: ldr q1, [x1]
510 ; CHECK-NEXT: srshl.16b v0, v0, v1
512 %tmp1 = load <16 x i8>, ptr %A
513 %tmp2 = load <16 x i8>, ptr %B
514 %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; srshl on <8 x i16> with a variable shift vector: expected codegen is two q-register loads and a single srshl.8h.
518 define <8 x i16> @srshl8h(ptr %A, ptr %B) nounwind {
519 ; CHECK-LABEL: srshl8h:
521 ; CHECK-NEXT: ldr q0, [x0]
522 ; CHECK-NEXT: ldr q1, [x1]
523 ; CHECK-NEXT: srshl.8h v0, v0, v1
525 %tmp1 = load <8 x i16>, ptr %A
526 %tmp2 = load <8 x i16>, ptr %B
527 %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; srshl on <4 x i32> with a variable shift vector: expected codegen is two q-register loads and a single srshl.4s.
531 define <4 x i32> @srshl4s(ptr %A, ptr %B) nounwind {
532 ; CHECK-LABEL: srshl4s:
534 ; CHECK-NEXT: ldr q0, [x0]
535 ; CHECK-NEXT: ldr q1, [x1]
536 ; CHECK-NEXT: srshl.4s v0, v0, v1
538 %tmp1 = load <4 x i32>, ptr %A
539 %tmp2 = load <4 x i32>, ptr %B
540 %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; srshl on <2 x i64> with a variable shift vector: expected codegen is two q-register loads and a single srshl.2d.
544 define <2 x i64> @srshl2d(ptr %A, ptr %B) nounwind {
545 ; CHECK-LABEL: srshl2d:
547 ; CHECK-NEXT: ldr q0, [x0]
548 ; CHECK-NEXT: ldr q1, [x1]
549 ; CHECK-NEXT: srshl.2d v0, v0, v1
551 %tmp1 = load <2 x i64>, ptr %A
552 %tmp2 = load <2 x i64>, ptr %B
553 %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; urshl on <16 x i8> with a variable shift vector: expected codegen is two q-register loads and a single urshl.16b.
557 define <16 x i8> @urshl16b(ptr %A, ptr %B) nounwind {
558 ; CHECK-LABEL: urshl16b:
560 ; CHECK-NEXT: ldr q0, [x0]
561 ; CHECK-NEXT: ldr q1, [x1]
562 ; CHECK-NEXT: urshl.16b v0, v0, v1
564 %tmp1 = load <16 x i8>, ptr %A
565 %tmp2 = load <16 x i8>, ptr %B
566 %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; urshl on <8 x i16> with a variable shift vector: expected codegen is two q-register loads and a single urshl.8h.
570 define <8 x i16> @urshl8h(ptr %A, ptr %B) nounwind {
571 ; CHECK-LABEL: urshl8h:
573 ; CHECK-NEXT: ldr q0, [x0]
574 ; CHECK-NEXT: ldr q1, [x1]
575 ; CHECK-NEXT: urshl.8h v0, v0, v1
577 %tmp1 = load <8 x i16>, ptr %A
578 %tmp2 = load <8 x i16>, ptr %B
579 %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; urshl on <4 x i32> with a variable shift vector: expected codegen is two q-register loads and a single urshl.4s.
583 define <4 x i32> @urshl4s(ptr %A, ptr %B) nounwind {
584 ; CHECK-LABEL: urshl4s:
586 ; CHECK-NEXT: ldr q0, [x0]
587 ; CHECK-NEXT: ldr q1, [x1]
588 ; CHECK-NEXT: urshl.4s v0, v0, v1
590 %tmp1 = load <4 x i32>, ptr %A
591 %tmp2 = load <4 x i32>, ptr %B
592 %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; urshl on <2 x i64> with a variable shift vector: expected codegen is two q-register loads and a single urshl.2d.
596 define <2 x i64> @urshl2d(ptr %A, ptr %B) nounwind {
597 ; CHECK-LABEL: urshl2d:
599 ; CHECK-NEXT: ldr q0, [x0]
600 ; CHECK-NEXT: ldr q1, [x1]
601 ; CHECK-NEXT: urshl.2d v0, v0, v1
603 %tmp1 = load <2 x i64>, ptr %A
604 %tmp2 = load <2 x i64>, ptr %B
605 %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; Declarations of the llvm.aarch64.neon.srshl and llvm.aarch64.neon.urshl overloads
; (v8i8, v4i16, v2i32, v1i64, scalar i64, v16i8, v8i16, v4i32, v2i64) called by test functions in this file.
609 declare <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
610 declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
611 declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
612 declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
613 declare i64 @llvm.aarch64.neon.srshl.i64(i64, i64) nounwind readnone
615 declare <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
616 declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
617 declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
618 declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
619 declare i64 @llvm.aarch64.neon.urshl.i64(i64, i64) nounwind readnone
621 declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
622 declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
623 declare <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
624 declare <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
626 declare <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
627 declare <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
628 declare <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
629 declare <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; sqrshl on <8 x i8> with a variable shift vector: expected codegen is two d-register loads and a single sqrshl.8b.
631 define <8 x i8> @sqrshl8b(ptr %A, ptr %B) nounwind {
632 ; CHECK-LABEL: sqrshl8b:
634 ; CHECK-NEXT: ldr d0, [x0]
635 ; CHECK-NEXT: ldr d1, [x1]
636 ; CHECK-NEXT: sqrshl.8b v0, v0, v1
638 %tmp1 = load <8 x i8>, ptr %A
639 %tmp2 = load <8 x i8>, ptr %B
640 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; sqrshl on <4 x i16> with a variable shift vector: expected codegen is two d-register loads and a single sqrshl.4h.
644 define <4 x i16> @sqrshl4h(ptr %A, ptr %B) nounwind {
645 ; CHECK-LABEL: sqrshl4h:
647 ; CHECK-NEXT: ldr d0, [x0]
648 ; CHECK-NEXT: ldr d1, [x1]
649 ; CHECK-NEXT: sqrshl.4h v0, v0, v1
651 %tmp1 = load <4 x i16>, ptr %A
652 %tmp2 = load <4 x i16>, ptr %B
653 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; sqrshl on <2 x i32> with a variable shift vector: expected codegen is two d-register loads and a single sqrshl.2s.
657 define <2 x i32> @sqrshl2s(ptr %A, ptr %B) nounwind {
658 ; CHECK-LABEL: sqrshl2s:
660 ; CHECK-NEXT: ldr d0, [x0]
661 ; CHECK-NEXT: ldr d1, [x1]
662 ; CHECK-NEXT: sqrshl.2s v0, v0, v1
664 %tmp1 = load <2 x i32>, ptr %A
665 %tmp2 = load <2 x i32>, ptr %B
666 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; uqrshl on <8 x i8> with a variable shift vector: expected codegen is two d-register loads and a single uqrshl.8b.
670 define <8 x i8> @uqrshl8b(ptr %A, ptr %B) nounwind {
671 ; CHECK-LABEL: uqrshl8b:
673 ; CHECK-NEXT: ldr d0, [x0]
674 ; CHECK-NEXT: ldr d1, [x1]
675 ; CHECK-NEXT: uqrshl.8b v0, v0, v1
677 %tmp1 = load <8 x i8>, ptr %A
678 %tmp2 = load <8 x i8>, ptr %B
679 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; uqrshl on <4 x i16> with a variable shift vector: expected codegen is two d-register loads and a single uqrshl.4h.
683 define <4 x i16> @uqrshl4h(ptr %A, ptr %B) nounwind {
684 ; CHECK-LABEL: uqrshl4h:
686 ; CHECK-NEXT: ldr d0, [x0]
687 ; CHECK-NEXT: ldr d1, [x1]
688 ; CHECK-NEXT: uqrshl.4h v0, v0, v1
690 %tmp1 = load <4 x i16>, ptr %A
691 %tmp2 = load <4 x i16>, ptr %B
692 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; uqrshl on <2 x i32> with a variable shift vector: expected codegen is two d-register loads and a single uqrshl.2s.
696 define <2 x i32> @uqrshl2s(ptr %A, ptr %B) nounwind {
697 ; CHECK-LABEL: uqrshl2s:
699 ; CHECK-NEXT: ldr d0, [x0]
700 ; CHECK-NEXT: ldr d1, [x1]
701 ; CHECK-NEXT: uqrshl.2s v0, v0, v1
703 %tmp1 = load <2 x i32>, ptr %A
704 %tmp2 = load <2 x i32>, ptr %B
705 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; sqrshl on <16 x i8> with a variable shift vector: expected codegen is two q-register loads and a single sqrshl.16b.
709 define <16 x i8> @sqrshl16b(ptr %A, ptr %B) nounwind {
710 ; CHECK-LABEL: sqrshl16b:
712 ; CHECK-NEXT: ldr q0, [x0]
713 ; CHECK-NEXT: ldr q1, [x1]
714 ; CHECK-NEXT: sqrshl.16b v0, v0, v1
716 %tmp1 = load <16 x i8>, ptr %A
717 %tmp2 = load <16 x i8>, ptr %B
718 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; sqrshl on <8 x i16> with a variable shift vector: expected codegen is two q-register loads and a single sqrshl.8h.
722 define <8 x i16> @sqrshl8h(ptr %A, ptr %B) nounwind {
723 ; CHECK-LABEL: sqrshl8h:
725 ; CHECK-NEXT: ldr q0, [x0]
726 ; CHECK-NEXT: ldr q1, [x1]
727 ; CHECK-NEXT: sqrshl.8h v0, v0, v1
729 %tmp1 = load <8 x i16>, ptr %A
730 %tmp2 = load <8 x i16>, ptr %B
731 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; sqrshl on <4 x i32> with a variable shift vector: expected codegen is two q-register loads and a single sqrshl.4s.
735 define <4 x i32> @sqrshl4s(ptr %A, ptr %B) nounwind {
736 ; CHECK-LABEL: sqrshl4s:
738 ; CHECK-NEXT: ldr q0, [x0]
739 ; CHECK-NEXT: ldr q1, [x1]
740 ; CHECK-NEXT: sqrshl.4s v0, v0, v1
742 %tmp1 = load <4 x i32>, ptr %A
743 %tmp2 = load <4 x i32>, ptr %B
744 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; sqrshl on <2 x i64> with a variable shift vector: expected codegen is two q-register loads and a single sqrshl.2d.
748 define <2 x i64> @sqrshl2d(ptr %A, ptr %B) nounwind {
749 ; CHECK-LABEL: sqrshl2d:
751 ; CHECK-NEXT: ldr q0, [x0]
752 ; CHECK-NEXT: ldr q1, [x1]
753 ; CHECK-NEXT: sqrshl.2d v0, v0, v1
755 %tmp1 = load <2 x i64>, ptr %A
756 %tmp2 = load <2 x i64>, ptr %B
757 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; sqrshl on <1 x i64> with a variable shift: expected codegen uses the d-register form (sqrshl d0, d0, d1).
761 define <1 x i64> @sqrshl1d(ptr %A, ptr %B) nounwind {
762 ; CHECK-LABEL: sqrshl1d:
764 ; CHECK-NEXT: ldr d0, [x0]
765 ; CHECK-NEXT: ldr d1, [x1]
766 ; CHECK-NEXT: sqrshl d0, d0, d1
768 %tmp1 = load <1 x i64>, ptr %A
769 %tmp2 = load <1 x i64>, ptr %B
770 %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
; sqrshl on <1 x i64> with the constant shift <i64 1>: expected codegen materializes the 1 via mov + fmov into d1
; and uses the register form of sqrshl; no immediate form appears in the expected output.
774 define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind {
775 ; CHECK-LABEL: sqrshl1d_constant:
777 ; CHECK-NEXT: mov w8, #1 // =0x1
778 ; CHECK-NEXT: ldr d0, [x0]
779 ; CHECK-NEXT: fmov d1, x8
780 ; CHECK-NEXT: sqrshl d0, d0, d1
782 %tmp1 = load <1 x i64>, ptr %A
783 %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; sqrshl on plain i64 operands: expected codegen loads both into x registers, copies them to d0/d1 with fmov, shifts, then moves d0 back to x0.
787 define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind {
788 ; CHECK-LABEL: sqrshl_scalar:
790 ; CHECK-NEXT: ldr x8, [x0]
791 ; CHECK-NEXT: ldr x9, [x1]
792 ; CHECK-NEXT: fmov d0, x8
793 ; CHECK-NEXT: fmov d1, x9
794 ; CHECK-NEXT: sqrshl d0, d0, d1
795 ; CHECK-NEXT: fmov x0, d0
797 %tmp1 = load i64, ptr %A
798 %tmp2 = load i64, ptr %B
799 %tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 %tmp2)
; sqrshl on i64 with the constant shift 1: expected codegen materializes the shift amount via mov + fmov into d1
; and uses the register form of sqrshl, then moves d0 to x0.
803 define i64 @sqrshl_scalar_constant(ptr %A) nounwind {
804 ; CHECK-LABEL: sqrshl_scalar_constant:
806 ; CHECK-NEXT: ldr x8, [x0]
807 ; CHECK-NEXT: mov w9, #1 // =0x1
808 ; CHECK-NEXT: fmov d1, x9
809 ; CHECK-NEXT: fmov d0, x8
810 ; CHECK-NEXT: sqrshl d0, d0, d1
811 ; CHECK-NEXT: fmov x0, d0
813 %tmp1 = load i64, ptr %A
814 %tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 1)
; uqrshl on <16 x i8> with a variable shift vector: expected codegen is two q-register loads and a single uqrshl.16b.
818 define <16 x i8> @uqrshl16b(ptr %A, ptr %B) nounwind {
819 ; CHECK-LABEL: uqrshl16b:
821 ; CHECK-NEXT: ldr q0, [x0]
822 ; CHECK-NEXT: ldr q1, [x1]
823 ; CHECK-NEXT: uqrshl.16b v0, v0, v1
825 %tmp1 = load <16 x i8>, ptr %A
826 %tmp2 = load <16 x i8>, ptr %B
827 %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; uqrshl on <8 x i16> with a variable shift vector: expected codegen is two q-register loads and a single uqrshl.8h.
831 define <8 x i16> @uqrshl8h(ptr %A, ptr %B) nounwind {
832 ; CHECK-LABEL: uqrshl8h:
834 ; CHECK-NEXT: ldr q0, [x0]
835 ; CHECK-NEXT: ldr q1, [x1]
836 ; CHECK-NEXT: uqrshl.8h v0, v0, v1
838 %tmp1 = load <8 x i16>, ptr %A
839 %tmp2 = load <8 x i16>, ptr %B
840 %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; uqrshl on <4 x i32> with a variable shift vector: expected codegen is two q-register loads and a single uqrshl.4s.
844 define <4 x i32> @uqrshl4s(ptr %A, ptr %B) nounwind {
845 ; CHECK-LABEL: uqrshl4s:
847 ; CHECK-NEXT: ldr q0, [x0]
848 ; CHECK-NEXT: ldr q1, [x1]
849 ; CHECK-NEXT: uqrshl.4s v0, v0, v1
851 %tmp1 = load <4 x i32>, ptr %A
852 %tmp2 = load <4 x i32>, ptr %B
853 %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; uqrshl on <2 x i64> with a variable shift vector: expected codegen is two q-register loads and a single uqrshl.2d.
857 define <2 x i64> @uqrshl2d(ptr %A, ptr %B) nounwind {
858 ; CHECK-LABEL: uqrshl2d:
860 ; CHECK-NEXT: ldr q0, [x0]
861 ; CHECK-NEXT: ldr q1, [x1]
862 ; CHECK-NEXT: uqrshl.2d v0, v0, v1
864 %tmp1 = load <2 x i64>, ptr %A
865 %tmp2 = load <2 x i64>, ptr %B
866 %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; uqrshl on <1 x i64> with a variable shift: expected codegen uses the d-register form (uqrshl d0, d0, d1).
870 define <1 x i64> @uqrshl1d(ptr %A, ptr %B) nounwind {
871 ; CHECK-LABEL: uqrshl1d:
873 ; CHECK-NEXT: ldr d0, [x0]
874 ; CHECK-NEXT: ldr d1, [x1]
875 ; CHECK-NEXT: uqrshl d0, d0, d1
877 %tmp1 = load <1 x i64>, ptr %A
878 %tmp2 = load <1 x i64>, ptr %B
879 %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
; uqrshl on <1 x i64> with the constant shift <i64 1>: expected codegen materializes the 1 via mov + fmov into d1
; and uses the register form of uqrshl; no immediate form appears in the expected output.
883 define <1 x i64> @uqrshl1d_constant(ptr %A) nounwind {
884 ; CHECK-LABEL: uqrshl1d_constant:
886 ; CHECK-NEXT: mov w8, #1 // =0x1
887 ; CHECK-NEXT: ldr d0, [x0]
888 ; CHECK-NEXT: fmov d1, x8
889 ; CHECK-NEXT: uqrshl d0, d0, d1
891 %tmp1 = load <1 x i64>, ptr %A
892 %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; uqrshl on plain i64 operands: expected codegen loads both into x registers, copies them to d0/d1 with fmov, shifts, then moves d0 back to x0.
896 define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind {
897 ; CHECK-LABEL: uqrshl_scalar:
899 ; CHECK-NEXT: ldr x8, [x0]
900 ; CHECK-NEXT: ldr x9, [x1]
901 ; CHECK-NEXT: fmov d0, x8
902 ; CHECK-NEXT: fmov d1, x9
903 ; CHECK-NEXT: uqrshl d0, d0, d1
904 ; CHECK-NEXT: fmov x0, d0
906 %tmp1 = load i64, ptr %A
907 %tmp2 = load i64, ptr %B
908 %tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 %tmp2)
; uqrshl on i64 with the constant shift 1: expected codegen materializes the shift amount via mov + fmov into d1
; and uses the register form of uqrshl, then moves d0 to x0.
912 define i64 @uqrshl_scalar_constant(ptr %A) nounwind {
913 ; CHECK-LABEL: uqrshl_scalar_constant:
915 ; CHECK-NEXT: ldr x8, [x0]
916 ; CHECK-NEXT: mov w9, #1 // =0x1
917 ; CHECK-NEXT: fmov d1, x9
918 ; CHECK-NEXT: fmov d0, x8
919 ; CHECK-NEXT: uqrshl d0, d0, d1
920 ; CHECK-NEXT: fmov x0, d0
922 %tmp1 = load i64, ptr %A
923 %tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 1)
; Declarations of the llvm.aarch64.neon.sqrshl and llvm.aarch64.neon.uqrshl overloads
; (v8i8, v4i16, v2i32, v1i64, scalar i64, v16i8, v8i16, v4i32, v2i64) called by test functions in this file.
927 declare <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
928 declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
929 declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
930 declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
931 declare i64 @llvm.aarch64.neon.sqrshl.i64(i64, i64) nounwind readnone
933 declare <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
934 declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
935 declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
936 declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
937 declare i64 @llvm.aarch64.neon.uqrshl.i64(i64, i64) nounwind readnone
939 declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
940 declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
941 declare <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
942 declare <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
944 declare <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
945 declare <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
946 declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
947 declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; urshl on <8 x i8> with an all -1 shift vector: expected codegen is the immediate right-shift form urshr.8b ... #1.
949 define <8 x i8> @urshr8b(ptr %A) nounwind {
950 ; CHECK-LABEL: urshr8b:
952 ; CHECK-NEXT: ldr d0, [x0]
953 ; CHECK-NEXT: urshr.8b v0, v0, #1
955 %tmp1 = load <8 x i8>, ptr %A
956 %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; urshl on <4 x i16> with an all -1 shift vector: expected codegen is the immediate right-shift form urshr.4h ... #1.
960 define <4 x i16> @urshr4h(ptr %A) nounwind {
961 ; CHECK-LABEL: urshr4h:
963 ; CHECK-NEXT: ldr d0, [x0]
964 ; CHECK-NEXT: urshr.4h v0, v0, #1
966 %tmp1 = load <4 x i16>, ptr %A
967 %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
; urshl on <2 x i32> with an all -1 shift vector: expected codegen is the immediate right-shift form urshr.2s ... #1.
971 define <2 x i32> @urshr2s(ptr %A) nounwind {
972 ; CHECK-LABEL: urshr2s:
974 ; CHECK-NEXT: ldr d0, [x0]
975 ; CHECK-NEXT: urshr.2s v0, v0, #1
977 %tmp1 = load <2 x i32>, ptr %A
978 %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
; urshl on <16 x i8> with an all -1 shift vector: expected codegen is the immediate right-shift form urshr.16b ... #1.
982 define <16 x i8> @urshr16b(ptr %A) nounwind {
983 ; CHECK-LABEL: urshr16b:
985 ; CHECK-NEXT: ldr q0, [x0]
986 ; CHECK-NEXT: urshr.16b v0, v0, #1
988 %tmp1 = load <16 x i8>, ptr %A
989 %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; urshl on <8 x i16> with an all -1 shift vector: expected codegen is the immediate right-shift form urshr.8h ... #1.
993 define <8 x i16> @urshr8h(ptr %A) nounwind {
994 ; CHECK-LABEL: urshr8h:
996 ; CHECK-NEXT: ldr q0, [x0]
997 ; CHECK-NEXT: urshr.8h v0, v0, #1
999 %tmp1 = load <8 x i16>, ptr %A
1000 %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
; urshl on <4 x i32> with an all -1 shift vector: expected codegen is the immediate right-shift form urshr.4s ... #1.
1004 define <4 x i32> @urshr4s(ptr %A) nounwind {
1005 ; CHECK-LABEL: urshr4s:
1007 ; CHECK-NEXT: ldr q0, [x0]
1008 ; CHECK-NEXT: urshr.4s v0, v0, #1
1010 %tmp1 = load <4 x i32>, ptr %A
1011 %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; urshl on <2 x i64> with an all -1 shift vector: expected codegen is the immediate right-shift form urshr.2d ... #1.
1015 define <2 x i64> @urshr2d(ptr %A) nounwind {
1016 ; CHECK-LABEL: urshr2d:
1018 ; CHECK-NEXT: ldr q0, [x0]
1019 ; CHECK-NEXT: urshr.2d v0, v0, #1
1021 %tmp1 = load <2 x i64>, ptr %A
1022 %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
; urshl on <1 x i64> with the shift <i64 -1>: expected codegen is the immediate right-shift form urshr d0, d0, #1.
1026 define <1 x i64> @urshr1d(ptr %A) nounwind {
1027 ; CHECK-LABEL: urshr1d:
1029 ; CHECK-NEXT: ldr d0, [x0]
1030 ; CHECK-NEXT: urshr d0, d0, #1
1032 %tmp1 = load <1 x i64>, ptr %A
1033 %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
; urshl on i64 with the shift amount -1: expected codegen loads straight into d0, emits urshr d0, d0, #1, then moves d0 to x0.
1037 define i64 @urshr_scalar(ptr %A) nounwind {
1038 ; CHECK-LABEL: urshr_scalar:
1040 ; CHECK-NEXT: ldr d0, [x0]
1041 ; CHECK-NEXT: urshr d0, d0, #1
1042 ; CHECK-NEXT: fmov x0, d0
1044 %tmp1 = load i64, ptr %A
1045 %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1)
; srshl on <8 x i8> with an all -1 shift vector: expected codegen is the immediate right-shift form srshr.8b ... #1.
1049 define <8 x i8> @srshr8b(ptr %A) nounwind {
1050 ; CHECK-LABEL: srshr8b:
1052 ; CHECK-NEXT: ldr d0, [x0]
1053 ; CHECK-NEXT: srshr.8b v0, v0, #1
1055 %tmp1 = load <8 x i8>, ptr %A
1056 %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; srshl on <4 x i16> with an all -1 shift vector: expected codegen is the immediate right-shift form srshr.4h ... #1.
1060 define <4 x i16> @srshr4h(ptr %A) nounwind {
1061 ; CHECK-LABEL: srshr4h:
1063 ; CHECK-NEXT: ldr d0, [x0]
1064 ; CHECK-NEXT: srshr.4h v0, v0, #1
1066 %tmp1 = load <4 x i16>, ptr %A
1067 %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
; srshl on <2 x i32> with an all -1 shift vector: expected codegen is the immediate right-shift form srshr.2s ... #1.
1071 define <2 x i32> @srshr2s(ptr %A) nounwind {
1072 ; CHECK-LABEL: srshr2s:
1074 ; CHECK-NEXT: ldr d0, [x0]
1075 ; CHECK-NEXT: srshr.2s v0, v0, #1
1077 %tmp1 = load <2 x i32>, ptr %A
1078 %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
; srshl.v16i8 with a constant splat shift of -1: the expected output is the
; immediate form srshr.16b #1 on a full 128-bit (q) load.
1082 define <16 x i8> @srshr16b(ptr %A) nounwind {
1083 ; CHECK-LABEL: srshr16b:
1085 ; CHECK-NEXT: ldr q0, [x0]
1086 ; CHECK-NEXT: srshr.16b v0, v0, #1
1088 %tmp1 = load <16 x i8>, ptr %A
1089 %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; srshl.v8i16 with a constant splat shift of -1: the expected output is the
; immediate form srshr.8h #1.
1093 define <8 x i16> @srshr8h(ptr %A) nounwind {
1094 ; CHECK-LABEL: srshr8h:
1096 ; CHECK-NEXT: ldr q0, [x0]
1097 ; CHECK-NEXT: srshr.8h v0, v0, #1
1099 %tmp1 = load <8 x i16>, ptr %A
1100 %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
; srshl.v4i32 with a constant splat shift of -1: the expected output is the
; immediate form srshr.4s #1.
1104 define <4 x i32> @srshr4s(ptr %A) nounwind {
1105 ; CHECK-LABEL: srshr4s:
1107 ; CHECK-NEXT: ldr q0, [x0]
1108 ; CHECK-NEXT: srshr.4s v0, v0, #1
1110 %tmp1 = load <4 x i32>, ptr %A
1111 %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; srshl.v2i64 with a constant splat shift of -1: the expected output is the
; immediate form srshr.2d #1.
1115 define <2 x i64> @srshr2d(ptr %A) nounwind {
1116 ; CHECK-LABEL: srshr2d:
1118 ; CHECK-NEXT: ldr q0, [x0]
1119 ; CHECK-NEXT: srshr.2d v0, v0, #1
1121 %tmp1 = load <2 x i64>, ptr %A
1122 %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
; srshl.v1i64 with a constant shift of -1: the expected output is the scalar
; immediate form srshr d0, d0, #1.
1126 define <1 x i64> @srshr1d(ptr %A) nounwind {
1127 ; CHECK-LABEL: srshr1d:
1129 ; CHECK-NEXT: ldr d0, [x0]
1130 ; CHECK-NEXT: srshr d0, d0, #1
1132 %tmp1 = load <1 x i64>, ptr %A
1133 %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
; Plain i64 srshl with a constant shift of -1: the expected output loads the
; value directly into d0, uses srshr #1, and moves the result to x0 with fmov.
1137 define i64 @srshr_scalar(ptr %A) nounwind {
1138 ; CHECK-LABEL: srshr_scalar:
1140 ; CHECK-NEXT: ldr d0, [x0]
1141 ; CHECK-NEXT: srshr d0, d0, #1
1142 ; CHECK-NEXT: fmov x0, d0
1144 %tmp1 = load i64, ptr %A
1145 %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1)
; sqshlu.v8i8 with a constant splat shift of 1: the expected output is the
; immediate form sqshlu.8b #1.
1149 define <8 x i8> @sqshlu8b(ptr %A) nounwind {
1150 ; CHECK-LABEL: sqshlu8b:
1152 ; CHECK-NEXT: ldr d0, [x0]
1153 ; CHECK-NEXT: sqshlu.8b v0, v0, #1
1155 %tmp1 = load <8 x i8>, ptr %A
1156 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; sqshlu.v4i16 with a constant splat shift of 1: the expected output is the
; immediate form sqshlu.4h #1.
1160 define <4 x i16> @sqshlu4h(ptr %A) nounwind {
1161 ; CHECK-LABEL: sqshlu4h:
1163 ; CHECK-NEXT: ldr d0, [x0]
1164 ; CHECK-NEXT: sqshlu.4h v0, v0, #1
1166 %tmp1 = load <4 x i16>, ptr %A
1167 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
; sqshlu.v2i32 with a constant splat shift of 1: the expected output is the
; immediate form sqshlu.2s #1.
1171 define <2 x i32> @sqshlu2s(ptr %A) nounwind {
1172 ; CHECK-LABEL: sqshlu2s:
1174 ; CHECK-NEXT: ldr d0, [x0]
1175 ; CHECK-NEXT: sqshlu.2s v0, v0, #1
1177 %tmp1 = load <2 x i32>, ptr %A
1178 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
; sqshlu.v16i8 with a constant splat shift of 1: the expected output is the
; immediate form sqshlu.16b #1 on a full 128-bit (q) load.
1182 define <16 x i8> @sqshlu16b(ptr %A) nounwind {
1183 ; CHECK-LABEL: sqshlu16b:
1185 ; CHECK-NEXT: ldr q0, [x0]
1186 ; CHECK-NEXT: sqshlu.16b v0, v0, #1
1188 %tmp1 = load <16 x i8>, ptr %A
1189 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; sqshlu.v8i16 with a constant splat shift of 1: the expected output is the
; immediate form sqshlu.8h #1.
1193 define <8 x i16> @sqshlu8h(ptr %A) nounwind {
1194 ; CHECK-LABEL: sqshlu8h:
1196 ; CHECK-NEXT: ldr q0, [x0]
1197 ; CHECK-NEXT: sqshlu.8h v0, v0, #1
1199 %tmp1 = load <8 x i16>, ptr %A
1200 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; sqshlu.v4i32 with a constant splat shift of 1: the expected output is the
; immediate form sqshlu.4s #1.
1204 define <4 x i32> @sqshlu4s(ptr %A) nounwind {
1205 ; CHECK-LABEL: sqshlu4s:
1207 ; CHECK-NEXT: ldr q0, [x0]
1208 ; CHECK-NEXT: sqshlu.4s v0, v0, #1
1210 %tmp1 = load <4 x i32>, ptr %A
1211 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; sqshlu.v2i64 with a constant splat shift of 1: the expected output is the
; immediate form sqshlu.2d #1.
1215 define <2 x i64> @sqshlu2d(ptr %A) nounwind {
1216 ; CHECK-LABEL: sqshlu2d:
1218 ; CHECK-NEXT: ldr q0, [x0]
1219 ; CHECK-NEXT: sqshlu.2d v0, v0, #1
1221 %tmp1 = load <2 x i64>, ptr %A
1222 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
; sqshlu.v1i64 with a constant shift of 1: the expected output is the scalar
; immediate form sqshlu d0, d0, #1.
1226 define <1 x i64> @sqshlu1d_constant(ptr %A) nounwind {
1227 ; CHECK-LABEL: sqshlu1d_constant:
1229 ; CHECK-NEXT: ldr d0, [x0]
1230 ; CHECK-NEXT: sqshlu d0, d0, #1
1232 %tmp1 = load <1 x i64>, ptr %A
1233 %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; Plain i64 sqshlu with a constant shift of 1: the expected output loads the
; value directly into d0, uses sqshlu #1, and moves the result to x0 with fmov.
1237 define i64 @sqshlu_i64_constant(ptr %A) nounwind {
1238 ; CHECK-LABEL: sqshlu_i64_constant:
1240 ; CHECK-NEXT: ldr d0, [x0]
1241 ; CHECK-NEXT: sqshlu d0, d0, #1
1242 ; CHECK-NEXT: fmov x0, d0
1244 %tmp1 = load i64, ptr %A
1245 %tmp3 = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %tmp1, i64 1)
; Plain i32 sqshlu with a constant shift of 1. Unlike the i64 variant, the
; expected output loads into a general register (w8) and moves it to s0 with
; fmov before the sqshlu, then moves the result back to w0.
1249 define i32 @sqshlu_i32_constant(ptr %A) nounwind {
1250 ; CHECK-LABEL: sqshlu_i32_constant:
1252 ; CHECK-NEXT: ldr w8, [x0]
1253 ; CHECK-NEXT: fmov s0, w8
1254 ; CHECK-NEXT: sqshlu s0, s0, #1
1255 ; CHECK-NEXT: fmov w0, s0
1257 %tmp1 = load i32, ptr %A
1258 %tmp3 = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %tmp1, i32 1)
; Declarations of the sqshlu intrinsics called by the sqshlu* tests:
; 64-bit vector and scalar forms first, then the 128-bit vector forms.
1262 declare <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
1263 declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
1264 declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
1265 declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
1266 declare i64 @llvm.aarch64.neon.sqshlu.i64(i64, i64) nounwind readnone
1267 declare i32 @llvm.aarch64.neon.sqshlu.i32(i32, i32) nounwind readnone
1269 declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
1270 declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
1271 declare <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
1272 declare <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; rshrn.v8i8 narrowing an <8 x i16> input with shift amount 1: the expected
; output is rshrn.8b #1.
1274 define <8 x i8> @rshrn8b(ptr %A) nounwind {
1275 ; CHECK-LABEL: rshrn8b:
1277 ; CHECK-NEXT: ldr q0, [x0]
1278 ; CHECK-NEXT: rshrn.8b v0, v0, #1
1280 %tmp1 = load <8 x i16>, ptr %A
1281 %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
; rshrn.v4i16 narrowing a <4 x i32> input with shift amount 1: the expected
; output is rshrn.4h #1.
1285 define <4 x i16> @rshrn4h(ptr %A) nounwind {
1286 ; CHECK-LABEL: rshrn4h:
1288 ; CHECK-NEXT: ldr q0, [x0]
1289 ; CHECK-NEXT: rshrn.4h v0, v0, #1
1291 %tmp1 = load <4 x i32>, ptr %A
1292 %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
; rshrn.v2i32 narrowing a <2 x i64> input with shift amount 1: the expected
; output is rshrn.2s #1.
1296 define <2 x i32> @rshrn2s(ptr %A) nounwind {
1297 ; CHECK-LABEL: rshrn2s:
1299 ; CHECK-NEXT: ldr q0, [x0]
1300 ; CHECK-NEXT: rshrn.2s v0, v0, #1
1302 %tmp1 = load <2 x i64>, ptr %A
1303 %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
; The rshrn.v8i8 result is concatenated after the <8 x i8> loaded from %ret
; (shuffle mask 0-15): the expected output is a single rshrn2.16b that writes
; into the register already holding that loaded value.
1307 define <16 x i8> @rshrn16b(ptr %ret, ptr %A) nounwind {
1308 ; CHECK-LABEL: rshrn16b:
1310 ; CHECK-NEXT: ldr d0, [x0]
1311 ; CHECK-NEXT: ldr q1, [x1]
1312 ; CHECK-NEXT: rshrn2.16b v0, v1, #1
1314 %out = load <8 x i8>, ptr %ret
1315 %tmp1 = load <8 x i16>, ptr %A
1316 %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
1317 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; The rshrn.v4i16 result is concatenated after the <4 x i16> loaded from %ret
; (shuffle mask 0-7): the expected output is a single rshrn2.8h that writes
; into the register already holding that loaded value.
1321 define <8 x i16> @rshrn8h(ptr %ret, ptr %A) nounwind {
1322 ; CHECK-LABEL: rshrn8h:
1324 ; CHECK-NEXT: ldr d0, [x0]
1325 ; CHECK-NEXT: ldr q1, [x1]
1326 ; CHECK-NEXT: rshrn2.8h v0, v1, #1
1328 %out = load <4 x i16>, ptr %ret
1329 %tmp1 = load <4 x i32>, ptr %A
1330 %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
1331 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; The rshrn.v2i32 result is concatenated after the <2 x i32> loaded from %ret
; (shuffle mask 0-3): the expected output is a single rshrn2.4s that writes
; into the register already holding that loaded value.
1335 define <4 x i32> @rshrn4s(ptr %ret, ptr %A) nounwind {
1336 ; CHECK-LABEL: rshrn4s:
1338 ; CHECK-NEXT: ldr d0, [x0]
1339 ; CHECK-NEXT: ldr q1, [x1]
1340 ; CHECK-NEXT: rshrn2.4s v0, v1, #1
1342 %out = load <2 x i32>, ptr %ret
1343 %tmp1 = load <2 x i64>, ptr %A
1344 %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
1345 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Declarations of the rshrn intrinsics called by the rshrn* tests; each takes
; the wide source vector and an i32 shift amount.
1349 declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone
1350 declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone
1351 declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone
; Generic IR pattern (lshr by a splat of 1, then trunc to <8 x i8>) with no
; intrinsic call: the expected output is the single instruction shrn.8b #1.
1353 define <8 x i8> @shrn8b(ptr %A) nounwind {
1354 ; CHECK-LABEL: shrn8b:
1356 ; CHECK-NEXT: ldr q0, [x0]
1357 ; CHECK-NEXT: shrn.8b v0, v0, #1
1359 %tmp1 = load <8 x i16>, ptr %A
1360 %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1361 %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
; Generic IR pattern (lshr by a splat of 1, then trunc to <4 x i16>) with no
; intrinsic call: the expected output is the single instruction shrn.4h #1.
1365 define <4 x i16> @shrn4h(ptr %A) nounwind {
1366 ; CHECK-LABEL: shrn4h:
1368 ; CHECK-NEXT: ldr q0, [x0]
1369 ; CHECK-NEXT: shrn.4h v0, v0, #1
1371 %tmp1 = load <4 x i32>, ptr %A
1372 %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
1373 %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
; Generic IR pattern (lshr by a splat of 1, then trunc to <2 x i32>) with no
; intrinsic call: the expected output is the single instruction shrn.2s #1.
1377 define <2 x i32> @shrn2s(ptr %A) nounwind {
1378 ; CHECK-LABEL: shrn2s:
1380 ; CHECK-NEXT: ldr q0, [x0]
1381 ; CHECK-NEXT: shrn.2s v0, v0, #1
1383 %tmp1 = load <2 x i64>, ptr %A
1384 %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
1385 %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
; lshr-by-1 + trunc result concatenated after the <8 x i8> loaded from %ret
; (shuffle mask 0-15): the expected output is a single shrn2.16b that writes
; into the register already holding that loaded value.
1389 define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind {
1390 ; CHECK-LABEL: shrn16b:
1392 ; CHECK-NEXT: ldr d0, [x0]
1393 ; CHECK-NEXT: ldr q1, [x1]
1394 ; CHECK-NEXT: shrn2.16b v0, v1, #1
1396 %out = load <8 x i8>, ptr %ret
1397 %tmp1 = load <8 x i16>, ptr %A
1398 %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1399 %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
1400 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; lshr-by-1 + trunc result concatenated after the <4 x i16> loaded from %ret
; (shuffle mask 0-7): the expected output is a single shrn2.8h that writes
; into the register already holding that loaded value.
1404 define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind {
1405 ; CHECK-LABEL: shrn8h:
1407 ; CHECK-NEXT: ldr d0, [x0]
1408 ; CHECK-NEXT: ldr q1, [x1]
1409 ; CHECK-NEXT: shrn2.8h v0, v1, #1
1411 %out = load <4 x i16>, ptr %ret
1412 %tmp1 = load <4 x i32>, ptr %A
1413 %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
1414 %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
1415 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; lshr-by-1 + trunc result concatenated after the <2 x i32> loaded from %ret
; (shuffle mask 0-3): the expected output is a single shrn2.4s that writes
; into the register already holding that loaded value.
1419 define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind {
1420 ; CHECK-LABEL: shrn4s:
1422 ; CHECK-NEXT: ldr d0, [x0]
1423 ; CHECK-NEXT: ldr q1, [x1]
1424 ; CHECK-NEXT: shrn2.4s v0, v1, #1
1426 %out = load <2 x i32>, ptr %ret
1427 %tmp1 = load <2 x i64>, ptr %A
1428 %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
1429 %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
1430 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Declarations of llvm.aarch64.neon.shrn.*.
; NOTE(review): the shrn* tests directly above are written as lshr + trunc and
; do not call these; no caller is visible nearby -- confirm whether these
; declarations are still needed.
1434 declare <8 x i8> @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone
1435 declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone
1436 declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone
; Scalar sqshrn.i32 on an i64 argument with shift amount 1: the expected
; output moves x0 into d0, applies sqshrn s0, d0, #1, and moves s0 back to w0.
1438 define i32 @sqshrn1s(i64 %A) nounwind {
1439 ; CHECK-LABEL: sqshrn1s:
1441 ; CHECK-NEXT: fmov d0, x0
1442 ; CHECK-NEXT: sqshrn s0, d0, #1
1443 ; CHECK-NEXT: fmov w0, s0
1445 %tmp = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %A, i32 1)
; sqshrn.v8i8 narrowing an <8 x i16> input with shift amount 1: the expected
; output is sqshrn.8b #1.
1449 define <8 x i8> @sqshrn8b(ptr %A) nounwind {
1450 ; CHECK-LABEL: sqshrn8b:
1452 ; CHECK-NEXT: ldr q0, [x0]
1453 ; CHECK-NEXT: sqshrn.8b v0, v0, #1
1455 %tmp1 = load <8 x i16>, ptr %A
1456 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
; sqshrn.v4i16 narrowing a <4 x i32> input with shift amount 1: the expected
; output is sqshrn.4h #1.
1460 define <4 x i16> @sqshrn4h(ptr %A) nounwind {
1461 ; CHECK-LABEL: sqshrn4h:
1463 ; CHECK-NEXT: ldr q0, [x0]
1464 ; CHECK-NEXT: sqshrn.4h v0, v0, #1
1466 %tmp1 = load <4 x i32>, ptr %A
1467 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
; sqshrn.v2i32 narrowing a <2 x i64> input with shift amount 1: the expected
; output is sqshrn.2s #1.
1471 define <2 x i32> @sqshrn2s(ptr %A) nounwind {
1472 ; CHECK-LABEL: sqshrn2s:
1474 ; CHECK-NEXT: ldr q0, [x0]
1475 ; CHECK-NEXT: sqshrn.2s v0, v0, #1
1477 %tmp1 = load <2 x i64>, ptr %A
1478 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
; The sqshrn.v8i8 result is concatenated after the <8 x i8> loaded from %ret
; (shuffle mask 0-15): the expected output is a single sqshrn2.16b that writes
; into the register already holding that loaded value.
1483 define <16 x i8> @sqshrn16b(ptr %ret, ptr %A) nounwind {
1484 ; CHECK-LABEL: sqshrn16b:
1486 ; CHECK-NEXT: ldr d0, [x0]
1487 ; CHECK-NEXT: ldr q1, [x1]
1488 ; CHECK-NEXT: sqshrn2.16b v0, v1, #1
1490 %out = load <8 x i8>, ptr %ret
1491 %tmp1 = load <8 x i16>, ptr %A
1492 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
1493 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; The sqshrn.v4i16 result is concatenated after the <4 x i16> loaded from %ret
; (shuffle mask 0-7): the expected output is a single sqshrn2.8h that writes
; into the register already holding that loaded value.
1497 define <8 x i16> @sqshrn8h(ptr %ret, ptr %A) nounwind {
1498 ; CHECK-LABEL: sqshrn8h:
1500 ; CHECK-NEXT: ldr d0, [x0]
1501 ; CHECK-NEXT: ldr q1, [x1]
1502 ; CHECK-NEXT: sqshrn2.8h v0, v1, #1
1504 %out = load <4 x i16>, ptr %ret
1505 %tmp1 = load <4 x i32>, ptr %A
1506 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
1507 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; The sqshrn.v2i32 result is concatenated after the <2 x i32> loaded from %ret
; (shuffle mask 0-3): the expected output is a single sqshrn2.4s that writes
; into the register already holding that loaded value.
1511 define <4 x i32> @sqshrn4s(ptr %ret, ptr %A) nounwind {
1512 ; CHECK-LABEL: sqshrn4s:
1514 ; CHECK-NEXT: ldr d0, [x0]
1515 ; CHECK-NEXT: ldr q1, [x1]
1516 ; CHECK-NEXT: sqshrn2.4s v0, v1, #1
1518 %out = load <2 x i32>, ptr %ret
1519 %tmp1 = load <2 x i64>, ptr %A
1520 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
1521 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Declarations of the sqshrn intrinsics called by the sqshrn* tests: one
; scalar (i64 -> i32) form and three narrowing vector forms.
1525 declare i32 @llvm.aarch64.neon.sqshrn.i32(i64, i32) nounwind readnone
1526 declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone
1527 declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone
1528 declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone
; Scalar sqshrun.i32 on an i64 argument with shift amount 1: the expected
; output moves x0 into d0, applies sqshrun s0, d0, #1, and moves s0 back to w0.
1530 define i32 @sqshrun1s(i64 %A) nounwind {
1531 ; CHECK-LABEL: sqshrun1s:
1533 ; CHECK-NEXT: fmov d0, x0
1534 ; CHECK-NEXT: sqshrun s0, d0, #1
1535 ; CHECK-NEXT: fmov w0, s0
1537 %tmp = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %A, i32 1)
; sqshrun.v8i8 narrowing an <8 x i16> input with shift amount 1: the expected
; output is sqshrun.8b #1.
1541 define <8 x i8> @sqshrun8b(ptr %A) nounwind {
1542 ; CHECK-LABEL: sqshrun8b:
1544 ; CHECK-NEXT: ldr q0, [x0]
1545 ; CHECK-NEXT: sqshrun.8b v0, v0, #1
1547 %tmp1 = load <8 x i16>, ptr %A
1548 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
; sqshrun.v4i16 narrowing a <4 x i32> input with shift amount 1: the expected
; output is sqshrun.4h #1.
1552 define <4 x i16> @sqshrun4h(ptr %A) nounwind {
1553 ; CHECK-LABEL: sqshrun4h:
1555 ; CHECK-NEXT: ldr q0, [x0]
1556 ; CHECK-NEXT: sqshrun.4h v0, v0, #1
1558 %tmp1 = load <4 x i32>, ptr %A
1559 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
; sqshrun.v2i32 narrowing a <2 x i64> input with shift amount 1: the expected
; output is sqshrun.2s #1.
1563 define <2 x i32> @sqshrun2s(ptr %A) nounwind {
1564 ; CHECK-LABEL: sqshrun2s:
1566 ; CHECK-NEXT: ldr q0, [x0]
1567 ; CHECK-NEXT: sqshrun.2s v0, v0, #1
1569 %tmp1 = load <2 x i64>, ptr %A
1570 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
; The sqshrun.v8i8 result is concatenated after the <8 x i8> loaded from %ret
; (shuffle mask 0-15): the expected output is a single sqshrun2.16b that
; writes into the register already holding that loaded value.
1574 define <16 x i8> @sqshrun16b(ptr %ret, ptr %A) nounwind {
1575 ; CHECK-LABEL: sqshrun16b:
1577 ; CHECK-NEXT: ldr d0, [x0]
1578 ; CHECK-NEXT: ldr q1, [x1]
1579 ; CHECK-NEXT: sqshrun2.16b v0, v1, #1
1581 %out = load <8 x i8>, ptr %ret
1582 %tmp1 = load <8 x i16>, ptr %A
1583 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
1584 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; The sqshrun.v4i16 result is concatenated after the <4 x i16> loaded from
; %ret (shuffle mask 0-7): the expected output is a single sqshrun2.8h that
; writes into the register already holding that loaded value.
1588 define <8 x i16> @sqshrun8h(ptr %ret, ptr %A) nounwind {
1589 ; CHECK-LABEL: sqshrun8h:
1591 ; CHECK-NEXT: ldr d0, [x0]
1592 ; CHECK-NEXT: ldr q1, [x1]
1593 ; CHECK-NEXT: sqshrun2.8h v0, v1, #1
1595 %out = load <4 x i16>, ptr %ret
1596 %tmp1 = load <4 x i32>, ptr %A
1597 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
1598 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; The sqshrun.v2i32 result is concatenated after the <2 x i32> loaded from
; %ret (shuffle mask 0-3): the expected output is a single sqshrun2.4s that
; writes into the register already holding that loaded value.
1602 define <4 x i32> @sqshrun4s(ptr %ret, ptr %A) nounwind {
1603 ; CHECK-LABEL: sqshrun4s:
1605 ; CHECK-NEXT: ldr d0, [x0]
1606 ; CHECK-NEXT: ldr q1, [x1]
1607 ; CHECK-NEXT: sqshrun2.4s v0, v1, #1
1609 %out = load <2 x i32>, ptr %ret
1610 %tmp1 = load <2 x i64>, ptr %A
1611 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
1612 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Declarations of the sqshrun intrinsics called by the sqshrun* tests: one
; scalar (i64 -> i32) form and three narrowing vector forms.
1616 declare i32 @llvm.aarch64.neon.sqshrun.i32(i64, i32) nounwind readnone
1617 declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone
1618 declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone
1619 declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone
; Scalar sqrshrn.i32 on an i64 argument with shift amount 1: the expected
; output moves x0 into d0, applies sqrshrn s0, d0, #1, and moves s0 back to w0.
1621 define i32 @sqrshrn1s(i64 %A) nounwind {
1622 ; CHECK-LABEL: sqrshrn1s:
1624 ; CHECK-NEXT: fmov d0, x0
1625 ; CHECK-NEXT: sqrshrn s0, d0, #1
1626 ; CHECK-NEXT: fmov w0, s0
1628 %tmp = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %A, i32 1)
; sqrshrn.v8i8 narrowing an <8 x i16> input with shift amount 1: the expected
; output is sqrshrn.8b #1.
1632 define <8 x i8> @sqrshrn8b(ptr %A) nounwind {
1633 ; CHECK-LABEL: sqrshrn8b:
1635 ; CHECK-NEXT: ldr q0, [x0]
1636 ; CHECK-NEXT: sqrshrn.8b v0, v0, #1
1638 %tmp1 = load <8 x i16>, ptr %A
1639 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
; sqrshrn.v4i16 narrowing a <4 x i32> input with shift amount 1: the expected
; output is sqrshrn.4h #1.
1643 define <4 x i16> @sqrshrn4h(ptr %A) nounwind {
1644 ; CHECK-LABEL: sqrshrn4h:
1646 ; CHECK-NEXT: ldr q0, [x0]
1647 ; CHECK-NEXT: sqrshrn.4h v0, v0, #1
1649 %tmp1 = load <4 x i32>, ptr %A
1650 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
; sqrshrn.v2i32 narrowing a <2 x i64> input with shift amount 1: the expected
; output is sqrshrn.2s #1.
1654 define <2 x i32> @sqrshrn2s(ptr %A) nounwind {
1655 ; CHECK-LABEL: sqrshrn2s:
1657 ; CHECK-NEXT: ldr q0, [x0]
1658 ; CHECK-NEXT: sqrshrn.2s v0, v0, #1
1660 %tmp1 = load <2 x i64>, ptr %A
1661 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
; The sqrshrn.v8i8 result is concatenated after the <8 x i8> loaded from %ret
; (shuffle mask 0-15): the expected output is a single sqrshrn2.16b that
; writes into the register already holding that loaded value.
1665 define <16 x i8> @sqrshrn16b(ptr %ret, ptr %A) nounwind {
1666 ; CHECK-LABEL: sqrshrn16b:
1668 ; CHECK-NEXT: ldr d0, [x0]
1669 ; CHECK-NEXT: ldr q1, [x1]
1670 ; CHECK-NEXT: sqrshrn2.16b v0, v1, #1
1672 %out = load <8 x i8>, ptr %ret
1673 %tmp1 = load <8 x i16>, ptr %A
1674 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
1675 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; The sqrshrn.v4i16 result is concatenated after the <4 x i16> loaded from
; %ret (shuffle mask 0-7): the expected output is a single sqrshrn2.8h that
; writes into the register already holding that loaded value.
1679 define <8 x i16> @sqrshrn8h(ptr %ret, ptr %A) nounwind {
1680 ; CHECK-LABEL: sqrshrn8h:
1682 ; CHECK-NEXT: ldr d0, [x0]
1683 ; CHECK-NEXT: ldr q1, [x1]
1684 ; CHECK-NEXT: sqrshrn2.8h v0, v1, #1
1686 %out = load <4 x i16>, ptr %ret
1687 %tmp1 = load <4 x i32>, ptr %A
1688 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
1689 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; The sqrshrn.v2i32 result is concatenated after the <2 x i32> loaded from
; %ret (shuffle mask 0-3): the expected output is a single sqrshrn2.4s that
; writes into the register already holding that loaded value.
1693 define <4 x i32> @sqrshrn4s(ptr %ret, ptr %A) nounwind {
1694 ; CHECK-LABEL: sqrshrn4s:
1696 ; CHECK-NEXT: ldr d0, [x0]
1697 ; CHECK-NEXT: ldr q1, [x1]
1698 ; CHECK-NEXT: sqrshrn2.4s v0, v1, #1
1700 %out = load <2 x i32>, ptr %ret
1701 %tmp1 = load <2 x i64>, ptr %A
1702 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
1703 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Declarations of the sqrshrn intrinsics called by the sqrshrn* tests: one
; scalar (i64 -> i32) form and three narrowing vector forms.
1707 declare i32 @llvm.aarch64.neon.sqrshrn.i32(i64, i32) nounwind readnone
1708 declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
1709 declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
1710 declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
; Scalar sqrshrun.i32 on an i64 argument with shift amount 1: the expected
; output moves x0 into d0, applies sqrshrun s0, d0, #1, and moves s0 back to
; w0.
1712 define i32 @sqrshrun1s(i64 %A) nounwind {
1713 ; CHECK-LABEL: sqrshrun1s:
1715 ; CHECK-NEXT: fmov d0, x0
1716 ; CHECK-NEXT: sqrshrun s0, d0, #1
1717 ; CHECK-NEXT: fmov w0, s0
1719 %tmp = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %A, i32 1)
; sqrshrun.v8i8 narrowing an <8 x i16> input with shift amount 1: the expected
; output is sqrshrun.8b #1.
1723 define <8 x i8> @sqrshrun8b(ptr %A) nounwind {
1724 ; CHECK-LABEL: sqrshrun8b:
1726 ; CHECK-NEXT: ldr q0, [x0]
1727 ; CHECK-NEXT: sqrshrun.8b v0, v0, #1
1729 %tmp1 = load <8 x i16>, ptr %A
1730 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
; sqrshrun.v4i16 narrowing a <4 x i32> input with shift amount 1: the expected
; output is sqrshrun.4h #1.
1734 define <4 x i16> @sqrshrun4h(ptr %A) nounwind {
1735 ; CHECK-LABEL: sqrshrun4h:
1737 ; CHECK-NEXT: ldr q0, [x0]
1738 ; CHECK-NEXT: sqrshrun.4h v0, v0, #1
1740 %tmp1 = load <4 x i32>, ptr %A
1741 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
; sqrshrun.v2i32 narrowing a <2 x i64> input with shift amount 1: the expected
; output is sqrshrun.2s #1.
1745 define <2 x i32> @sqrshrun2s(ptr %A) nounwind {
1746 ; CHECK-LABEL: sqrshrun2s:
1748 ; CHECK-NEXT: ldr q0, [x0]
1749 ; CHECK-NEXT: sqrshrun.2s v0, v0, #1
1751 %tmp1 = load <2 x i64>, ptr %A
1752 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
; The sqrshrun.v8i8 result is concatenated after the <8 x i8> loaded from %ret
; (shuffle mask 0-15): the expected output is a single sqrshrun2.16b that
; writes into the register already holding that loaded value.
1756 define <16 x i8> @sqrshrun16b(ptr %ret, ptr %A) nounwind {
1757 ; CHECK-LABEL: sqrshrun16b:
1759 ; CHECK-NEXT: ldr d0, [x0]
1760 ; CHECK-NEXT: ldr q1, [x1]
1761 ; CHECK-NEXT: sqrshrun2.16b v0, v1, #1
1763 %out = load <8 x i8>, ptr %ret
1764 %tmp1 = load <8 x i16>, ptr %A
1765 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
1766 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; The sqrshrun.v4i16 result is concatenated after the <4 x i16> loaded from
; %ret (shuffle mask 0-7): the expected output is a single sqrshrun2.8h that
; writes into the register already holding that loaded value.
1770 define <8 x i16> @sqrshrun8h(ptr %ret, ptr %A) nounwind {
1771 ; CHECK-LABEL: sqrshrun8h:
1773 ; CHECK-NEXT: ldr d0, [x0]
1774 ; CHECK-NEXT: ldr q1, [x1]
1775 ; CHECK-NEXT: sqrshrun2.8h v0, v1, #1
1777 %out = load <4 x i16>, ptr %ret
1778 %tmp1 = load <4 x i32>, ptr %A
1779 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
1780 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; The sqrshrun.v2i32 result is concatenated after the <2 x i32> loaded from
; %ret (shuffle mask 0-3): the expected output is a single sqrshrun2.4s that
; writes into the register already holding that loaded value.
1784 define <4 x i32> @sqrshrun4s(ptr %ret, ptr %A) nounwind {
1785 ; CHECK-LABEL: sqrshrun4s:
1787 ; CHECK-NEXT: ldr d0, [x0]
1788 ; CHECK-NEXT: ldr q1, [x1]
1789 ; CHECK-NEXT: sqrshrun2.4s v0, v1, #1
1791 %out = load <2 x i32>, ptr %ret
1792 %tmp1 = load <2 x i64>, ptr %A
1793 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
1794 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Declarations of the sqrshrun intrinsics called by the sqrshrun* tests: one
; scalar (i64 -> i32) form and three narrowing vector forms.
1798 declare i32 @llvm.aarch64.neon.sqrshrun.i32(i64, i32) nounwind readnone
1799 declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone
1800 declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone
1801 declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone
; Scalar uqrshrn.i32 on an i64 argument with shift amount 1: the expected
; output moves x0 into d0, applies uqrshrn s0, d0, #1, and moves s0 back to w0.
1803 define i32 @uqrshrn1s(i64 %A) nounwind {
1804 ; CHECK-LABEL: uqrshrn1s:
1806 ; CHECK-NEXT: fmov d0, x0
1807 ; CHECK-NEXT: uqrshrn s0, d0, #1
1808 ; CHECK-NEXT: fmov w0, s0
1810 %tmp = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %A, i32 1)
; uqrshrn.v8i8 narrowing an <8 x i16> input with shift amount 1: the expected
; output is uqrshrn.8b #1.
1814 define <8 x i8> @uqrshrn8b(ptr %A) nounwind {
1815 ; CHECK-LABEL: uqrshrn8b:
1817 ; CHECK-NEXT: ldr q0, [x0]
1818 ; CHECK-NEXT: uqrshrn.8b v0, v0, #1
1820 %tmp1 = load <8 x i16>, ptr %A
1821 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
; uqrshrn.v4i16 narrowing a <4 x i32> input with shift amount 1: the expected
; output is uqrshrn.4h #1.
1825 define <4 x i16> @uqrshrn4h(ptr %A) nounwind {
1826 ; CHECK-LABEL: uqrshrn4h:
1828 ; CHECK-NEXT: ldr q0, [x0]
1829 ; CHECK-NEXT: uqrshrn.4h v0, v0, #1
1831 %tmp1 = load <4 x i32>, ptr %A
1832 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
; uqrshrn.v2i32 narrowing a <2 x i64> input with shift amount 1: the expected
; output is uqrshrn.2s #1.
1836 define <2 x i32> @uqrshrn2s(ptr %A) nounwind {
1837 ; CHECK-LABEL: uqrshrn2s:
1839 ; CHECK-NEXT: ldr q0, [x0]
1840 ; CHECK-NEXT: uqrshrn.2s v0, v0, #1
1842 %tmp1 = load <2 x i64>, ptr %A
1843 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
; The uqrshrn.v8i8 result is concatenated after the <8 x i8> loaded from %ret
; (shuffle mask 0-15): the expected output is a single uqrshrn2.16b that
; writes into the register already holding that loaded value.
1847 define <16 x i8> @uqrshrn16b(ptr %ret, ptr %A) nounwind {
1848 ; CHECK-LABEL: uqrshrn16b:
1850 ; CHECK-NEXT: ldr d0, [x0]
1851 ; CHECK-NEXT: ldr q1, [x1]
1852 ; CHECK-NEXT: uqrshrn2.16b v0, v1, #1
1854 %out = load <8 x i8>, ptr %ret
1855 %tmp1 = load <8 x i16>, ptr %A
1856 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
1857 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; The uqrshrn.v4i16 result is concatenated after the <4 x i16> loaded from
; %ret (shuffle mask 0-7): the expected output is a single uqrshrn2.8h that
; writes into the register already holding that loaded value.
1861 define <8 x i16> @uqrshrn8h(ptr %ret, ptr %A) nounwind {
1862 ; CHECK-LABEL: uqrshrn8h:
1864 ; CHECK-NEXT: ldr d0, [x0]
1865 ; CHECK-NEXT: ldr q1, [x1]
1866 ; CHECK-NEXT: uqrshrn2.8h v0, v1, #1
1868 %out = load <4 x i16>, ptr %ret
1869 %tmp1 = load <4 x i32>, ptr %A
1870 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
1871 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; The uqrshrn.v2i32 result is concatenated after the <2 x i32> loaded from
; %ret (shuffle mask 0-3): the expected output is a single uqrshrn2.4s that
; writes into the register already holding that loaded value.
1875 define <4 x i32> @uqrshrn4s(ptr %ret, ptr %A) nounwind {
1876 ; CHECK-LABEL: uqrshrn4s:
1878 ; CHECK-NEXT: ldr d0, [x0]
1879 ; CHECK-NEXT: ldr q1, [x1]
1880 ; CHECK-NEXT: uqrshrn2.4s v0, v1, #1
1882 %out = load <2 x i32>, ptr %ret
1883 %tmp1 = load <2 x i64>, ptr %A
1884 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
1885 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Declarations of the uqrshrn intrinsics called by the uqrshrn* tests: one
; scalar (i64 -> i32) form and three narrowing vector forms.
1889 declare i32 @llvm.aarch64.neon.uqrshrn.i32(i64, i32) nounwind readnone
1890 declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
1891 declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
1892 declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
; Scalar uqshrn.i32 on an i64 argument with shift amount 1: the expected
; output moves x0 into d0, applies uqshrn s0, d0, #1, and moves s0 back to w0.
1894 define i32 @uqshrn1s(i64 %A) nounwind {
1895 ; CHECK-LABEL: uqshrn1s:
1897 ; CHECK-NEXT: fmov d0, x0
1898 ; CHECK-NEXT: uqshrn s0, d0, #1
1899 ; CHECK-NEXT: fmov w0, s0
1901 %tmp = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %A, i32 1)
; uqshrn.v8i8 narrowing an <8 x i16> input with shift amount 1: the expected
; output is uqshrn.8b #1.
1905 define <8 x i8> @uqshrn8b(ptr %A) nounwind {
1906 ; CHECK-LABEL: uqshrn8b:
1908 ; CHECK-NEXT: ldr q0, [x0]
1909 ; CHECK-NEXT: uqshrn.8b v0, v0, #1
1911 %tmp1 = load <8 x i16>, ptr %A
1912 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
; uqshrn.v4i16 narrowing a <4 x i32> input with shift amount 1: the expected
; output is uqshrn.4h #1.
1916 define <4 x i16> @uqshrn4h(ptr %A) nounwind {
1917 ; CHECK-LABEL: uqshrn4h:
1919 ; CHECK-NEXT: ldr q0, [x0]
1920 ; CHECK-NEXT: uqshrn.4h v0, v0, #1
1922 %tmp1 = load <4 x i32>, ptr %A
1923 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
; uqshrn.v2i32 narrowing a <2 x i64> input with shift amount 1: the expected
; output is uqshrn.2s #1.
1927 define <2 x i32> @uqshrn2s(ptr %A) nounwind {
1928 ; CHECK-LABEL: uqshrn2s:
1930 ; CHECK-NEXT: ldr q0, [x0]
1931 ; CHECK-NEXT: uqshrn.2s v0, v0, #1
1933 %tmp1 = load <2 x i64>, ptr %A
1934 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
; The uqshrn.v8i8 result is concatenated after the <8 x i8> loaded from %ret
; (shuffle mask 0-15): the expected output is a single uqshrn2.16b that writes
; into the register already holding that loaded value.
1938 define <16 x i8> @uqshrn16b(ptr %ret, ptr %A) nounwind {
1939 ; CHECK-LABEL: uqshrn16b:
1941 ; CHECK-NEXT: ldr d0, [x0]
1942 ; CHECK-NEXT: ldr q1, [x1]
1943 ; CHECK-NEXT: uqshrn2.16b v0, v1, #1
1945 %out = load <8 x i8>, ptr %ret
1946 %tmp1 = load <8 x i16>, ptr %A
1947 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
1948 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; The uqshrn.v4i16 result is concatenated after the <4 x i16> loaded from %ret
; (shuffle mask 0-7): the expected output is a single uqshrn2.8h that writes
; into the register already holding that loaded value.
1952 define <8 x i16> @uqshrn8h(ptr %ret, ptr %A) nounwind {
1953 ; CHECK-LABEL: uqshrn8h:
1955 ; CHECK-NEXT: ldr d0, [x0]
1956 ; CHECK-NEXT: ldr q1, [x1]
1957 ; CHECK-NEXT: uqshrn2.8h v0, v1, #1
1959 %out = load <4 x i16>, ptr %ret
1960 %tmp1 = load <4 x i32>, ptr %A
1961 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
1962 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; The uqshrn.v2i32 result is concatenated after the <2 x i32> loaded from %ret
; (shuffle mask 0-3): the expected output is a single uqshrn2.4s that writes
; into the register already holding that loaded value.
1966 define <4 x i32> @uqshrn4s(ptr %ret, ptr %A) nounwind {
1967 ; CHECK-LABEL: uqshrn4s:
1969 ; CHECK-NEXT: ldr d0, [x0]
1970 ; CHECK-NEXT: ldr q1, [x1]
1971 ; CHECK-NEXT: uqshrn2.4s v0, v1, #1
1973 %out = load <2 x i32>, ptr %ret
1974 %tmp1 = load <2 x i64>, ptr %A
1975 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
1976 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Declarations of the uqshrn intrinsics called by the uqshrn* tests: one
; scalar (i64 -> i32) form and three narrowing vector forms.
1980 declare i32 @llvm.aarch64.neon.uqshrn.i32(i64, i32) nounwind readnone
1981 declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone
1982 declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone
1983 declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone
; Generic IR pattern (zext <8 x i8> to <8 x i16>, then shl by a splat of 1):
; the expected output is the single widening shift ushll.8h #1.
1985 define <8 x i16> @ushll8h(ptr %A) nounwind {
1986 ; CHECK-LABEL: ushll8h:
1988 ; CHECK-NEXT: ldr d0, [x0]
1989 ; CHECK-NEXT: ushll.8h v0, v0, #1
1991 %tmp1 = load <8 x i8>, ptr %A
1992 %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
1993 %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Generic IR pattern (zext <4 x i16> to <4 x i32>, then shl by a splat of 1):
; the expected output is the single widening shift ushll.4s #1.
1997 define <4 x i32> @ushll4s(ptr %A) nounwind {
1998 ; CHECK-LABEL: ushll4s:
2000 ; CHECK-NEXT: ldr d0, [x0]
2001 ; CHECK-NEXT: ushll.4s v0, v0, #1
2003 %tmp1 = load <4 x i16>, ptr %A
2004 %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
2005 %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
; Generic IR pattern (zext <2 x i32> to <2 x i64>, then shl by a splat of 1):
; the expected output is the single widening shift ushll.2d #1.
2009 define <2 x i64> @ushll2d(ptr %A) nounwind {
2010 ; CHECK-LABEL: ushll2d:
2012 ; CHECK-NEXT: ldr d0, [x0]
2013 ; CHECK-NEXT: ushll.2d v0, v0, #1
2015 %tmp1 = load <2 x i32>, ptr %A
2016 %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
2017 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
; Extracts lanes 8-15 of a <16 x i8> load, zero-extends them and shifts left
; by 1. The expected output loads only the upper 8 bytes (ldr d0 at offset #8)
; and then uses ushll.8h #1.
2021 define <8 x i16> @ushll2_8h(ptr %A) nounwind {
2022 ; CHECK-LABEL: ushll2_8h:
2024 ; CHECK-NEXT: ldr d0, [x0, #8]
2025 ; CHECK-NEXT: ushll.8h v0, v0, #1
2027 %load1 = load <16 x i8>, ptr %A
2028 %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2029 %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
2030 %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Extracts lanes 4-7 of an <8 x i16> load, zero-extends them and shifts left
; by 1. The expected output loads only the upper 8 bytes (ldr d0 at offset #8)
; and then uses ushll.4s #1.
2034 define <4 x i32> @ushll2_4s(ptr %A) nounwind {
2035 ; CHECK-LABEL: ushll2_4s:
2037 ; CHECK-NEXT: ldr d0, [x0, #8]
2038 ; CHECK-NEXT: ushll.4s v0, v0, #1
2040 %load1 = load <8 x i16>, ptr %A
2041 %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2042 %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
2043 %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
; Extracts lanes 2-3 of a <4 x i32> load, zero-extends them and shifts left
; by 1. The expected output loads only the upper 8 bytes (ldr d0 at offset #8)
; and then uses ushll.2d #1.
2047 define <2 x i64> @ushll2_2d(ptr %A) nounwind {
2048 ; CHECK-LABEL: ushll2_2d:
2050 ; CHECK-NEXT: ldr d0, [x0, #8]
2051 ; CHECK-NEXT: ushll.2d v0, v0, #1
2053 %load1 = load <4 x i32>, ptr %A
2054 %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2055 %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
2056 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
; Declarations of the ushl intrinsics (128-bit vector, <1 x i64> and scalar
; i64 forms); the neon.ushl* tests that follow call the v8i16 and v4i32 forms.
2060 declare <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8>, <16 x i8>)
2061 declare <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16>, <8 x i16>)
2062 declare <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32>, <4 x i32>)
2063 declare <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64>, <2 x i64>)
2064 declare <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64>, <1 x i64>)
2065 declare i64 @llvm.aarch64.neon.ushl.i64(i64, i64)
; zext <8 x i8> to <8 x i16> followed by the ushl intrinsic with a constant
; splat of 1: the expected output folds both into a single ushll.8h #1.
2067 define <8 x i16> @neon.ushll8h_constant_shift(ptr %A) nounwind {
2068 ; CHECK-LABEL: neon.ushll8h_constant_shift:
2070 ; CHECK-NEXT: ldr d0, [x0]
2071 ; CHECK-NEXT: ushll.8h v0, v0, #1
2073 %tmp1 = load <8 x i8>, ptr %A
2074 %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
2075 %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp2)
; Negative case: the shift amount is the non-constant value %tmp2, so the
; expected output keeps a separate zero-extend (ushll.8h #0) followed by the
; register form ushl.8h.
2079 define <8 x i16> @neon.ushl8h_no_constant_shift(ptr %A) nounwind {
2080 ; CHECK-LABEL: neon.ushl8h_no_constant_shift:
2082 ; CHECK-NEXT: ldr d0, [x0]
2083 ; CHECK-NEXT: ushll.8h v0, v0, #0
2084 ; CHECK-NEXT: ushl.8h v0, v0, v0
2086 %tmp1 = load <8 x i8>, ptr %A
2087 %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
2088 %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp2)
; The zext here widens i8 lanes to i32 (4x rather than 2x) before the constant
; ushl by 1. The expected output extends in two steps: ushll.8h #0 first, then
; ushll.4s #1, which also carries the shift.
2092 define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind {
2093 ; CHECK-LABEL: neon.ushl8h_constant_shift_extend_not_2x:
2095 ; CHECK-NEXT: ldr s0, [x0]
2096 ; CHECK-NEXT: ushll.8h v0, v0, #0
2097 ; CHECK-NEXT: ushll.4s v0, v0, #1
2099 %tmp1 = load <4 x i8>, ptr %A
2100 %tmp2 = zext <4 x i8> %tmp1 to <4 x i32>
2101 %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; ushl intrinsic with a splat shift of 1 applied directly to a loaded
; <8 x i16> (no extend). The expected output is add.8h v0, v0, v0.
2105 define <8 x i16> @neon.ushl8_noext_constant_shift(ptr %A) nounwind {
2106 ; CHECK-LABEL: neon.ushl8_noext_constant_shift:
2108 ; CHECK-NEXT: ldr q0, [x0]
2109 ; CHECK-NEXT: add.8h v0, v0, v0
2111 %tmp1 = load <8 x i16>, ptr %A
2112 %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; zext <4 x i16> to <4 x i32> feeding ushl with a splat shift of 1.
; The expected output is a single ushll.4s #1.
2116 define <4 x i32> @neon.ushll4s_constant_shift(ptr %A) nounwind {
2117 ; CHECK-LABEL: neon.ushll4s_constant_shift:
2119 ; CHECK-NEXT: ldr d0, [x0]
2120 ; CHECK-NEXT: ushll.4s v0, v0, #1
2122 %tmp1 = load <4 x i16>, ptr %A
2123 %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
2124 %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; zext <4 x i16> to <4 x i32> feeding ushl with a splat shift of -1. The
; expected output keeps the extend (ushll.4s #0), materializes an all-ones
; vector with movi.2d, and uses the register form ushl.4s.
2128 ; FIXME: unnecessary ushll.4s v0, v0, #0?
2129 define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind {
2130 ; CHECK-LABEL: neon.ushll4s_neg_constant_shift:
2132 ; CHECK-NEXT: ldr d0, [x0]
2133 ; CHECK-NEXT: movi.2d v1, #0xffffffffffffffff
2134 ; CHECK-NEXT: ushll.4s v0, v0, #0
2135 ; CHECK-NEXT: ushl.4s v0, v0, v1
2137 %tmp1 = load <4 x i16>, ptr %A
2138 %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
2139 %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; ushl intrinsic with both operands constant (<0, 1, 2, 3> shifted by a splat
; of 1). The expected output still loads the first operand from a constant
; pool entry and doubles it with add.4s, i.e. no folding happens yet.
2143 ; FIXME: should be constant folded.
2144 define <4 x i32> @neon.ushll4s_constant_fold() nounwind {
2145 ; CHECK-LABEL: neon.ushll4s_constant_fold:
2147 ; CHECK-NEXT: adrp x8, .LCPI160_0
2148 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI160_0]
2149 ; CHECK-NEXT: add.4s v0, v0, v0
2151 %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; zext <2 x i32> to <2 x i64> feeding ushl with a splat shift of 1.
; The expected output is a single ushll.2d #1.
2155 define <2 x i64> @neon.ushll2d_constant_shift(ptr %A) nounwind {
2156 ; CHECK-LABEL: neon.ushll2d_constant_shift:
2158 ; CHECK-NEXT: ldr d0, [x0]
2159 ; CHECK-NEXT: ushll.2d v0, v0, #1
2161 %tmp1 = load <2 x i32>, ptr %A
2162 %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
2163 %tmp3 = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 1, i64 1>)
; Single-element vector case: zext <1 x i32> to <1 x i64> feeding ushl with a
; shift of 1. The expected output builds the zero-extended value by zipping
; the loaded s-register with a zero vector, then uses the scalar shl d0, #1.
2167 define <1 x i64> @neon.ushl_vscalar_constant_shift(ptr %A) nounwind {
2168 ; CHECK-LABEL: neon.ushl_vscalar_constant_shift:
2170 ; CHECK-NEXT: movi.2d v1, #0000000000000000
2171 ; CHECK-NEXT: ldr s0, [x0]
2172 ; CHECK-NEXT: zip1.2s v0, v0, v1
2173 ; CHECK-NEXT: shl d0, d0, #1
2175 %tmp1 = load <1 x i32>, ptr %A
2176 %tmp2 = zext <1 x i32> %tmp1 to <1 x i64>
2177 %tmp3 = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %tmp2, <1 x i64> <i64 1>)
; Scalar i64 form of the ushl intrinsic: an i32 load is zero-extended to i64
; and shifted by the constant 1. The expected output moves the value into d0,
; applies shl d0, d0, #1 and moves the result back to x0.
2181 define i64 @neon.ushl_scalar_constant_shift(ptr %A) nounwind {
2182 ; CHECK-LABEL: neon.ushl_scalar_constant_shift:
2184 ; CHECK-NEXT: ldr w8, [x0]
2185 ; CHECK-NEXT: fmov d0, x8
2186 ; CHECK-NEXT: shl d0, d0, #1
2187 ; CHECK-NEXT: fmov x0, d0
2189 %tmp1 = load i32, ptr %A
2190 %tmp2 = zext i32 %tmp1 to i64
2191 %tmp3 = call i64 @llvm.aarch64.neon.ushl.i64(i64 %tmp2, i64 1)
; sext <8 x i8> to <8 x i16> followed by a plain IR shl by a splat of 1.
; The expected output is a single sshll.8h #1.
2195 define <8 x i16> @sshll8h(ptr %A) nounwind {
2196 ; CHECK-LABEL: sshll8h:
2198 ; CHECK-NEXT: ldr d0, [x0]
2199 ; CHECK-NEXT: sshll.8h v0, v0, #1
2201 %tmp1 = load <8 x i8>, ptr %A
2202 %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
2203 %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; sext <2 x i32> to <2 x i64> followed by a plain IR shl by a splat of 1.
; The expected output is a single sshll.2d #1.
2207 define <2 x i64> @sshll2d(ptr %A) nounwind {
2208 ; CHECK-LABEL: sshll2d:
2210 ; CHECK-NEXT: ldr d0, [x0]
2211 ; CHECK-NEXT: sshll.2d v0, v0, #1
2213 %tmp1 = load <2 x i32>, ptr %A
2214 %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
2215 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
2219 declare <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8>, <16 x i8>)
2220 declare <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16>, <8 x i16>)
2221 declare <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32>, <4 x i32>)
2222 declare <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64>, <2 x i64>)
2223 declare <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64>, <1 x i64>)
2224 declare i64 @llvm.aarch64.neon.sshl.i64(i64, i64)
; sshl intrinsic with a splat shift of 1 on a loaded <16 x i8>.
; The expected output is add.16b v0, v0, v0.
2226 define <16 x i8> @neon.sshl16b_constant_shift(ptr %A) nounwind {
2227 ; CHECK-LABEL: neon.sshl16b_constant_shift:
2229 ; CHECK-NEXT: ldr q0, [x0]
2230 ; CHECK-NEXT: add.16b v0, v0, v0
2232 %tmp1 = load <16 x i8>, ptr %A
2233 %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; sshl intrinsic with a constant but non-uniform shift vector (first lane 6,
; remaining lanes 1). The expected output loads the shift vector from a
; constant pool entry and uses the register form sshl.16b.
2237 define <16 x i8> @neon.sshl16b_non_splat_constant_shift(ptr %A) nounwind {
2238 ; CHECK-LABEL: neon.sshl16b_non_splat_constant_shift:
2240 ; CHECK-NEXT: adrp x8, .LCPI167_0
2241 ; CHECK-NEXT: ldr q0, [x0]
2242 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI167_0]
2243 ; CHECK-NEXT: sshl.16b v0, v0, v1
2245 %tmp1 = load <16 x i8>, ptr %A
2246 %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 6, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; sshl intrinsic with a splat shift of -2. The expected output materializes
; the shift vector with movi.16b #254 and uses the register form sshl.16b.
2250 define <16 x i8> @neon.sshl16b_neg_constant_shift(ptr %A) nounwind {
2251 ; CHECK-LABEL: neon.sshl16b_neg_constant_shift:
2253 ; CHECK-NEXT: movi.16b v1, #254
2254 ; CHECK-NEXT: ldr q0, [x0]
2255 ; CHECK-NEXT: sshl.16b v0, v0, v1
2257 %tmp1 = load <16 x i8>, ptr %A
2258 %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>)
; sext <8 x i8> to <8 x i16> feeding the sshl intrinsic with a splat shift
; of 1. The expected output is a single sshll.8h #1.
2262 define <8 x i16> @neon.sshll8h_constant_shift(ptr %A) nounwind {
2263 ; CHECK-LABEL: neon.sshll8h_constant_shift:
2265 ; CHECK-NEXT: ldr d0, [x0]
2266 ; CHECK-NEXT: sshll.8h v0, v0, #1
2268 %tmp1 = load <8 x i8>, ptr %A
2269 %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
2270 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %tmp2, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; sext from <4 x i8> straight to <4 x i32> (4x widening) feeding sshl with a
; splat shift of 1. The expected output extends in two steps: sshll.8h #0,
; then sshll.4s #1 which also applies the shift.
2274 define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(ptr %A) nounwind {
2275 ; CHECK-LABEL: neon.sshl4s_wrong_ext_constant_shift:
2277 ; CHECK-NEXT: ldr s0, [x0]
2278 ; CHECK-NEXT: sshll.8h v0, v0, #0
2279 ; CHECK-NEXT: sshll.4s v0, v0, #1
2281 %tmp1 = load <4 x i8>, ptr %A
2282 %tmp2 = sext <4 x i8> %tmp1 to <4 x i32>
2283 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; sext <4 x i16> to <4 x i32> feeding sshl with a splat shift of 1.
; The expected output is a single sshll.4s #1.
2287 define <4 x i32> @neon.sshll4s_constant_shift(ptr %A) nounwind {
2288 ; CHECK-LABEL: neon.sshll4s_constant_shift:
2290 ; CHECK-NEXT: ldr d0, [x0]
2291 ; CHECK-NEXT: sshll.4s v0, v0, #1
2293 %tmp1 = load <4 x i16>, ptr %A
2294 %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
2295 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; sext <4 x i16> to <4 x i32> feeding sshl with a splat shift of -1. The
; expected output keeps the extend (sshll.4s #0), materializes an all-ones
; vector with movi.2d, and uses the register form sshl.4s.
2299 define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind {
2300 ; CHECK-LABEL: neon.sshll4s_neg_constant_shift:
2302 ; CHECK-NEXT: ldr d0, [x0]
2303 ; CHECK-NEXT: movi.2d v1, #0xffffffffffffffff
2304 ; CHECK-NEXT: sshll.4s v0, v0, #0
2305 ; CHECK-NEXT: sshl.4s v0, v0, v1
2307 %tmp1 = load <4 x i16>, ptr %A
2308 %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
2309 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; sshl intrinsic with both operands constant (<0, 1, 2, 3> shifted by a splat
; of 2). The expected output still loads the first operand from a constant
; pool entry and applies shl.4s #2, i.e. no folding happens yet.
2313 ; FIXME: should be constant folded.
2314 define <4 x i32> @neon.sshl4s_constant_fold() nounwind {
2315 ; CHECK-LABEL: neon.sshl4s_constant_fold:
2317 ; CHECK-NEXT: adrp x8, .LCPI173_0
2318 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI173_0]
2319 ; CHECK-NEXT: shl.4s v0, v0, #2
2321 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
; sshl intrinsic with a splat shift of 1 on a value loaded from memory, so
; there is nothing to constant-fold. The expected output is add.4s v0, v0, v0.
2325 define <4 x i32> @neon.sshl4s_no_fold(ptr %A) nounwind {
2326 ; CHECK-LABEL: neon.sshl4s_no_fold:
2328 ; CHECK-NEXT: ldr q0, [x0]
2329 ; CHECK-NEXT: add.4s v0, v0, v0
2331 %tmp1 = load <4 x i32>, ptr %A
2332 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; sext <2 x i32> to <2 x i64> feeding sshl with a splat shift of 1.
; The expected output is a single sshll.2d #1.
2336 define <2 x i64> @neon.sshll2d_constant_shift(ptr %A) nounwind {
2337 ; CHECK-LABEL: neon.sshll2d_constant_shift:
2339 ; CHECK-NEXT: ldr d0, [x0]
2340 ; CHECK-NEXT: sshll.2d v0, v0, #1
2342 %tmp1 = load <2 x i32>, ptr %A
2343 %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
2344 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 1, i64 1>)
; Single-element vector case for the sshl intrinsic with a shift of 1. The
; expected output zips the loaded s-register with a zero vector and then uses
; the scalar shl d0, #1.
; NOTE(review): the input is widened with zext, not sext, despite the "sshll"
; name; the zero-fill in the expected output matches the zext. Confirm that
; this is intentional.
2348 define <1 x i64> @neon.sshll_vscalar_constant_shift(ptr %A) nounwind {
2349 ; CHECK-LABEL: neon.sshll_vscalar_constant_shift:
2351 ; CHECK-NEXT: movi.2d v1, #0000000000000000
2352 ; CHECK-NEXT: ldr s0, [x0]
2353 ; CHECK-NEXT: zip1.2s v0, v0, v1
2354 ; CHECK-NEXT: shl d0, d0, #1
2356 %tmp1 = load <1 x i32>, ptr %A
2357 %tmp2 = zext <1 x i32> %tmp1 to <1 x i64>
2358 %tmp3 = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %tmp2, <1 x i64> <i64 1>)
; Scalar i64 form of the sshl intrinsic with the constant shift 1. The
; expected output moves the loaded value into d0, applies shl d0, d0, #1 and
; moves the result back to x0.
; NOTE(review): the i32 input is widened with zext, not sext, despite the
; "sshll" name. Confirm that this is intentional.
2362 define i64 @neon.sshll_scalar_constant_shift(ptr %A) nounwind {
2363 ; CHECK-LABEL: neon.sshll_scalar_constant_shift:
2365 ; CHECK-NEXT: ldr w8, [x0]
2366 ; CHECK-NEXT: fmov d0, x8
2367 ; CHECK-NEXT: shl d0, d0, #1
2368 ; CHECK-NEXT: fmov x0, d0
2370 %tmp1 = load i32, ptr %A
2371 %tmp2 = zext i32 %tmp1 to i64
2372 %tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 1)
; Scalar i64 form of the sshl intrinsic with the constant shift -1. The
; expected output materializes -1 in x9, moves it to d1 and uses the register
; form sshl d0, d0, d1 instead of an immediate shift.
; NOTE(review): the i32 input is widened with zext, not sext, despite the
; "sshll" name. Confirm that this is intentional.
2376 define i64 @neon.sshll_scalar_constant_shift_m1(ptr %A) nounwind {
2377 ; CHECK-LABEL: neon.sshll_scalar_constant_shift_m1:
2379 ; CHECK-NEXT: ldr w8, [x0]
2380 ; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
2381 ; CHECK-NEXT: fmov d1, x9
2382 ; CHECK-NEXT: fmov d0, x8
2383 ; CHECK-NEXT: sshl d0, d0, d1
2384 ; CHECK-NEXT: fmov x0, d0
2386 %tmp1 = load i32, ptr %A
2387 %tmp2 = zext i32 %tmp1 to i64
2388 %tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 -1)
; sshl intrinsic with both operands constant (<99, 1000> shifted by a splat
; of 1). The expected output still loads the first operand from a constant
; pool entry and doubles it with add.2d, i.e. no folding happens yet.
2392 ; FIXME: should be constant folded.
2393 define <2 x i64> @neon.sshl2d_constant_fold() nounwind {
2394 ; CHECK-LABEL: neon.sshl2d_constant_fold:
2396 ; CHECK-NEXT: adrp x8, .LCPI179_0
2397 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI179_0]
2398 ; CHECK-NEXT: add.2d v0, v0, v0
2400 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> <i64 99, i64 1000>, <2 x i64> <i64 1, i64 1>)
; sshl intrinsic with a splat shift of 2 on a value loaded from memory, so
; there is nothing to constant-fold. The expected output is shl.2d #2.
2404 define <2 x i64> @neon.sshl2d_no_fold(ptr %A) nounwind {
2405 ; CHECK-LABEL: neon.sshl2d_no_fold:
2407 ; CHECK-NEXT: ldr q0, [x0]
2408 ; CHECK-NEXT: shl.2d v0, v0, #2
2410 %tmp2 = load <2 x i64>, ptr %A
2411 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 2, i64 2>)
; sshll2_8h: takes lanes 8-15 of a loaded <16 x i8>, sign-extends them to
; <8 x i16> and shifts left by 1. The expected output loads the high half
; directly (ldr d0, [x0, #8]) and uses one sshll.8h #1.
2415 define <8 x i16> @sshll2_8h(ptr %A) nounwind {
2416 ; CHECK-LABEL: sshll2_8h:
2418 ; CHECK-NEXT: ldr d0, [x0, #8]
2419 ; CHECK-NEXT: sshll.8h v0, v0, #1
2421 %load1 = load <16 x i8>, ptr %A
2422 %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2423 %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
2424 %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; sshll2_4s: takes lanes 4-7 of a loaded <8 x i16>, sign-extends them to
; <4 x i32> and shifts left by 1. The expected output loads the high half
; directly (ldr d0, [x0, #8]) and uses one sshll.4s #1.
2428 define <4 x i32> @sshll2_4s(ptr %A) nounwind {
2429 ; CHECK-LABEL: sshll2_4s:
2431 ; CHECK-NEXT: ldr d0, [x0, #8]
2432 ; CHECK-NEXT: sshll.4s v0, v0, #1
2434 %load1 = load <8 x i16>, ptr %A
2435 %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2436 %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
2437 %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
; sshll2_2d: takes lanes 2-3 of a loaded <4 x i32>, sign-extends them to
; <2 x i64> and shifts left by 1. The expected output loads the high half
; directly (ldr d0, [x0, #8]) and uses one sshll.2d #1.
2441 define <2 x i64> @sshll2_2d(ptr %A) nounwind {
2442 ; CHECK-LABEL: sshll2_2d:
2444 ; CHECK-NEXT: ldr d0, [x0, #8]
2445 ; CHECK-NEXT: sshll.2d v0, v0, #1
2447 %load1 = load <4 x i32>, ptr %A
2448 %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2449 %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
2450 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
; sqshl intrinsic on <8 x i8> with a splat shift of 1. The expected output
; uses the immediate form sqshl.8b #1.
2454 define <8 x i8> @sqshli8b(ptr %A) nounwind {
2455 ; CHECK-LABEL: sqshli8b:
2457 ; CHECK-NEXT: ldr d0, [x0]
2458 ; CHECK-NEXT: sqshl.8b v0, v0, #1
2460 %tmp1 = load <8 x i8>, ptr %A
2461 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; sqshl intrinsic on <4 x i16> with a splat shift of 1. The expected output
; uses the immediate form sqshl.4h #1.
2465 define <4 x i16> @sqshli4h(ptr %A) nounwind {
2466 ; CHECK-LABEL: sqshli4h:
2468 ; CHECK-NEXT: ldr d0, [x0]
2469 ; CHECK-NEXT: sqshl.4h v0, v0, #1
2471 %tmp1 = load <4 x i16>, ptr %A
2472 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
; sqshl intrinsic on <2 x i32> with a splat shift of 1. The expected output
; uses the immediate form sqshl.2s #1.
2476 define <2 x i32> @sqshli2s(ptr %A) nounwind {
2477 ; CHECK-LABEL: sqshli2s:
2479 ; CHECK-NEXT: ldr d0, [x0]
2480 ; CHECK-NEXT: sqshl.2s v0, v0, #1
2482 %tmp1 = load <2 x i32>, ptr %A
2483 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
; sqshl intrinsic on <16 x i8> with a splat shift of 1. The expected output
; uses the immediate form sqshl.16b #1.
2487 define <16 x i8> @sqshli16b(ptr %A) nounwind {
2488 ; CHECK-LABEL: sqshli16b:
2490 ; CHECK-NEXT: ldr q0, [x0]
2491 ; CHECK-NEXT: sqshl.16b v0, v0, #1
2493 %tmp1 = load <16 x i8>, ptr %A
2494 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; sqshl intrinsic on <8 x i16> with a splat shift of 1. The expected output
; uses the immediate form sqshl.8h #1.
2498 define <8 x i16> @sqshli8h(ptr %A) nounwind {
2499 ; CHECK-LABEL: sqshli8h:
2501 ; CHECK-NEXT: ldr q0, [x0]
2502 ; CHECK-NEXT: sqshl.8h v0, v0, #1
2504 %tmp1 = load <8 x i16>, ptr %A
2505 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; sqshl intrinsic on <4 x i32> with a splat shift of 1. The expected output
; uses the immediate form sqshl.4s #1.
2509 define <4 x i32> @sqshli4s(ptr %A) nounwind {
2510 ; CHECK-LABEL: sqshli4s:
2512 ; CHECK-NEXT: ldr q0, [x0]
2513 ; CHECK-NEXT: sqshl.4s v0, v0, #1
2515 %tmp1 = load <4 x i32>, ptr %A
2516 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; sqshl intrinsic on <2 x i64> with a splat shift of 1. The expected output
; uses the immediate form sqshl.2d #1.
2520 define <2 x i64> @sqshli2d(ptr %A) nounwind {
2521 ; CHECK-LABEL: sqshli2d:
2523 ; CHECK-NEXT: ldr q0, [x0]
2524 ; CHECK-NEXT: sqshl.2d v0, v0, #1
2526 %tmp1 = load <2 x i64>, ptr %A
2527 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
; uqshl intrinsic on <8 x i8> with a splat shift of 1. The expected output
; uses the immediate form uqshl.8b #1.
2531 define <8 x i8> @uqshli8b(ptr %A) nounwind {
2532 ; CHECK-LABEL: uqshli8b:
2534 ; CHECK-NEXT: ldr d0, [x0]
2535 ; CHECK-NEXT: uqshl.8b v0, v0, #1
2537 %tmp1 = load <8 x i8>, ptr %A
2538 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; uqshl intrinsic on <8 x i8> with a splat shift of 8, i.e. equal to the
; element width. The expected output does not use an immediate form: the
; shift vector is built with movi.8b #8 and passed to the register form.
2542 define <8 x i8> @uqshli8b_1(ptr %A) nounwind {
2543 ; CHECK-LABEL: uqshli8b_1:
2545 ; CHECK-NEXT: movi.8b v1, #8
2546 ; CHECK-NEXT: ldr d0, [x0]
2547 ; CHECK-NEXT: uqshl.8b v0, v0, v1
2549 %tmp1 = load <8 x i8>, ptr %A
2550 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
; uqshl intrinsic on <4 x i16> with a splat shift of 1. The expected output
; uses the immediate form uqshl.4h #1.
2554 define <4 x i16> @uqshli4h(ptr %A) nounwind {
2555 ; CHECK-LABEL: uqshli4h:
2557 ; CHECK-NEXT: ldr d0, [x0]
2558 ; CHECK-NEXT: uqshl.4h v0, v0, #1
2560 %tmp1 = load <4 x i16>, ptr %A
2561 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
; uqshl intrinsic on <2 x i32> with a splat shift of 1. The expected output
; uses the immediate form uqshl.2s #1.
2565 define <2 x i32> @uqshli2s(ptr %A) nounwind {
2566 ; CHECK-LABEL: uqshli2s:
2568 ; CHECK-NEXT: ldr d0, [x0]
2569 ; CHECK-NEXT: uqshl.2s v0, v0, #1
2571 %tmp1 = load <2 x i32>, ptr %A
2572 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
; uqshl intrinsic on <16 x i8> with a splat shift of 1. The expected output
; uses the immediate form uqshl.16b #1.
2576 define <16 x i8> @uqshli16b(ptr %A) nounwind {
2577 ; CHECK-LABEL: uqshli16b:
2579 ; CHECK-NEXT: ldr q0, [x0]
2580 ; CHECK-NEXT: uqshl.16b v0, v0, #1
2582 %tmp1 = load <16 x i8>, ptr %A
2583 %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; uqshl intrinsic on <8 x i16> with a splat shift of 1. The expected output
; uses the immediate form uqshl.8h #1.
2587 define <8 x i16> @uqshli8h(ptr %A) nounwind {
2588 ; CHECK-LABEL: uqshli8h:
2590 ; CHECK-NEXT: ldr q0, [x0]
2591 ; CHECK-NEXT: uqshl.8h v0, v0, #1
2593 %tmp1 = load <8 x i16>, ptr %A
2594 %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; uqshl intrinsic on <4 x i32> with a splat shift of 1. The expected output
; uses the immediate form uqshl.4s #1.
2598 define <4 x i32> @uqshli4s(ptr %A) nounwind {
2599 ; CHECK-LABEL: uqshli4s:
2601 ; CHECK-NEXT: ldr q0, [x0]
2602 ; CHECK-NEXT: uqshl.4s v0, v0, #1
2604 %tmp1 = load <4 x i32>, ptr %A
2605 %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; uqshl intrinsic on <2 x i64> with a splat shift of 1. The expected output
; uses the immediate form uqshl.2d #1.
2609 define <2 x i64> @uqshli2d(ptr %A) nounwind {
2610 ; CHECK-LABEL: uqshli2d:
2612 ; CHECK-NEXT: ldr q0, [x0]
2613 ; CHECK-NEXT: uqshl.2d v0, v0, #1
2615 %tmp1 = load <2 x i64>, ptr %A
2616 %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
; urshl intrinsic with a splat shift of -1 on the <8 x i8> loaded from %A,
; added to the value loaded from %B. The expected output is a single
; ursra.8b #1 accumulating into the register loaded from [x1].
2620 define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind {
2621 ; CHECK-LABEL: ursra8b:
2623 ; CHECK-NEXT: ldr d1, [x0]
2624 ; CHECK-NEXT: ldr d0, [x1]
2625 ; CHECK-NEXT: ursra.8b v0, v1, #1
2627 %tmp1 = load <8 x i8>, ptr %A
2628 %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
2629 %tmp4 = load <8 x i8>, ptr %B
2630 %tmp5 = add <8 x i8> %tmp3, %tmp4
; urshl intrinsic with a splat shift of -1 on the <4 x i16> loaded from %A,
; added to the value loaded from %B. The expected output is a single
; ursra.4h #1 accumulating into the register loaded from [x1].
2634 define <4 x i16> @ursra4h(ptr %A, ptr %B) nounwind {
2635 ; CHECK-LABEL: ursra4h:
2637 ; CHECK-NEXT: ldr d1, [x0]
2638 ; CHECK-NEXT: ldr d0, [x1]
2639 ; CHECK-NEXT: ursra.4h v0, v1, #1
2641 %tmp1 = load <4 x i16>, ptr %A
2642 %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
2643 %tmp4 = load <4 x i16>, ptr %B
2644 %tmp5 = add <4 x i16> %tmp3, %tmp4
; urshl intrinsic with a splat shift of -1 on the <2 x i32> loaded from %A,
; added to the value loaded from %B. The expected output is a single
; ursra.2s #1 accumulating into the register loaded from [x1].
2648 define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind {
2649 ; CHECK-LABEL: ursra2s:
2651 ; CHECK-NEXT: ldr d1, [x0]
2652 ; CHECK-NEXT: ldr d0, [x1]
2653 ; CHECK-NEXT: ursra.2s v0, v1, #1
2655 %tmp1 = load <2 x i32>, ptr %A
2656 %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
2657 %tmp4 = load <2 x i32>, ptr %B
2658 %tmp5 = add <2 x i32> %tmp3, %tmp4
; urshl intrinsic with a splat shift of -1 on the <16 x i8> loaded from %A,
; added to the value loaded from %B. The expected output is a single
; ursra.16b #1 accumulating into the register loaded from [x1].
2662 define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind {
2663 ; CHECK-LABEL: ursra16b:
2665 ; CHECK-NEXT: ldr q1, [x0]
2666 ; CHECK-NEXT: ldr q0, [x1]
2667 ; CHECK-NEXT: ursra.16b v0, v1, #1
2669 %tmp1 = load <16 x i8>, ptr %A
2670 %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
2671 %tmp4 = load <16 x i8>, ptr %B
2672 %tmp5 = add <16 x i8> %tmp3, %tmp4
; urshl intrinsic with a splat shift of -1 on the <8 x i16> loaded from %A,
; added to the value loaded from %B. The expected output is a single
; ursra.8h #1 accumulating into the register loaded from [x1].
2676 define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind {
2677 ; CHECK-LABEL: ursra8h:
2679 ; CHECK-NEXT: ldr q1, [x0]
2680 ; CHECK-NEXT: ldr q0, [x1]
2681 ; CHECK-NEXT: ursra.8h v0, v1, #1
2683 %tmp1 = load <8 x i16>, ptr %A
2684 %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
2685 %tmp4 = load <8 x i16>, ptr %B
2686 %tmp5 = add <8 x i16> %tmp3, %tmp4
; urshl intrinsic with a splat shift of -1 on the <4 x i32> loaded from %A,
; added to the value loaded from %B. The expected output is a single
; ursra.4s #1 accumulating into the register loaded from [x1].
2690 define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind {
2691 ; CHECK-LABEL: ursra4s:
2693 ; CHECK-NEXT: ldr q1, [x0]
2694 ; CHECK-NEXT: ldr q0, [x1]
2695 ; CHECK-NEXT: ursra.4s v0, v1, #1
2697 %tmp1 = load <4 x i32>, ptr %A
2698 %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
2699 %tmp4 = load <4 x i32>, ptr %B
2700 %tmp5 = add <4 x i32> %tmp3, %tmp4
; urshl intrinsic with a splat shift of -1 on the <2 x i64> loaded from %A,
; added to the value loaded from %B. The expected output is a single
; ursra.2d #1 accumulating into the register loaded from [x1].
2704 define <2 x i64> @ursra2d(ptr %A, ptr %B) nounwind {
2705 ; CHECK-LABEL: ursra2d:
2707 ; CHECK-NEXT: ldr q1, [x0]
2708 ; CHECK-NEXT: ldr q0, [x1]
2709 ; CHECK-NEXT: ursra.2d v0, v1, #1
2711 %tmp1 = load <2 x i64>, ptr %A
2712 %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
2713 %tmp4 = load <2 x i64>, ptr %B
2714 %tmp5 = add <2 x i64> %tmp3, %tmp4
; urshl intrinsic with a shift of -1 on the <1 x i64> loaded from %A, added
; to the value loaded from %B. The expected output is the scalar form
; ursra d0, d1, #1.
2718 define <1 x i64> @ursra1d(ptr %A, ptr %B) nounwind {
2719 ; CHECK-LABEL: ursra1d:
2721 ; CHECK-NEXT: ldr d1, [x0]
2722 ; CHECK-NEXT: ldr d0, [x1]
2723 ; CHECK-NEXT: ursra d0, d1, #1
2725 %tmp1 = load <1 x i64>, ptr %A
2726 %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
2727 %tmp4 = load <1 x i64>, ptr %B
2728 %tmp5 = add <1 x i64> %tmp3, %tmp4
; Plain i64 version: urshl.i64 with shift -1 on the value loaded from %A,
; added to the value loaded from %B. The expected output loads both values
; into d-registers, uses ursra d1, d0, #1 and moves the result to x0.
2732 define i64 @ursra_scalar(ptr %A, ptr %B) nounwind {
2733 ; CHECK-LABEL: ursra_scalar:
2735 ; CHECK-NEXT: ldr d0, [x0]
2736 ; CHECK-NEXT: ldr d1, [x1]
2737 ; CHECK-NEXT: ursra d1, d0, #1
2738 ; CHECK-NEXT: fmov x0, d1
2740 %tmp1 = load i64, ptr %A
2741 %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1)
2742 %tmp4 = load i64, ptr %B
2743 %tmp5 = add i64 %tmp3, %tmp4
; srshl intrinsic with a splat shift of -1 on the <8 x i8> loaded from %A,
; added to the value loaded from %B. The expected output is a single
; srsra.8b #1 accumulating into the register loaded from [x1].
2747 define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind {
2748 ; CHECK-LABEL: srsra8b:
2750 ; CHECK-NEXT: ldr d1, [x0]
2751 ; CHECK-NEXT: ldr d0, [x1]
2752 ; CHECK-NEXT: srsra.8b v0, v1, #1
2754 %tmp1 = load <8 x i8>, ptr %A
2755 %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
2756 %tmp4 = load <8 x i8>, ptr %B
2757 %tmp5 = add <8 x i8> %tmp3, %tmp4
; srshl intrinsic with a splat shift of -1 on the <4 x i16> loaded from %A,
; added to the value loaded from %B. The expected output is a single
; srsra.4h #1 accumulating into the register loaded from [x1].
2761 define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind {
2762 ; CHECK-LABEL: srsra4h:
2764 ; CHECK-NEXT: ldr d1, [x0]
2765 ; CHECK-NEXT: ldr d0, [x1]
2766 ; CHECK-NEXT: srsra.4h v0, v1, #1
2768 %tmp1 = load <4 x i16>, ptr %A
2769 %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
2770 %tmp4 = load <4 x i16>, ptr %B
2771 %tmp5 = add <4 x i16> %tmp3, %tmp4
; srshl intrinsic with a splat shift of -1 on the <2 x i32> loaded from %A,
; added to the value loaded from %B. The expected output is a single
; srsra.2s #1 accumulating into the register loaded from [x1].
2775 define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind {
2776 ; CHECK-LABEL: srsra2s:
2778 ; CHECK-NEXT: ldr d1, [x0]
2779 ; CHECK-NEXT: ldr d0, [x1]
2780 ; CHECK-NEXT: srsra.2s v0, v1, #1
2782 %tmp1 = load <2 x i32>, ptr %A
2783 %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
2784 %tmp4 = load <2 x i32>, ptr %B
2785 %tmp5 = add <2 x i32> %tmp3, %tmp4
; srshl intrinsic with a splat shift of -1 on the <16 x i8> loaded from %A,
; added to the value loaded from %B. The expected output is a single
; srsra.16b #1 accumulating into the register loaded from [x1].
2789 define <16 x i8> @srsra16b(ptr %A, ptr %B) nounwind {
2790 ; CHECK-LABEL: srsra16b:
2792 ; CHECK-NEXT: ldr q1, [x0]
2793 ; CHECK-NEXT: ldr q0, [x1]
2794 ; CHECK-NEXT: srsra.16b v0, v1, #1
2796 %tmp1 = load <16 x i8>, ptr %A
2797 %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
2798 %tmp4 = load <16 x i8>, ptr %B
2799 %tmp5 = add <16 x i8> %tmp3, %tmp4
; srshl intrinsic with a splat shift of -1 on the <8 x i16> loaded from %A,
; added to the value loaded from %B. The expected output is a single
; srsra.8h #1 accumulating into the register loaded from [x1].
2803 define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind {
2804 ; CHECK-LABEL: srsra8h:
2806 ; CHECK-NEXT: ldr q1, [x0]
2807 ; CHECK-NEXT: ldr q0, [x1]
2808 ; CHECK-NEXT: srsra.8h v0, v1, #1
2810 %tmp1 = load <8 x i16>, ptr %A
2811 %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
2812 %tmp4 = load <8 x i16>, ptr %B
2813 %tmp5 = add <8 x i16> %tmp3, %tmp4
; srshl intrinsic with a splat shift of -1 on the <4 x i32> loaded from %A,
; added to the value loaded from %B. The expected output is a single
; srsra.4s #1 accumulating into the register loaded from [x1].
2817 define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind {
2818 ; CHECK-LABEL: srsra4s:
2820 ; CHECK-NEXT: ldr q1, [x0]
2821 ; CHECK-NEXT: ldr q0, [x1]
2822 ; CHECK-NEXT: srsra.4s v0, v1, #1
2824 %tmp1 = load <4 x i32>, ptr %A
2825 %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
2826 %tmp4 = load <4 x i32>, ptr %B
2827 %tmp5 = add <4 x i32> %tmp3, %tmp4
; srshl intrinsic with a splat shift of -1 on the <2 x i64> loaded from %A,
; added to the value loaded from %B. The expected output is a single
; srsra.2d #1 accumulating into the register loaded from [x1].
2831 define <2 x i64> @srsra2d(ptr %A, ptr %B) nounwind {
2832 ; CHECK-LABEL: srsra2d:
2834 ; CHECK-NEXT: ldr q1, [x0]
2835 ; CHECK-NEXT: ldr q0, [x1]
2836 ; CHECK-NEXT: srsra.2d v0, v1, #1
2838 %tmp1 = load <2 x i64>, ptr %A
2839 %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
2840 %tmp4 = load <2 x i64>, ptr %B
2841 %tmp5 = add <2 x i64> %tmp3, %tmp4
; srshl intrinsic with a shift of -1 on the <1 x i64> loaded from %A, added
; to the value loaded from %B. The expected output is the scalar form
; srsra d0, d1, #1.
2845 define <1 x i64> @srsra1d(ptr %A, ptr %B) nounwind {
2846 ; CHECK-LABEL: srsra1d:
2848 ; CHECK-NEXT: ldr d1, [x0]
2849 ; CHECK-NEXT: ldr d0, [x1]
2850 ; CHECK-NEXT: srsra d0, d1, #1
2852 %tmp1 = load <1 x i64>, ptr %A
2853 %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
2854 %tmp4 = load <1 x i64>, ptr %B
2855 %tmp5 = add <1 x i64> %tmp3, %tmp4
; Plain i64 version: srshl.i64 with shift -1 on the value loaded from %A,
; added to the value loaded from %B. The expected output loads both values
; into d-registers, uses srsra d1, d0, #1 and moves the result to x0.
2859 define i64 @srsra_scalar(ptr %A, ptr %B) nounwind {
2860 ; CHECK-LABEL: srsra_scalar:
2862 ; CHECK-NEXT: ldr d0, [x0]
2863 ; CHECK-NEXT: ldr d1, [x1]
2864 ; CHECK-NEXT: srsra d1, d0, #1
2865 ; CHECK-NEXT: fmov x0, d1
2867 %tmp1 = load i64, ptr %A
2868 %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1)
2869 %tmp4 = load i64, ptr %B
2870 %tmp5 = add i64 %tmp3, %tmp4
; IR lshr by a splat of 1 on the <8 x i8> loaded from %A, added to the value
; loaded from %B. The expected output is a single usra.8b #1 accumulating
; into the register loaded from [x1].
2874 define <8 x i8> @usra8b(ptr %A, ptr %B) nounwind {
2875 ; CHECK-LABEL: usra8b:
2877 ; CHECK-NEXT: ldr d1, [x0]
2878 ; CHECK-NEXT: ldr d0, [x1]
2879 ; CHECK-NEXT: usra.8b v0, v1, #1
2881 %tmp1 = load <8 x i8>, ptr %A
2882 %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
2883 %tmp4 = load <8 x i8>, ptr %B
2884 %tmp5 = add <8 x i8> %tmp3, %tmp4
; IR lshr by a splat of 1 on the <4 x i16> loaded from %A, added to the value
; loaded from %B. The expected output is a single usra.4h #1 accumulating
; into the register loaded from [x1].
2888 define <4 x i16> @usra4h(ptr %A, ptr %B) nounwind {
2889 ; CHECK-LABEL: usra4h:
2891 ; CHECK-NEXT: ldr d1, [x0]
2892 ; CHECK-NEXT: ldr d0, [x1]
2893 ; CHECK-NEXT: usra.4h v0, v1, #1
2895 %tmp1 = load <4 x i16>, ptr %A
2896 %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
2897 %tmp4 = load <4 x i16>, ptr %B
2898 %tmp5 = add <4 x i16> %tmp3, %tmp4
; IR lshr by a splat of 1 on the <2 x i32> loaded from %A, added to the value
; loaded from %B. The expected output is a single usra.2s #1 accumulating
; into the register loaded from [x1].
2902 define <2 x i32> @usra2s(ptr %A, ptr %B) nounwind {
2903 ; CHECK-LABEL: usra2s:
2905 ; CHECK-NEXT: ldr d1, [x0]
2906 ; CHECK-NEXT: ldr d0, [x1]
2907 ; CHECK-NEXT: usra.2s v0, v1, #1
2909 %tmp1 = load <2 x i32>, ptr %A
2910 %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
2911 %tmp4 = load <2 x i32>, ptr %B
2912 %tmp5 = add <2 x i32> %tmp3, %tmp4
; IR lshr by a splat of 1 on the <16 x i8> loaded from %A, added to the value
; loaded from %B. The expected output is a single usra.16b #1 accumulating
; into the register loaded from [x1].
2916 define <16 x i8> @usra16b(ptr %A, ptr %B) nounwind {
2917 ; CHECK-LABEL: usra16b:
2919 ; CHECK-NEXT: ldr q1, [x0]
2920 ; CHECK-NEXT: ldr q0, [x1]
2921 ; CHECK-NEXT: usra.16b v0, v1, #1
2923 %tmp1 = load <16 x i8>, ptr %A
2924 %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
2925 %tmp4 = load <16 x i8>, ptr %B
2926 %tmp5 = add <16 x i8> %tmp3, %tmp4
; IR lshr by a splat of 1 on the <8 x i16> loaded from %A, added to the value
; loaded from %B. The expected output is a single usra.8h #1 accumulating
; into the register loaded from [x1].
2930 define <8 x i16> @usra8h(ptr %A, ptr %B) nounwind {
2931 ; CHECK-LABEL: usra8h:
2933 ; CHECK-NEXT: ldr q1, [x0]
2934 ; CHECK-NEXT: ldr q0, [x1]
2935 ; CHECK-NEXT: usra.8h v0, v1, #1
2937 %tmp1 = load <8 x i16>, ptr %A
2938 %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
2939 %tmp4 = load <8 x i16>, ptr %B
2940 %tmp5 = add <8 x i16> %tmp3, %tmp4
; IR lshr by a splat of 1 on the <4 x i32> loaded from %A, added to the value
; loaded from %B. The expected output is a single usra.4s #1 accumulating
; into the register loaded from [x1].
2944 define <4 x i32> @usra4s(ptr %A, ptr %B) nounwind {
2945 ; CHECK-LABEL: usra4s:
2947 ; CHECK-NEXT: ldr q1, [x0]
2948 ; CHECK-NEXT: ldr q0, [x1]
2949 ; CHECK-NEXT: usra.4s v0, v1, #1
2951 %tmp1 = load <4 x i32>, ptr %A
2952 %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
2953 %tmp4 = load <4 x i32>, ptr %B
2954 %tmp5 = add <4 x i32> %tmp3, %tmp4
; IR lshr by a splat of 1 on the <2 x i64> loaded from %A, added to the value
; loaded from %B. The expected output is a single usra.2d #1 accumulating
; into the register loaded from [x1].
2958 define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind {
2959 ; CHECK-LABEL: usra2d:
2961 ; CHECK-NEXT: ldr q1, [x0]
2962 ; CHECK-NEXT: ldr q0, [x1]
2963 ; CHECK-NEXT: usra.2d v0, v1, #1
2965 %tmp1 = load <2 x i64>, ptr %A
2966 %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
2967 %tmp4 = load <2 x i64>, ptr %B
2968 %tmp5 = add <2 x i64> %tmp3, %tmp4
; IR lshr by 1 on the <1 x i64> loaded from %A, added to the value loaded
; from %B. The expected output is the scalar form usra d0, d1, #1.
2972 define <1 x i64> @usra1d(ptr %A, ptr %B) nounwind {
2973 ; CHECK-LABEL: usra1d:
2975 ; CHECK-NEXT: ldr d1, [x0]
2976 ; CHECK-NEXT: ldr d0, [x1]
2977 ; CHECK-NEXT: usra d0, d1, #1
2979 %tmp1 = load <1 x i64>, ptr %A
2980 %tmp3 = lshr <1 x i64> %tmp1, <i64 1>
2981 %tmp4 = load <1 x i64>, ptr %B
2982 %tmp5 = add <1 x i64> %tmp3, %tmp4
; IR ashr by a splat of 1 on the <8 x i8> loaded from %A, added to the value
; loaded from %B. The expected output is a single ssra.8b #1 accumulating
; into the register loaded from [x1].
2986 define <8 x i8> @ssra8b(ptr %A, ptr %B) nounwind {
2987 ; CHECK-LABEL: ssra8b:
2989 ; CHECK-NEXT: ldr d1, [x0]
2990 ; CHECK-NEXT: ldr d0, [x1]
2991 ; CHECK-NEXT: ssra.8b v0, v1, #1
2993 %tmp1 = load <8 x i8>, ptr %A
2994 %tmp3 = ashr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
2995 %tmp4 = load <8 x i8>, ptr %B
2996 %tmp5 = add <8 x i8> %tmp3, %tmp4
; IR ashr by a splat of 1 on the <4 x i16> loaded from %A, added to the value
; loaded from %B. The expected output is a single ssra.4h #1 accumulating
; into the register loaded from [x1].
3000 define <4 x i16> @ssra4h(ptr %A, ptr %B) nounwind {
3001 ; CHECK-LABEL: ssra4h:
3003 ; CHECK-NEXT: ldr d1, [x0]
3004 ; CHECK-NEXT: ldr d0, [x1]
3005 ; CHECK-NEXT: ssra.4h v0, v1, #1
3007 %tmp1 = load <4 x i16>, ptr %A
3008 %tmp3 = ashr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
3009 %tmp4 = load <4 x i16>, ptr %B
3010 %tmp5 = add <4 x i16> %tmp3, %tmp4
; IR ashr by a splat of 1 on the <2 x i32> loaded from %A, added to the value
; loaded from %B. The expected output is a single ssra.2s #1 accumulating
; into the register loaded from [x1].
3014 define <2 x i32> @ssra2s(ptr %A, ptr %B) nounwind {
3015 ; CHECK-LABEL: ssra2s:
3017 ; CHECK-NEXT: ldr d1, [x0]
3018 ; CHECK-NEXT: ldr d0, [x1]
3019 ; CHECK-NEXT: ssra.2s v0, v1, #1
3021 %tmp1 = load <2 x i32>, ptr %A
3022 %tmp3 = ashr <2 x i32> %tmp1, <i32 1, i32 1>
3023 %tmp4 = load <2 x i32>, ptr %B
3024 %tmp5 = add <2 x i32> %tmp3, %tmp4
; IR ashr by a splat of 1 on the <16 x i8> loaded from %A, added to the value
; loaded from %B. The expected output is a single ssra.16b #1 accumulating
; into the register loaded from [x1].
3028 define <16 x i8> @ssra16b(ptr %A, ptr %B) nounwind {
3029 ; CHECK-LABEL: ssra16b:
3031 ; CHECK-NEXT: ldr q1, [x0]
3032 ; CHECK-NEXT: ldr q0, [x1]
3033 ; CHECK-NEXT: ssra.16b v0, v1, #1
3035 %tmp1 = load <16 x i8>, ptr %A
3036 %tmp3 = ashr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
3037 %tmp4 = load <16 x i8>, ptr %B
3038 %tmp5 = add <16 x i8> %tmp3, %tmp4
; IR ashr by a splat of 1 on the <8 x i16> loaded from %A, added to the value
; loaded from %B. The expected output is a single ssra.8h #1 accumulating
; into the register loaded from [x1].
3042 define <8 x i16> @ssra8h(ptr %A, ptr %B) nounwind {
3043 ; CHECK-LABEL: ssra8h:
3045 ; CHECK-NEXT: ldr q1, [x0]
3046 ; CHECK-NEXT: ldr q0, [x1]
3047 ; CHECK-NEXT: ssra.8h v0, v1, #1
3049 %tmp1 = load <8 x i16>, ptr %A
3050 %tmp3 = ashr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
3051 %tmp4 = load <8 x i16>, ptr %B
3052 %tmp5 = add <8 x i16> %tmp3, %tmp4
; IR ashr by a splat of 1 on the <4 x i32> loaded from %A, added to the value
; loaded from %B. The expected output is a single ssra.4s #1 accumulating
; into the register loaded from [x1].
3056 define <4 x i32> @ssra4s(ptr %A, ptr %B) nounwind {
3057 ; CHECK-LABEL: ssra4s:
3059 ; CHECK-NEXT: ldr q1, [x0]
3060 ; CHECK-NEXT: ldr q0, [x1]
3061 ; CHECK-NEXT: ssra.4s v0, v1, #1
3063 %tmp1 = load <4 x i32>, ptr %A
3064 %tmp3 = ashr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
3065 %tmp4 = load <4 x i32>, ptr %B
3066 %tmp5 = add <4 x i32> %tmp3, %tmp4
; IR ashr by a splat of 1 on the <2 x i64> loaded from %A, added to the value
; loaded from %B. The expected output is a single ssra.2d #1 accumulating
; into the register loaded from [x1].
3070 define <2 x i64> @ssra2d(ptr %A, ptr %B) nounwind {
3071 ; CHECK-LABEL: ssra2d:
3073 ; CHECK-NEXT: ldr q1, [x0]
3074 ; CHECK-NEXT: ldr q0, [x1]
3075 ; CHECK-NEXT: ssra.2d v0, v1, #1
3077 %tmp1 = load <2 x i64>, ptr %A
3078 %tmp3 = ashr <2 x i64> %tmp1, <i64 1, i64 1>
3079 %tmp4 = load <2 x i64>, ptr %B
3080 %tmp5 = add <2 x i64> %tmp3, %tmp4
; IR lshr by a splat of 1 on the <8 x i8> loaded from %A, OR-ed with the
; value loaded from %B. The expected output keeps two instructions:
; ushr.8b #1 followed by orr.8b.
3084 define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind {
3085 ; CHECK-LABEL: shr_orr8b:
3087 ; CHECK-NEXT: ldr d0, [x0]
3088 ; CHECK-NEXT: ldr d1, [x1]
3089 ; CHECK-NEXT: ushr.8b v0, v0, #1
3090 ; CHECK-NEXT: orr.8b v0, v0, v1
3092 %tmp1 = load <8 x i8>, ptr %A
3093 %tmp4 = load <8 x i8>, ptr %B
3094 %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
3095 %tmp5 = or <8 x i8> %tmp3, %tmp4
; IR lshr by a splat of 1 on the <4 x i16> loaded from %A, OR-ed with the
; value loaded from %B. The expected output keeps two instructions:
; ushr.4h #1 followed by orr.8b.
3099 define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind {
3100 ; CHECK-LABEL: shr_orr4h:
3102 ; CHECK-NEXT: ldr d0, [x0]
3103 ; CHECK-NEXT: ldr d1, [x1]
3104 ; CHECK-NEXT: ushr.4h v0, v0, #1
3105 ; CHECK-NEXT: orr.8b v0, v0, v1
3107 %tmp1 = load <4 x i16>, ptr %A
3108 %tmp4 = load <4 x i16>, ptr %B
3109 %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
3110 %tmp5 = or <4 x i16> %tmp3, %tmp4
; IR lshr by a splat of 1 on the <2 x i32> loaded from %A, OR-ed with the
; value loaded from %B. The expected output keeps two instructions:
; ushr.2s #1 followed by orr.8b.
3114 define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind {
3115 ; CHECK-LABEL: shr_orr2s:
3117 ; CHECK-NEXT: ldr d0, [x0]
3118 ; CHECK-NEXT: ldr d1, [x1]
3119 ; CHECK-NEXT: ushr.2s v0, v0, #1
3120 ; CHECK-NEXT: orr.8b v0, v0, v1
3122 %tmp1 = load <2 x i32>, ptr %A
3123 %tmp4 = load <2 x i32>, ptr %B
3124 %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
3125 %tmp5 = or <2 x i32> %tmp3, %tmp4
; IR lshr by a splat of 1 on the <16 x i8> loaded from %A, OR-ed with the
; value loaded from %B. The expected output keeps two instructions:
; ushr.16b #1 followed by orr.16b.
3129 define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind {
3130 ; CHECK-LABEL: shr_orr16b:
3132 ; CHECK-NEXT: ldr q0, [x0]
3133 ; CHECK-NEXT: ldr q1, [x1]
3134 ; CHECK-NEXT: ushr.16b v0, v0, #1
3135 ; CHECK-NEXT: orr.16b v0, v0, v1
3137 %tmp1 = load <16 x i8>, ptr %A
3138 %tmp4 = load <16 x i8>, ptr %B
3139 %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
3140 %tmp5 = or <16 x i8> %tmp3, %tmp4
; IR lshr by a splat of 1 on the <8 x i16> loaded from %A, OR-ed with the
; value loaded from %B. The expected output keeps two instructions:
; ushr.8h #1 followed by orr.16b.
3144 define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind {
3145 ; CHECK-LABEL: shr_orr8h:
3147 ; CHECK-NEXT: ldr q0, [x0]
3148 ; CHECK-NEXT: ldr q1, [x1]
3149 ; CHECK-NEXT: ushr.8h v0, v0, #1
3150 ; CHECK-NEXT: orr.16b v0, v0, v1
3152 %tmp1 = load <8 x i16>, ptr %A
3153 %tmp4 = load <8 x i16>, ptr %B
3154 %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
3155 %tmp5 = or <8 x i16> %tmp3, %tmp4
; shr_orr4s: loads a <4 x i32> from %A and another from %B, logically shifts
; the %A value right by 1 in every lane, and ORs the result with the %B value.
; Expected codegen: two 128-bit loads, ushr.4s by #1, then orr.16b.
3159 define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind {
3160 ; CHECK-LABEL: shr_orr4s:
3162 ; CHECK-NEXT: ldr q0, [x0]
3163 ; CHECK-NEXT: ldr q1, [x1]
3164 ; CHECK-NEXT: ushr.4s v0, v0, #1
3165 ; CHECK-NEXT: orr.16b v0, v0, v1
3167 %tmp1 = load <4 x i32>, ptr %A
3168 %tmp4 = load <4 x i32>, ptr %B
3169 %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
3170 %tmp5 = or <4 x i32> %tmp3, %tmp4
; shr_orr2d: loads a <2 x i64> from %A and another from %B, logically shifts
; the %A value right by 1 in every lane, and ORs the result with the %B value.
; Expected codegen: two 128-bit loads, ushr.2d by #1, then orr.16b.
3174 define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind {
3175 ; CHECK-LABEL: shr_orr2d:
3177 ; CHECK-NEXT: ldr q0, [x0]
3178 ; CHECK-NEXT: ldr q1, [x1]
3179 ; CHECK-NEXT: ushr.2d v0, v0, #1
3180 ; CHECK-NEXT: orr.16b v0, v0, v1
3182 %tmp1 = load <2 x i64>, ptr %A
3183 %tmp4 = load <2 x i64>, ptr %B
3184 %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
3185 %tmp5 = or <2 x i64> %tmp3, %tmp4
; shl_orr8b: loads a <8 x i8> from %A and another from %B, shifts the %A value
; left by 1 in every lane, and ORs the result with the %B value.
; Expected codegen: the shift-left-by-one appears as add.8b v0, v0, v0
; (the value added to itself), followed by orr.8b.
3189 define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind {
3190 ; CHECK-LABEL: shl_orr8b:
3192 ; CHECK-NEXT: ldr d0, [x0]
3193 ; CHECK-NEXT: ldr d1, [x1]
3194 ; CHECK-NEXT: add.8b v0, v0, v0
3195 ; CHECK-NEXT: orr.8b v0, v0, v1
3197 %tmp1 = load <8 x i8>, ptr %A
3198 %tmp4 = load <8 x i8>, ptr %B
3199 %tmp3 = shl <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
3200 %tmp5 = or <8 x i8> %tmp3, %tmp4
; shl_orr4h: loads a <4 x i16> from %A and another from %B, shifts the %A value
; left by 1 in every lane, and ORs the result with the %B value.
; Expected codegen: the shift-left-by-one appears as add.4h v0, v0, v0
; (the value added to itself), followed by orr.8b.
3204 define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind {
3205 ; CHECK-LABEL: shl_orr4h:
3207 ; CHECK-NEXT: ldr d0, [x0]
3208 ; CHECK-NEXT: ldr d1, [x1]
3209 ; CHECK-NEXT: add.4h v0, v0, v0
3210 ; CHECK-NEXT: orr.8b v0, v0, v1
3212 %tmp1 = load <4 x i16>, ptr %A
3213 %tmp4 = load <4 x i16>, ptr %B
3214 %tmp3 = shl <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
3215 %tmp5 = or <4 x i16> %tmp3, %tmp4
; shl_orr2s: loads a <2 x i32> from %A and another from %B, shifts the %A value
; left by 1 in every lane, and ORs the result with the %B value.
; Expected codegen: the shift-left-by-one appears as add.2s v0, v0, v0
; (the value added to itself), followed by orr.8b.
3219 define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind {
3220 ; CHECK-LABEL: shl_orr2s:
3222 ; CHECK-NEXT: ldr d0, [x0]
3223 ; CHECK-NEXT: ldr d1, [x1]
3224 ; CHECK-NEXT: add.2s v0, v0, v0
3225 ; CHECK-NEXT: orr.8b v0, v0, v1
3227 %tmp1 = load <2 x i32>, ptr %A
3228 %tmp4 = load <2 x i32>, ptr %B
3229 %tmp3 = shl <2 x i32> %tmp1, <i32 1, i32 1>
3230 %tmp5 = or <2 x i32> %tmp3, %tmp4
; shl_orr16b: loads a <16 x i8> from %A and another from %B, shifts the %A
; value left by 1 in every lane, and ORs the result with the %B value.
; Expected codegen: the shift-left-by-one appears as add.16b v0, v0, v0
; (the value added to itself), followed by orr.16b.
3234 define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind {
3235 ; CHECK-LABEL: shl_orr16b:
3237 ; CHECK-NEXT: ldr q0, [x0]
3238 ; CHECK-NEXT: ldr q1, [x1]
3239 ; CHECK-NEXT: add.16b v0, v0, v0
3240 ; CHECK-NEXT: orr.16b v0, v0, v1
3242 %tmp1 = load <16 x i8>, ptr %A
3243 %tmp4 = load <16 x i8>, ptr %B
3244 %tmp3 = shl <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
3245 %tmp5 = or <16 x i8> %tmp3, %tmp4
; shl_orr8h: loads a <8 x i16> from %A and another from %B, shifts the %A value
; left by 1 in every lane, and ORs the result with the %B value.
; Expected codegen: the shift-left-by-one appears as add.8h v0, v0, v0
; (the value added to itself), followed by orr.16b.
3249 define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind {
3250 ; CHECK-LABEL: shl_orr8h:
3252 ; CHECK-NEXT: ldr q0, [x0]
3253 ; CHECK-NEXT: ldr q1, [x1]
3254 ; CHECK-NEXT: add.8h v0, v0, v0
3255 ; CHECK-NEXT: orr.16b v0, v0, v1
3257 %tmp1 = load <8 x i16>, ptr %A
3258 %tmp4 = load <8 x i16>, ptr %B
3259 %tmp3 = shl <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
3260 %tmp5 = or <8 x i16> %tmp3, %tmp4
; shl_orr4s: loads a <4 x i32> from %A and another from %B, shifts the %A value
; left by 1 in every lane, and ORs the result with the %B value.
; Expected codegen: the shift-left-by-one appears as add.4s v0, v0, v0
; (the value added to itself), followed by orr.16b.
3264 define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind {
3265 ; CHECK-LABEL: shl_orr4s:
3267 ; CHECK-NEXT: ldr q0, [x0]
3268 ; CHECK-NEXT: ldr q1, [x1]
3269 ; CHECK-NEXT: add.4s v0, v0, v0
3270 ; CHECK-NEXT: orr.16b v0, v0, v1
3272 %tmp1 = load <4 x i32>, ptr %A
3273 %tmp4 = load <4 x i32>, ptr %B
3274 %tmp3 = shl <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
3275 %tmp5 = or <4 x i32> %tmp3, %tmp4
; shl_orr2d: loads a <2 x i64> from %A and another from %B, shifts the %A value
; left by 1 in every lane, and ORs the result with the %B value.
; Expected codegen: the shift-left-by-one appears as add.2d v0, v0, v0
; (the value added to itself), followed by orr.16b.
3279 define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind {
3280 ; CHECK-LABEL: shl_orr2d:
3282 ; CHECK-NEXT: ldr q0, [x0]
3283 ; CHECK-NEXT: ldr q1, [x1]
3284 ; CHECK-NEXT: add.2d v0, v0, v0
3285 ; CHECK-NEXT: orr.16b v0, v0, v1
3287 %tmp1 = load <2 x i64>, ptr %A
3288 %tmp4 = load <2 x i64>, ptr %B
3289 %tmp3 = shl <2 x i64> %tmp1, <i64 1, i64 1>
3290 %tmp5 = or <2 x i64> %tmp3, %tmp4
; shll: zero-extends the <8 x i8> argument to <8 x i16> and shifts every lane
; left by 8, i.e. by the bit width of the source element type.
; Expected codegen: the extend and the shift together become one shll.8h with #8.
3294 define <8 x i16> @shll(<8 x i8> %in) {
3295 ; CHECK-LABEL: shll:
3297 ; CHECK-NEXT: shll.8h v0, v0, #8
3299 %ext = zext <8 x i8> %in to <8 x i16>
3300 %res = shl <8 x i16> %ext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; shll_high: extracts lanes 4..7 (the upper half) of the <8 x i16> argument with
; a shufflevector, zero-extends them to <4 x i32>, and shifts every lane left
; by 16, i.e. by the bit width of the source element type.
; Expected codegen: the whole sequence becomes one shll2.4s with #16.
3304 define <4 x i32> @shll_high(<8 x i16> %in) {
3305 ; CHECK-LABEL: shll_high:
3307 ; CHECK-NEXT: shll2.4s v0, v0, #16
3309 %extract = shufflevector <8 x i16> %in, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3310 %ext = zext <4 x i16> %extract to <4 x i32>
3311 %res = shl <4 x i32> %ext, <i32 16, i32 16, i32 16, i32 16>
; sli8b: loads two <8 x i8> vectors from %A and %B and passes them, with an
; immediate of 1, to the llvm.aarch64.neon.vsli.v8i8 intrinsic.
; Expected codegen: two 64-bit loads followed by sli.8b v0, v1, #1.
3315 define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind {
3316 ; CHECK-LABEL: sli8b:
3318 ; CHECK-NEXT: ldr d0, [x0]
3319 ; CHECK-NEXT: ldr d1, [x1]
3320 ; CHECK-NEXT: sli.8b v0, v1, #1
3322 %tmp1 = load <8 x i8>, ptr %A
3323 %tmp2 = load <8 x i8>, ptr %B
3324 %tmp3 = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1)
; sli4h: loads two <4 x i16> vectors from %A and %B and passes them, with an
; immediate of 1, to the llvm.aarch64.neon.vsli.v4i16 intrinsic.
; Expected codegen: two 64-bit loads followed by sli.4h v0, v1, #1.
3328 define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind {
3329 ; CHECK-LABEL: sli4h:
3331 ; CHECK-NEXT: ldr d0, [x0]
3332 ; CHECK-NEXT: ldr d1, [x1]
3333 ; CHECK-NEXT: sli.4h v0, v1, #1
3335 %tmp1 = load <4 x i16>, ptr %A
3336 %tmp2 = load <4 x i16>, ptr %B
3337 %tmp3 = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1)
; sli2s: loads two <2 x i32> vectors from %A and %B and passes them, with an
; immediate of 1, to the llvm.aarch64.neon.vsli.v2i32 intrinsic.
; Expected codegen: two 64-bit loads followed by sli.2s v0, v1, #1.
3341 define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind {
3342 ; CHECK-LABEL: sli2s:
3344 ; CHECK-NEXT: ldr d0, [x0]
3345 ; CHECK-NEXT: ldr d1, [x1]
3346 ; CHECK-NEXT: sli.2s v0, v1, #1
3348 %tmp1 = load <2 x i32>, ptr %A
3349 %tmp2 = load <2 x i32>, ptr %B
3350 %tmp3 = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1)
; sli1d: loads two <1 x i64> vectors from %A and %B and passes them, with an
; immediate of 1, to the llvm.aarch64.neon.vsli.v1i64 intrinsic.
; Expected codegen: two 64-bit loads followed by the scalar d-register form,
; sli d0, d1, #1.
3354 define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind {
3355 ; CHECK-LABEL: sli1d:
3357 ; CHECK-NEXT: ldr d0, [x0]
3358 ; CHECK-NEXT: ldr d1, [x1]
3359 ; CHECK-NEXT: sli d0, d1, #1
3361 %tmp1 = load <1 x i64>, ptr %A
3362 %tmp2 = load <1 x i64>, ptr %B
3363 %tmp3 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1)
; sli16b: loads two <16 x i8> vectors from %A and %B and passes them, with an
; immediate of 1, to the llvm.aarch64.neon.vsli.v16i8 intrinsic.
; Expected codegen: two 128-bit loads followed by sli.16b v0, v1, #1.
3367 define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind {
3368 ; CHECK-LABEL: sli16b:
3370 ; CHECK-NEXT: ldr q0, [x0]
3371 ; CHECK-NEXT: ldr q1, [x1]
3372 ; CHECK-NEXT: sli.16b v0, v1, #1
3374 %tmp1 = load <16 x i8>, ptr %A
3375 %tmp2 = load <16 x i8>, ptr %B
3376 %tmp3 = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1)
; sli8h: loads two <8 x i16> vectors from %A and %B and passes them, with an
; immediate of 1, to the llvm.aarch64.neon.vsli.v8i16 intrinsic.
; Expected codegen: two 128-bit loads followed by sli.8h v0, v1, #1.
3380 define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind {
3381 ; CHECK-LABEL: sli8h:
3383 ; CHECK-NEXT: ldr q0, [x0]
3384 ; CHECK-NEXT: ldr q1, [x1]
3385 ; CHECK-NEXT: sli.8h v0, v1, #1
3387 %tmp1 = load <8 x i16>, ptr %A
3388 %tmp2 = load <8 x i16>, ptr %B
3389 %tmp3 = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1)
; sli4s: loads two <4 x i32> vectors from %A and %B and passes them, with an
; immediate of 1, to the llvm.aarch64.neon.vsli.v4i32 intrinsic.
; Expected codegen: two 128-bit loads followed by sli.4s v0, v1, #1.
3393 define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind {
3394 ; CHECK-LABEL: sli4s:
3396 ; CHECK-NEXT: ldr q0, [x0]
3397 ; CHECK-NEXT: ldr q1, [x1]
3398 ; CHECK-NEXT: sli.4s v0, v1, #1
3400 %tmp1 = load <4 x i32>, ptr %A
3401 %tmp2 = load <4 x i32>, ptr %B
3402 %tmp3 = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1)
; sli2d: loads two <2 x i64> vectors from %A and %B and passes them, with an
; immediate of 1, to the llvm.aarch64.neon.vsli.v2i64 intrinsic.
; Expected codegen: two 128-bit loads followed by sli.2d v0, v1, #1.
3406 define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind {
3407 ; CHECK-LABEL: sli2d:
3409 ; CHECK-NEXT: ldr q0, [x0]
3410 ; CHECK-NEXT: ldr q1, [x1]
3411 ; CHECK-NEXT: sli.2d v0, v1, #1
3413 %tmp1 = load <2 x i64>, ptr %A
3414 %tmp2 = load <2 x i64>, ptr %B
3415 %tmp3 = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1)
; Declarations of the llvm.aarch64.neon.vsli intrinsic overloads called by the
; sli* tests. Each takes two vectors of the same type plus an i32 operand.
; The 64-bit vector types are listed first, then the 128-bit ones.
3419 declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone
3420 declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone
3421 declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone
3422 declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone
3424 declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone
3425 declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone
3426 declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone
3427 declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone
; ashr_v1i64: arithmetic shift right of a <1 x i64> value by a variable
; <1 x i64> amount.
; Expected codegen: the shift amount in d1 is negated, then sshl d0, d0, d1 is
; used, so the right shift is emitted as a signed shift by the negated amount.
3429 define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) {
3430 ; CHECK-LABEL: ashr_v1i64:
3432 ; CHECK-NEXT: neg d1, d1
3433 ; CHECK-NEXT: sshl d0, d0, d1
3435 %c = ashr <1 x i64> %a, %b
; sqshl_zero_shift_amount: feeds the result of llvm.aarch64.neon.addp.v2i64
; into llvm.aarch64.neon.sqshl.v2i64 with an all-zero (zeroinitializer) shift
; operand and stores the result to %dst.
; Expected codegen: only addp.2d and the store, with no shift instruction
; emitted for the zero shift amount.
3439 define void @sqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
3440 ; CHECK-LABEL: sqshl_zero_shift_amount:
3441 ; CHECK: // %bb.0: // %entry
3442 ; CHECK-NEXT: addp.2d v0, v0, v1
3443 ; CHECK-NEXT: str q0, [x0]
3446 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
3447 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
3448 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; uqshl_zero_shift_amount: feeds the result of llvm.aarch64.neon.addp.v2i64
; into llvm.aarch64.neon.uqshl.v2i64 with an all-zero (zeroinitializer) shift
; operand and stores the result to %dst.
; Expected codegen: only addp.2d and the store, with no shift instruction
; emitted for the zero shift amount.
3452 define void @uqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
3453 ; CHECK-LABEL: uqshl_zero_shift_amount:
3454 ; CHECK: // %bb.0: // %entry
3455 ; CHECK-NEXT: addp.2d v0, v0, v1
3456 ; CHECK-NEXT: str q0, [x0]
3459 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
3460 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
3461 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; srshl_zero_shift_amount: feeds the result of llvm.aarch64.neon.addp.v2i64
; into llvm.aarch64.neon.srshl.v2i64 with an all-zero (zeroinitializer) shift
; operand and stores the result to %dst.
; Expected codegen: only addp.2d and the store, with no shift instruction
; emitted for the zero shift amount.
3465 define void @srshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
3466 ; CHECK-LABEL: srshl_zero_shift_amount:
3467 ; CHECK: // %bb.0: // %entry
3468 ; CHECK-NEXT: addp.2d v0, v0, v1
3469 ; CHECK-NEXT: str q0, [x0]
3472 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
3473 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
3474 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; urshl_zero_shift_amount: feeds the result of llvm.aarch64.neon.addp.v2i64
; into llvm.aarch64.neon.urshl.v2i64 with an all-zero (zeroinitializer) shift
; operand and stores the result to %dst.
; Expected codegen: only addp.2d and the store, with no shift instruction
; emitted for the zero shift amount.
3478 define void @urshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
3479 ; CHECK-LABEL: urshl_zero_shift_amount:
3480 ; CHECK: // %bb.0: // %entry
3481 ; CHECK-NEXT: addp.2d v0, v0, v1
3482 ; CHECK-NEXT: str q0, [x0]
3485 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
3486 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
3487 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; sqshlu_zero_shift_amount: feeds the result of llvm.aarch64.neon.addp.v2i64
; into llvm.aarch64.neon.sqshlu.v2i64 with an all-zero (zeroinitializer) shift
; operand and stores the result to %dst.
; Expected codegen: addp.2d, then sqshlu.2d v0, v0, #0, then the store; here
; the zero-amount shift instruction is expected to remain in the output.
; NOTE(review): presumably it is kept because a signed-to-unsigned saturating
; shift still changes negative inputs at shift 0 -- confirm against the Arm
; ISA description of SQSHLU.
3491 define void @sqshlu_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
3492 ; CHECK-LABEL: sqshlu_zero_shift_amount:
3493 ; CHECK: // %bb.0: // %entry
3494 ; CHECK-NEXT: addp.2d v0, v0, v1
3495 ; CHECK-NEXT: sqshlu.2d v0, v0, #0
3496 ; CHECK-NEXT: str q0, [x0]
3499 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
3500 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
3501 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; sshl_zero_shift_amount: feeds the result of llvm.aarch64.neon.addp.v2i64
; into llvm.aarch64.neon.sshl.v2i64 with an all-zero (zeroinitializer) shift
; operand and stores the result to %dst.
; Expected codegen: only addp.2d and the store, with no shift instruction
; emitted for the zero shift amount.
3505 define void @sshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
3506 ; CHECK-LABEL: sshl_zero_shift_amount:
3507 ; CHECK: // %bb.0: // %entry
3508 ; CHECK-NEXT: addp.2d v0, v0, v1
3509 ; CHECK-NEXT: str q0, [x0]
3512 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
3513 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
3514 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; ushl_zero_shift_amount: feeds the result of llvm.aarch64.neon.addp.v2i64
; into llvm.aarch64.neon.ushl.v2i64 with an all-zero (zeroinitializer) shift
; operand and stores the result to %dst.
; Expected codegen: only addp.2d and the store, with no shift instruction
; emitted for the zero shift amount.
3518 define void @ushl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
3519 ; CHECK-LABEL: ushl_zero_shift_amount:
3520 ; CHECK: // %bb.0: // %entry
3521 ; CHECK-NEXT: addp.2d v0, v0, v1
3522 ; CHECK-NEXT: str q0, [x0]
3525 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
3526 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
3527 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; Declaration of the llvm.aarch64.neon.addp.v2i64 intrinsic called by the
; *_zero_shift_amount tests; it takes two <2 x i64> operands.
3531 declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>)