1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -enable-misched=false | FileCheck %s
; sqshl on <8 x i8>: both operands are loaded from %A and %B and passed to
; @llvm.aarch64.neon.sqshl.v8i8; the expected output is a single sqshl.8b.
4 define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind {
5 ; CHECK-LABEL: sqshl8b:
7 ; CHECK-NEXT: ldr d0, [x0]
8 ; CHECK-NEXT: ldr d1, [x1]
9 ; CHECK-NEXT: sqshl.8b v0, v0, v1
11 %tmp1 = load <8 x i8>, ptr %A
12 %tmp2 = load <8 x i8>, ptr %B
13 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; sqshl on <4 x i16>: both operands are loaded from %A and %B and passed to
; @llvm.aarch64.neon.sqshl.v4i16; the expected output is a single sqshl.4h.
17 define <4 x i16> @sqshl4h(ptr %A, ptr %B) nounwind {
18 ; CHECK-LABEL: sqshl4h:
20 ; CHECK-NEXT: ldr d0, [x0]
21 ; CHECK-NEXT: ldr d1, [x1]
22 ; CHECK-NEXT: sqshl.4h v0, v0, v1
24 %tmp1 = load <4 x i16>, ptr %A
25 %tmp2 = load <4 x i16>, ptr %B
26 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; sqshl on <2 x i32>: both operands are loaded from %A and %B and passed to
; @llvm.aarch64.neon.sqshl.v2i32; the expected output is a single sqshl.2s.
30 define <2 x i32> @sqshl2s(ptr %A, ptr %B) nounwind {
31 ; CHECK-LABEL: sqshl2s:
33 ; CHECK-NEXT: ldr d0, [x0]
34 ; CHECK-NEXT: ldr d1, [x1]
35 ; CHECK-NEXT: sqshl.2s v0, v0, v1
37 %tmp1 = load <2 x i32>, ptr %A
38 %tmp2 = load <2 x i32>, ptr %B
39 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; sqshl on <1 x i64>: both operands are loaded from memory; the expected
; output uses the d-register form `sqshl d0, d0, d1`.
43 define <1 x i64> @sqshl1d(ptr %A, ptr %B) nounwind {
44 ; CHECK-LABEL: sqshl1d:
46 ; CHECK-NEXT: ldr d0, [x0]
47 ; CHECK-NEXT: ldr d1, [x1]
48 ; CHECK-NEXT: sqshl d0, d0, d1
50 %tmp1 = load <1 x i64>, ptr %A
51 %tmp2 = load <1 x i64>, ptr %B
52 %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
; sqshl on <1 x i64> with the constant shift vector <i64 1>: the assertions
; expect the immediate form `sqshl d0, d0, #1` with no second load.
56 define <1 x i64> @sqshl1d_constant(ptr %A) nounwind {
57 ; CHECK-LABEL: sqshl1d_constant:
59 ; CHECK-NEXT: ldr d0, [x0]
60 ; CHECK-NEXT: sqshl d0, d0, #1
62 %tmp1 = load <1 x i64>, ptr %A
63 %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; Scalar i64 sqshl with both operands loaded from memory: the expected code
; loads into x8/x9, moves them to d0/d1 with fmov, performs the shift on
; d-registers, and moves the result back to x0.
67 define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind {
68 ; CHECK-LABEL: sqshl_scalar:
70 ; CHECK-NEXT: ldr x8, [x0]
71 ; CHECK-NEXT: ldr x9, [x1]
72 ; CHECK-NEXT: fmov d0, x8
73 ; CHECK-NEXT: fmov d1, x9
74 ; CHECK-NEXT: sqshl d0, d0, d1
75 ; CHECK-NEXT: fmov x0, d0
77 %tmp1 = load i64, ptr %A
78 %tmp2 = load i64, ptr %B
79 %tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 %tmp2)
; Scalar i64 sqshl with constant shift amount 1: the expected code loads the
; value straight into d0, uses the immediate form `sqshl d0, d0, #1`, and
; moves the result to x0.
83 define i64 @sqshl_scalar_constant(ptr %A) nounwind {
84 ; CHECK-LABEL: sqshl_scalar_constant:
86 ; CHECK-NEXT: ldr d0, [x0]
87 ; CHECK-NEXT: sqshl d0, d0, #1
88 ; CHECK-NEXT: fmov x0, d0
90 %tmp1 = load i64, ptr %A
91 %tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 1)
; uqshl on <8 x i8>: both operands are loaded from %A and %B and passed to
; @llvm.aarch64.neon.uqshl.v8i8; the expected output is a single uqshl.8b.
95 define <8 x i8> @uqshl8b(ptr %A, ptr %B) nounwind {
96 ; CHECK-LABEL: uqshl8b:
98 ; CHECK-NEXT: ldr d0, [x0]
99 ; CHECK-NEXT: ldr d1, [x1]
100 ; CHECK-NEXT: uqshl.8b v0, v0, v1
102 %tmp1 = load <8 x i8>, ptr %A
103 %tmp2 = load <8 x i8>, ptr %B
104 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; uqshl on <4 x i16>: both operands are loaded from %A and %B and passed to
; @llvm.aarch64.neon.uqshl.v4i16; the expected output is a single uqshl.4h.
108 define <4 x i16> @uqshl4h(ptr %A, ptr %B) nounwind {
109 ; CHECK-LABEL: uqshl4h:
111 ; CHECK-NEXT: ldr d0, [x0]
112 ; CHECK-NEXT: ldr d1, [x1]
113 ; CHECK-NEXT: uqshl.4h v0, v0, v1
115 %tmp1 = load <4 x i16>, ptr %A
116 %tmp2 = load <4 x i16>, ptr %B
117 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; uqshl on <2 x i32>: both operands are loaded from %A and %B and passed to
; @llvm.aarch64.neon.uqshl.v2i32; the expected output is a single uqshl.2s.
121 define <2 x i32> @uqshl2s(ptr %A, ptr %B) nounwind {
122 ; CHECK-LABEL: uqshl2s:
124 ; CHECK-NEXT: ldr d0, [x0]
125 ; CHECK-NEXT: ldr d1, [x1]
126 ; CHECK-NEXT: uqshl.2s v0, v0, v1
128 %tmp1 = load <2 x i32>, ptr %A
129 %tmp2 = load <2 x i32>, ptr %B
130 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; sqshl on <16 x i8>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.sqshl.v16i8; the expected output is a single sqshl.16b.
134 define <16 x i8> @sqshl16b(ptr %A, ptr %B) nounwind {
135 ; CHECK-LABEL: sqshl16b:
137 ; CHECK-NEXT: ldr q0, [x0]
138 ; CHECK-NEXT: ldr q1, [x1]
139 ; CHECK-NEXT: sqshl.16b v0, v0, v1
141 %tmp1 = load <16 x i8>, ptr %A
142 %tmp2 = load <16 x i8>, ptr %B
143 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; sqshl on <8 x i16>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.sqshl.v8i16; the expected output is a single sqshl.8h.
147 define <8 x i16> @sqshl8h(ptr %A, ptr %B) nounwind {
148 ; CHECK-LABEL: sqshl8h:
150 ; CHECK-NEXT: ldr q0, [x0]
151 ; CHECK-NEXT: ldr q1, [x1]
152 ; CHECK-NEXT: sqshl.8h v0, v0, v1
154 %tmp1 = load <8 x i16>, ptr %A
155 %tmp2 = load <8 x i16>, ptr %B
156 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; sqshl on <4 x i32>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.sqshl.v4i32; the expected output is a single sqshl.4s.
160 define <4 x i32> @sqshl4s(ptr %A, ptr %B) nounwind {
161 ; CHECK-LABEL: sqshl4s:
163 ; CHECK-NEXT: ldr q0, [x0]
164 ; CHECK-NEXT: ldr q1, [x1]
165 ; CHECK-NEXT: sqshl.4s v0, v0, v1
167 %tmp1 = load <4 x i32>, ptr %A
168 %tmp2 = load <4 x i32>, ptr %B
169 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; sqshl on <2 x i64>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.sqshl.v2i64; the expected output is a single sqshl.2d.
173 define <2 x i64> @sqshl2d(ptr %A, ptr %B) nounwind {
174 ; CHECK-LABEL: sqshl2d:
176 ; CHECK-NEXT: ldr q0, [x0]
177 ; CHECK-NEXT: ldr q1, [x1]
178 ; CHECK-NEXT: sqshl.2d v0, v0, v1
180 %tmp1 = load <2 x i64>, ptr %A
181 %tmp2 = load <2 x i64>, ptr %B
182 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; uqshl on <16 x i8>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.uqshl.v16i8; the expected output is a single uqshl.16b.
186 define <16 x i8> @uqshl16b(ptr %A, ptr %B) nounwind {
187 ; CHECK-LABEL: uqshl16b:
189 ; CHECK-NEXT: ldr q0, [x0]
190 ; CHECK-NEXT: ldr q1, [x1]
191 ; CHECK-NEXT: uqshl.16b v0, v0, v1
193 %tmp1 = load <16 x i8>, ptr %A
194 %tmp2 = load <16 x i8>, ptr %B
195 %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; uqshl on <8 x i16>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.uqshl.v8i16; the expected output is a single uqshl.8h.
199 define <8 x i16> @uqshl8h(ptr %A, ptr %B) nounwind {
200 ; CHECK-LABEL: uqshl8h:
202 ; CHECK-NEXT: ldr q0, [x0]
203 ; CHECK-NEXT: ldr q1, [x1]
204 ; CHECK-NEXT: uqshl.8h v0, v0, v1
206 %tmp1 = load <8 x i16>, ptr %A
207 %tmp2 = load <8 x i16>, ptr %B
208 %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; uqshl on <4 x i32>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.uqshl.v4i32; the expected output is a single uqshl.4s.
212 define <4 x i32> @uqshl4s(ptr %A, ptr %B) nounwind {
213 ; CHECK-LABEL: uqshl4s:
215 ; CHECK-NEXT: ldr q0, [x0]
216 ; CHECK-NEXT: ldr q1, [x1]
217 ; CHECK-NEXT: uqshl.4s v0, v0, v1
219 %tmp1 = load <4 x i32>, ptr %A
220 %tmp2 = load <4 x i32>, ptr %B
221 %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; uqshl on <2 x i64>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.uqshl.v2i64; the expected output is a single uqshl.2d.
225 define <2 x i64> @uqshl2d(ptr %A, ptr %B) nounwind {
226 ; CHECK-LABEL: uqshl2d:
228 ; CHECK-NEXT: ldr q0, [x0]
229 ; CHECK-NEXT: ldr q1, [x1]
230 ; CHECK-NEXT: uqshl.2d v0, v0, v1
232 %tmp1 = load <2 x i64>, ptr %A
233 %tmp2 = load <2 x i64>, ptr %B
234 %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; uqshl on <1 x i64>: both operands are loaded from memory; the expected
; output uses the d-register form `uqshl d0, d0, d1`.
238 define <1 x i64> @uqshl1d(ptr %A, ptr %B) nounwind {
239 ; CHECK-LABEL: uqshl1d:
241 ; CHECK-NEXT: ldr d0, [x0]
242 ; CHECK-NEXT: ldr d1, [x1]
243 ; CHECK-NEXT: uqshl d0, d0, d1
245 %tmp1 = load <1 x i64>, ptr %A
246 %tmp2 = load <1 x i64>, ptr %B
247 %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
; uqshl on <1 x i64> with the constant shift vector <i64 1>: the assertions
; expect the immediate form `uqshl d0, d0, #1` with no second load.
251 define <1 x i64> @uqshl1d_constant(ptr %A) nounwind {
252 ; CHECK-LABEL: uqshl1d_constant:
254 ; CHECK-NEXT: ldr d0, [x0]
255 ; CHECK-NEXT: uqshl d0, d0, #1
257 %tmp1 = load <1 x i64>, ptr %A
258 %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; Scalar i64 uqshl with both operands loaded from memory: the expected code
; loads into x8/x9, moves them to d0/d1 with fmov, performs the shift on
; d-registers, and moves the result back to x0.
262 define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind {
263 ; CHECK-LABEL: uqshl_scalar:
265 ; CHECK-NEXT: ldr x8, [x0]
266 ; CHECK-NEXT: ldr x9, [x1]
267 ; CHECK-NEXT: fmov d0, x8
268 ; CHECK-NEXT: fmov d1, x9
269 ; CHECK-NEXT: uqshl d0, d0, d1
270 ; CHECK-NEXT: fmov x0, d0
272 %tmp1 = load i64, ptr %A
273 %tmp2 = load i64, ptr %B
274 %tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 %tmp2)
; Scalar i64 uqshl with constant shift amount 1: the expected code loads the
; value straight into d0, uses the immediate form `uqshl d0, d0, #1`, and
; moves the result to x0.
278 define i64 @uqshl_scalar_constant(ptr %A) nounwind {
279 ; CHECK-LABEL: uqshl_scalar_constant:
281 ; CHECK-NEXT: ldr d0, [x0]
282 ; CHECK-NEXT: uqshl d0, d0, #1
283 ; CHECK-NEXT: fmov x0, d0
285 %tmp1 = load i64, ptr %A
286 %tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 1)
; Declarations of the sqshl and uqshl intrinsics called by the tests in this
; file, one per operand type: <8 x i8>, <4 x i16>, <2 x i32>, <1 x i64>,
; scalar i64, then <16 x i8>, <8 x i16>, <4 x i32> and <2 x i64>.
290 declare <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
291 declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
292 declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
293 declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
294 declare i64 @llvm.aarch64.neon.sqshl.i64(i64, i64) nounwind readnone
297 declare <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
298 declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
299 declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
300 declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
301 declare i64 @llvm.aarch64.neon.uqshl.i64(i64, i64) nounwind readnone
303 declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
304 declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
305 declare <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
306 declare <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
308 declare <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
309 declare <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
310 declare <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
311 declare <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; srshl on <8 x i8>: both operands are loaded from %A and %B and passed to
; @llvm.aarch64.neon.srshl.v8i8; the expected output is a single srshl.8b.
313 define <8 x i8> @srshl8b(ptr %A, ptr %B) nounwind {
314 ; CHECK-LABEL: srshl8b:
316 ; CHECK-NEXT: ldr d0, [x0]
317 ; CHECK-NEXT: ldr d1, [x1]
318 ; CHECK-NEXT: srshl.8b v0, v0, v1
320 %tmp1 = load <8 x i8>, ptr %A
321 %tmp2 = load <8 x i8>, ptr %B
322 %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; srshl on <4 x i16>: both operands are loaded from %A and %B and passed to
; @llvm.aarch64.neon.srshl.v4i16; the expected output is a single srshl.4h.
326 define <4 x i16> @srshl4h(ptr %A, ptr %B) nounwind {
327 ; CHECK-LABEL: srshl4h:
329 ; CHECK-NEXT: ldr d0, [x0]
330 ; CHECK-NEXT: ldr d1, [x1]
331 ; CHECK-NEXT: srshl.4h v0, v0, v1
333 %tmp1 = load <4 x i16>, ptr %A
334 %tmp2 = load <4 x i16>, ptr %B
335 %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; srshl on <2 x i32>: both operands are loaded from %A and %B and passed to
; @llvm.aarch64.neon.srshl.v2i32; the expected output is a single srshl.2s.
339 define <2 x i32> @srshl2s(ptr %A, ptr %B) nounwind {
340 ; CHECK-LABEL: srshl2s:
342 ; CHECK-NEXT: ldr d0, [x0]
343 ; CHECK-NEXT: ldr d1, [x1]
344 ; CHECK-NEXT: srshl.2s v0, v0, v1
346 %tmp1 = load <2 x i32>, ptr %A
347 %tmp2 = load <2 x i32>, ptr %B
348 %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; srshl on <1 x i64>: both operands are loaded from memory; the expected
; output uses the d-register form `srshl d0, d0, d1`.
352 define <1 x i64> @srshl1d(ptr %A, ptr %B) nounwind {
353 ; CHECK-LABEL: srshl1d:
355 ; CHECK-NEXT: ldr d0, [x0]
356 ; CHECK-NEXT: ldr d1, [x1]
357 ; CHECK-NEXT: srshl d0, d0, d1
359 %tmp1 = load <1 x i64>, ptr %A
360 %tmp2 = load <1 x i64>, ptr %B
361 %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
; srshl on <1 x i64> with the constant shift vector <i64 1>: the assertions
; expect the constant to be materialized (mov into w8, fmov into d1) and the
; register form `srshl d0, d0, d1` to be used.
365 define <1 x i64> @srshl1d_constant(ptr %A) nounwind {
366 ; CHECK-LABEL: srshl1d_constant:
368 ; CHECK-NEXT: mov w8, #1 // =0x1
369 ; CHECK-NEXT: ldr d0, [x0]
370 ; CHECK-NEXT: fmov d1, x8
371 ; CHECK-NEXT: srshl d0, d0, d1
373 %tmp1 = load <1 x i64>, ptr %A
374 %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; Scalar i64 srshl with both operands loaded from memory: the expected code
; loads into x8/x9, moves them to d0/d1 with fmov, performs the shift on
; d-registers, and moves the result back to x0.
378 define i64 @srshl_scalar(ptr %A, ptr %B) nounwind {
379 ; CHECK-LABEL: srshl_scalar:
381 ; CHECK-NEXT: ldr x8, [x0]
382 ; CHECK-NEXT: ldr x9, [x1]
383 ; CHECK-NEXT: fmov d0, x8
384 ; CHECK-NEXT: fmov d1, x9
385 ; CHECK-NEXT: srshl d0, d0, d1
386 ; CHECK-NEXT: fmov x0, d0
388 %tmp1 = load i64, ptr %A
389 %tmp2 = load i64, ptr %B
390 %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 %tmp2)
; Scalar i64 srshl with constant shift amount 1: the assertions expect the
; constant to be materialized in w9 and moved to d1, the loaded value moved
; to d0, and the register form `srshl d0, d0, d1` to be used.
394 define i64 @srshl_scalar_constant(ptr %A) nounwind {
395 ; CHECK-LABEL: srshl_scalar_constant:
397 ; CHECK-NEXT: ldr x8, [x0]
398 ; CHECK-NEXT: mov w9, #1 // =0x1
399 ; CHECK-NEXT: fmov d1, x9
400 ; CHECK-NEXT: fmov d0, x8
401 ; CHECK-NEXT: srshl d0, d0, d1
402 ; CHECK-NEXT: fmov x0, d0
404 %tmp1 = load i64, ptr %A
405 %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 1)
; urshl on <8 x i8>: both operands are loaded from %A and %B and passed to
; @llvm.aarch64.neon.urshl.v8i8; the expected output is a single urshl.8b.
409 define <8 x i8> @urshl8b(ptr %A, ptr %B) nounwind {
410 ; CHECK-LABEL: urshl8b:
412 ; CHECK-NEXT: ldr d0, [x0]
413 ; CHECK-NEXT: ldr d1, [x1]
414 ; CHECK-NEXT: urshl.8b v0, v0, v1
416 %tmp1 = load <8 x i8>, ptr %A
417 %tmp2 = load <8 x i8>, ptr %B
418 %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; urshl on <4 x i16>: both operands are loaded from %A and %B and passed to
; @llvm.aarch64.neon.urshl.v4i16; the expected output is a single urshl.4h.
422 define <4 x i16> @urshl4h(ptr %A, ptr %B) nounwind {
423 ; CHECK-LABEL: urshl4h:
425 ; CHECK-NEXT: ldr d0, [x0]
426 ; CHECK-NEXT: ldr d1, [x1]
427 ; CHECK-NEXT: urshl.4h v0, v0, v1
429 %tmp1 = load <4 x i16>, ptr %A
430 %tmp2 = load <4 x i16>, ptr %B
431 %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; urshl on <2 x i32>: both operands are loaded from %A and %B and passed to
; @llvm.aarch64.neon.urshl.v2i32; the expected output is a single urshl.2s.
435 define <2 x i32> @urshl2s(ptr %A, ptr %B) nounwind {
436 ; CHECK-LABEL: urshl2s:
438 ; CHECK-NEXT: ldr d0, [x0]
439 ; CHECK-NEXT: ldr d1, [x1]
440 ; CHECK-NEXT: urshl.2s v0, v0, v1
442 %tmp1 = load <2 x i32>, ptr %A
443 %tmp2 = load <2 x i32>, ptr %B
444 %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; urshl on <1 x i64>: both operands are loaded from memory; the expected
; output uses the d-register form `urshl d0, d0, d1`.
448 define <1 x i64> @urshl1d(ptr %A, ptr %B) nounwind {
449 ; CHECK-LABEL: urshl1d:
451 ; CHECK-NEXT: ldr d0, [x0]
452 ; CHECK-NEXT: ldr d1, [x1]
453 ; CHECK-NEXT: urshl d0, d0, d1
455 %tmp1 = load <1 x i64>, ptr %A
456 %tmp2 = load <1 x i64>, ptr %B
457 %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
; urshl on <1 x i64> with the constant shift vector <i64 1>: the assertions
; expect the constant to be materialized (mov into w8, fmov into d1) and the
; register form `urshl d0, d0, d1` to be used.
461 define <1 x i64> @urshl1d_constant(ptr %A) nounwind {
462 ; CHECK-LABEL: urshl1d_constant:
464 ; CHECK-NEXT: mov w8, #1 // =0x1
465 ; CHECK-NEXT: ldr d0, [x0]
466 ; CHECK-NEXT: fmov d1, x8
467 ; CHECK-NEXT: urshl d0, d0, d1
469 %tmp1 = load <1 x i64>, ptr %A
470 %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; Scalar i64 urshl with both operands loaded from memory: the expected code
; loads into x8/x9, moves them to d0/d1 with fmov, performs the shift on
; d-registers, and moves the result back to x0.
474 define i64 @urshl_scalar(ptr %A, ptr %B) nounwind {
475 ; CHECK-LABEL: urshl_scalar:
477 ; CHECK-NEXT: ldr x8, [x0]
478 ; CHECK-NEXT: ldr x9, [x1]
479 ; CHECK-NEXT: fmov d0, x8
480 ; CHECK-NEXT: fmov d1, x9
481 ; CHECK-NEXT: urshl d0, d0, d1
482 ; CHECK-NEXT: fmov x0, d0
484 %tmp1 = load i64, ptr %A
485 %tmp2 = load i64, ptr %B
486 %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 %tmp2)
; Scalar i64 urshl with constant shift amount 1: the assertions expect the
; constant to be materialized in w9 and moved to d1, the loaded value moved
; to d0, and the register form `urshl d0, d0, d1` to be used.
490 define i64 @urshl_scalar_constant(ptr %A) nounwind {
491 ; CHECK-LABEL: urshl_scalar_constant:
493 ; CHECK-NEXT: ldr x8, [x0]
494 ; CHECK-NEXT: mov w9, #1 // =0x1
495 ; CHECK-NEXT: fmov d1, x9
496 ; CHECK-NEXT: fmov d0, x8
497 ; CHECK-NEXT: urshl d0, d0, d1
498 ; CHECK-NEXT: fmov x0, d0
500 %tmp1 = load i64, ptr %A
501 %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 1)
; srshl on <16 x i8>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.srshl.v16i8; the expected output is a single srshl.16b.
505 define <16 x i8> @srshl16b(ptr %A, ptr %B) nounwind {
506 ; CHECK-LABEL: srshl16b:
508 ; CHECK-NEXT: ldr q0, [x0]
509 ; CHECK-NEXT: ldr q1, [x1]
510 ; CHECK-NEXT: srshl.16b v0, v0, v1
512 %tmp1 = load <16 x i8>, ptr %A
513 %tmp2 = load <16 x i8>, ptr %B
514 %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; srshl on <8 x i16>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.srshl.v8i16; the expected output is a single srshl.8h.
518 define <8 x i16> @srshl8h(ptr %A, ptr %B) nounwind {
519 ; CHECK-LABEL: srshl8h:
521 ; CHECK-NEXT: ldr q0, [x0]
522 ; CHECK-NEXT: ldr q1, [x1]
523 ; CHECK-NEXT: srshl.8h v0, v0, v1
525 %tmp1 = load <8 x i16>, ptr %A
526 %tmp2 = load <8 x i16>, ptr %B
527 %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; srshl on <4 x i32>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.srshl.v4i32; the expected output is a single srshl.4s.
531 define <4 x i32> @srshl4s(ptr %A, ptr %B) nounwind {
532 ; CHECK-LABEL: srshl4s:
534 ; CHECK-NEXT: ldr q0, [x0]
535 ; CHECK-NEXT: ldr q1, [x1]
536 ; CHECK-NEXT: srshl.4s v0, v0, v1
538 %tmp1 = load <4 x i32>, ptr %A
539 %tmp2 = load <4 x i32>, ptr %B
540 %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; srshl on <2 x i64>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.srshl.v2i64; the expected output is a single srshl.2d.
544 define <2 x i64> @srshl2d(ptr %A, ptr %B) nounwind {
545 ; CHECK-LABEL: srshl2d:
547 ; CHECK-NEXT: ldr q0, [x0]
548 ; CHECK-NEXT: ldr q1, [x1]
549 ; CHECK-NEXT: srshl.2d v0, v0, v1
551 %tmp1 = load <2 x i64>, ptr %A
552 %tmp2 = load <2 x i64>, ptr %B
553 %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; urshl on <16 x i8>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.urshl.v16i8; the expected output is a single urshl.16b.
557 define <16 x i8> @urshl16b(ptr %A, ptr %B) nounwind {
558 ; CHECK-LABEL: urshl16b:
560 ; CHECK-NEXT: ldr q0, [x0]
561 ; CHECK-NEXT: ldr q1, [x1]
562 ; CHECK-NEXT: urshl.16b v0, v0, v1
564 %tmp1 = load <16 x i8>, ptr %A
565 %tmp2 = load <16 x i8>, ptr %B
566 %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; urshl on <8 x i16>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.urshl.v8i16; the expected output is a single urshl.8h.
570 define <8 x i16> @urshl8h(ptr %A, ptr %B) nounwind {
571 ; CHECK-LABEL: urshl8h:
573 ; CHECK-NEXT: ldr q0, [x0]
574 ; CHECK-NEXT: ldr q1, [x1]
575 ; CHECK-NEXT: urshl.8h v0, v0, v1
577 %tmp1 = load <8 x i16>, ptr %A
578 %tmp2 = load <8 x i16>, ptr %B
579 %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; urshl on <4 x i32>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.urshl.v4i32; the expected output is a single urshl.4s.
583 define <4 x i32> @urshl4s(ptr %A, ptr %B) nounwind {
584 ; CHECK-LABEL: urshl4s:
586 ; CHECK-NEXT: ldr q0, [x0]
587 ; CHECK-NEXT: ldr q1, [x1]
588 ; CHECK-NEXT: urshl.4s v0, v0, v1
590 %tmp1 = load <4 x i32>, ptr %A
591 %tmp2 = load <4 x i32>, ptr %B
592 %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; urshl on <2 x i64>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.urshl.v2i64; the expected output is a single urshl.2d.
596 define <2 x i64> @urshl2d(ptr %A, ptr %B) nounwind {
597 ; CHECK-LABEL: urshl2d:
599 ; CHECK-NEXT: ldr q0, [x0]
600 ; CHECK-NEXT: ldr q1, [x1]
601 ; CHECK-NEXT: urshl.2d v0, v0, v1
603 %tmp1 = load <2 x i64>, ptr %A
604 %tmp2 = load <2 x i64>, ptr %B
605 %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; Declarations of the srshl and urshl intrinsics called by the tests in this
; file, one per operand type: <8 x i8>, <4 x i16>, <2 x i32>, <1 x i64>,
; scalar i64, then <16 x i8>, <8 x i16>, <4 x i32> and <2 x i64>.
609 declare <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
610 declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
611 declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
612 declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
613 declare i64 @llvm.aarch64.neon.srshl.i64(i64, i64) nounwind readnone
615 declare <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
616 declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
617 declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
618 declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
619 declare i64 @llvm.aarch64.neon.urshl.i64(i64, i64) nounwind readnone
621 declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
622 declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
623 declare <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
624 declare <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
626 declare <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
627 declare <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
628 declare <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
629 declare <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; sqrshl on <8 x i8>: both operands are loaded from %A and %B and passed to
; @llvm.aarch64.neon.sqrshl.v8i8; the expected output is a single sqrshl.8b.
631 define <8 x i8> @sqrshl8b(ptr %A, ptr %B) nounwind {
632 ; CHECK-LABEL: sqrshl8b:
634 ; CHECK-NEXT: ldr d0, [x0]
635 ; CHECK-NEXT: ldr d1, [x1]
636 ; CHECK-NEXT: sqrshl.8b v0, v0, v1
638 %tmp1 = load <8 x i8>, ptr %A
639 %tmp2 = load <8 x i8>, ptr %B
640 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; sqrshl on <4 x i16>: both operands are loaded from %A and %B and passed to
; @llvm.aarch64.neon.sqrshl.v4i16; the expected output is a single sqrshl.4h.
644 define <4 x i16> @sqrshl4h(ptr %A, ptr %B) nounwind {
645 ; CHECK-LABEL: sqrshl4h:
647 ; CHECK-NEXT: ldr d0, [x0]
648 ; CHECK-NEXT: ldr d1, [x1]
649 ; CHECK-NEXT: sqrshl.4h v0, v0, v1
651 %tmp1 = load <4 x i16>, ptr %A
652 %tmp2 = load <4 x i16>, ptr %B
653 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; sqrshl on <2 x i32>: both operands are loaded from %A and %B and passed to
; @llvm.aarch64.neon.sqrshl.v2i32; the expected output is a single sqrshl.2s.
657 define <2 x i32> @sqrshl2s(ptr %A, ptr %B) nounwind {
658 ; CHECK-LABEL: sqrshl2s:
660 ; CHECK-NEXT: ldr d0, [x0]
661 ; CHECK-NEXT: ldr d1, [x1]
662 ; CHECK-NEXT: sqrshl.2s v0, v0, v1
664 %tmp1 = load <2 x i32>, ptr %A
665 %tmp2 = load <2 x i32>, ptr %B
666 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; uqrshl on <8 x i8>: both operands are loaded from %A and %B and passed to
; @llvm.aarch64.neon.uqrshl.v8i8; the expected output is a single uqrshl.8b.
670 define <8 x i8> @uqrshl8b(ptr %A, ptr %B) nounwind {
671 ; CHECK-LABEL: uqrshl8b:
673 ; CHECK-NEXT: ldr d0, [x0]
674 ; CHECK-NEXT: ldr d1, [x1]
675 ; CHECK-NEXT: uqrshl.8b v0, v0, v1
677 %tmp1 = load <8 x i8>, ptr %A
678 %tmp2 = load <8 x i8>, ptr %B
679 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; uqrshl on <4 x i16>: both operands are loaded from %A and %B and passed to
; @llvm.aarch64.neon.uqrshl.v4i16; the expected output is a single uqrshl.4h.
683 define <4 x i16> @uqrshl4h(ptr %A, ptr %B) nounwind {
684 ; CHECK-LABEL: uqrshl4h:
686 ; CHECK-NEXT: ldr d0, [x0]
687 ; CHECK-NEXT: ldr d1, [x1]
688 ; CHECK-NEXT: uqrshl.4h v0, v0, v1
690 %tmp1 = load <4 x i16>, ptr %A
691 %tmp2 = load <4 x i16>, ptr %B
692 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; uqrshl on <2 x i32>: both operands are loaded from %A and %B and passed to
; @llvm.aarch64.neon.uqrshl.v2i32; the expected output is a single uqrshl.2s.
696 define <2 x i32> @uqrshl2s(ptr %A, ptr %B) nounwind {
697 ; CHECK-LABEL: uqrshl2s:
699 ; CHECK-NEXT: ldr d0, [x0]
700 ; CHECK-NEXT: ldr d1, [x1]
701 ; CHECK-NEXT: uqrshl.2s v0, v0, v1
703 %tmp1 = load <2 x i32>, ptr %A
704 %tmp2 = load <2 x i32>, ptr %B
705 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; sqrshl on <16 x i8>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.sqrshl.v16i8; the expected output is a single sqrshl.16b.
709 define <16 x i8> @sqrshl16b(ptr %A, ptr %B) nounwind {
710 ; CHECK-LABEL: sqrshl16b:
712 ; CHECK-NEXT: ldr q0, [x0]
713 ; CHECK-NEXT: ldr q1, [x1]
714 ; CHECK-NEXT: sqrshl.16b v0, v0, v1
716 %tmp1 = load <16 x i8>, ptr %A
717 %tmp2 = load <16 x i8>, ptr %B
718 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; sqrshl on <8 x i16>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.sqrshl.v8i16; the expected output is a single sqrshl.8h.
722 define <8 x i16> @sqrshl8h(ptr %A, ptr %B) nounwind {
723 ; CHECK-LABEL: sqrshl8h:
725 ; CHECK-NEXT: ldr q0, [x0]
726 ; CHECK-NEXT: ldr q1, [x1]
727 ; CHECK-NEXT: sqrshl.8h v0, v0, v1
729 %tmp1 = load <8 x i16>, ptr %A
730 %tmp2 = load <8 x i16>, ptr %B
731 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; sqrshl on <4 x i32>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.sqrshl.v4i32; the expected output is a single sqrshl.4s.
735 define <4 x i32> @sqrshl4s(ptr %A, ptr %B) nounwind {
736 ; CHECK-LABEL: sqrshl4s:
738 ; CHECK-NEXT: ldr q0, [x0]
739 ; CHECK-NEXT: ldr q1, [x1]
740 ; CHECK-NEXT: sqrshl.4s v0, v0, v1
742 %tmp1 = load <4 x i32>, ptr %A
743 %tmp2 = load <4 x i32>, ptr %B
744 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; sqrshl on <2 x i64>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.sqrshl.v2i64; the expected output is a single sqrshl.2d.
748 define <2 x i64> @sqrshl2d(ptr %A, ptr %B) nounwind {
749 ; CHECK-LABEL: sqrshl2d:
751 ; CHECK-NEXT: ldr q0, [x0]
752 ; CHECK-NEXT: ldr q1, [x1]
753 ; CHECK-NEXT: sqrshl.2d v0, v0, v1
755 %tmp1 = load <2 x i64>, ptr %A
756 %tmp2 = load <2 x i64>, ptr %B
757 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; sqrshl on <1 x i64>: both operands are loaded from memory; the expected
; output uses the d-register form `sqrshl d0, d0, d1`.
761 define <1 x i64> @sqrshl1d(ptr %A, ptr %B) nounwind {
762 ; CHECK-LABEL: sqrshl1d:
764 ; CHECK-NEXT: ldr d0, [x0]
765 ; CHECK-NEXT: ldr d1, [x1]
766 ; CHECK-NEXT: sqrshl d0, d0, d1
768 %tmp1 = load <1 x i64>, ptr %A
769 %tmp2 = load <1 x i64>, ptr %B
770 %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
; sqrshl on <1 x i64> with the constant shift vector <i64 1>: the assertions
; expect the constant to be materialized (mov into w8, fmov into d1) and the
; register form `sqrshl d0, d0, d1` to be used.
774 define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind {
775 ; CHECK-LABEL: sqrshl1d_constant:
777 ; CHECK-NEXT: mov w8, #1 // =0x1
778 ; CHECK-NEXT: ldr d0, [x0]
779 ; CHECK-NEXT: fmov d1, x8
780 ; CHECK-NEXT: sqrshl d0, d0, d1
782 %tmp1 = load <1 x i64>, ptr %A
783 %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; Scalar i64 sqrshl with both operands loaded from memory: the expected code
; loads into x8/x9, moves them to d0/d1 with fmov, performs the shift on
; d-registers, and moves the result back to x0.
787 define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind {
788 ; CHECK-LABEL: sqrshl_scalar:
790 ; CHECK-NEXT: ldr x8, [x0]
791 ; CHECK-NEXT: ldr x9, [x1]
792 ; CHECK-NEXT: fmov d0, x8
793 ; CHECK-NEXT: fmov d1, x9
794 ; CHECK-NEXT: sqrshl d0, d0, d1
795 ; CHECK-NEXT: fmov x0, d0
797 %tmp1 = load i64, ptr %A
798 %tmp2 = load i64, ptr %B
799 %tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 %tmp2)
; Scalar i64 sqrshl with constant shift amount 1: the assertions expect the
; constant to be materialized in w9 and moved to d1, the loaded value moved
; to d0, and the register form `sqrshl d0, d0, d1` to be used.
803 define i64 @sqrshl_scalar_constant(ptr %A) nounwind {
804 ; CHECK-LABEL: sqrshl_scalar_constant:
806 ; CHECK-NEXT: ldr x8, [x0]
807 ; CHECK-NEXT: mov w9, #1 // =0x1
808 ; CHECK-NEXT: fmov d1, x9
809 ; CHECK-NEXT: fmov d0, x8
810 ; CHECK-NEXT: sqrshl d0, d0, d1
811 ; CHECK-NEXT: fmov x0, d0
813 %tmp1 = load i64, ptr %A
814 %tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 1)
; uqrshl on <16 x i8>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.uqrshl.v16i8; the expected output is a single uqrshl.16b.
818 define <16 x i8> @uqrshl16b(ptr %A, ptr %B) nounwind {
819 ; CHECK-LABEL: uqrshl16b:
821 ; CHECK-NEXT: ldr q0, [x0]
822 ; CHECK-NEXT: ldr q1, [x1]
823 ; CHECK-NEXT: uqrshl.16b v0, v0, v1
825 %tmp1 = load <16 x i8>, ptr %A
826 %tmp2 = load <16 x i8>, ptr %B
827 %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; uqrshl on <8 x i16>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.uqrshl.v8i16; the expected output is a single uqrshl.8h.
831 define <8 x i16> @uqrshl8h(ptr %A, ptr %B) nounwind {
832 ; CHECK-LABEL: uqrshl8h:
834 ; CHECK-NEXT: ldr q0, [x0]
835 ; CHECK-NEXT: ldr q1, [x1]
836 ; CHECK-NEXT: uqrshl.8h v0, v0, v1
838 %tmp1 = load <8 x i16>, ptr %A
839 %tmp2 = load <8 x i16>, ptr %B
840 %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; uqrshl on <4 x i32>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.uqrshl.v4i32; the expected output is a single uqrshl.4s.
844 define <4 x i32> @uqrshl4s(ptr %A, ptr %B) nounwind {
845 ; CHECK-LABEL: uqrshl4s:
847 ; CHECK-NEXT: ldr q0, [x0]
848 ; CHECK-NEXT: ldr q1, [x1]
849 ; CHECK-NEXT: uqrshl.4s v0, v0, v1
851 %tmp1 = load <4 x i32>, ptr %A
852 %tmp2 = load <4 x i32>, ptr %B
853 %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; uqrshl on <2 x i64>: operands are loaded into q-registers and passed to
; @llvm.aarch64.neon.uqrshl.v2i64; the expected output is a single uqrshl.2d.
857 define <2 x i64> @uqrshl2d(ptr %A, ptr %B) nounwind {
858 ; CHECK-LABEL: uqrshl2d:
860 ; CHECK-NEXT: ldr q0, [x0]
861 ; CHECK-NEXT: ldr q1, [x1]
862 ; CHECK-NEXT: uqrshl.2d v0, v0, v1
864 %tmp1 = load <2 x i64>, ptr %A
865 %tmp2 = load <2 x i64>, ptr %B
866 %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; uqrshl on <1 x i64>: both operands are loaded from memory; the expected
; output uses the d-register form `uqrshl d0, d0, d1`.
870 define <1 x i64> @uqrshl1d(ptr %A, ptr %B) nounwind {
871 ; CHECK-LABEL: uqrshl1d:
873 ; CHECK-NEXT: ldr d0, [x0]
874 ; CHECK-NEXT: ldr d1, [x1]
875 ; CHECK-NEXT: uqrshl d0, d0, d1
877 %tmp1 = load <1 x i64>, ptr %A
878 %tmp2 = load <1 x i64>, ptr %B
879 %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
; uqrshl on <1 x i64> with the constant shift vector <i64 1>: the assertions
; expect the constant to be materialized (mov into w8, fmov into d1) and the
; register form `uqrshl d0, d0, d1` to be used.
883 define <1 x i64> @uqrshl1d_constant(ptr %A) nounwind {
884 ; CHECK-LABEL: uqrshl1d_constant:
886 ; CHECK-NEXT: mov w8, #1 // =0x1
887 ; CHECK-NEXT: ldr d0, [x0]
888 ; CHECK-NEXT: fmov d1, x8
889 ; CHECK-NEXT: uqrshl d0, d0, d1
891 %tmp1 = load <1 x i64>, ptr %A
892 %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; Scalar i64 uqrshl with both operands loaded from memory: the expected code
; loads into x8/x9, moves them to d0/d1 with fmov, performs the shift on
; d-registers, and moves the result back to x0.
896 define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind {
897 ; CHECK-LABEL: uqrshl_scalar:
899 ; CHECK-NEXT: ldr x8, [x0]
900 ; CHECK-NEXT: ldr x9, [x1]
901 ; CHECK-NEXT: fmov d0, x8
902 ; CHECK-NEXT: fmov d1, x9
903 ; CHECK-NEXT: uqrshl d0, d0, d1
904 ; CHECK-NEXT: fmov x0, d0
906 %tmp1 = load i64, ptr %A
907 %tmp2 = load i64, ptr %B
908 %tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 %tmp2)
; Scalar i64 uqrshl with constant shift amount 1: the assertions expect the
; constant to be materialized in w9 and moved to d1, the loaded value moved
; to d0, and the register form `uqrshl d0, d0, d1` to be used.
912 define i64 @uqrshl_scalar_constant(ptr %A) nounwind {
913 ; CHECK-LABEL: uqrshl_scalar_constant:
915 ; CHECK-NEXT: ldr x8, [x0]
916 ; CHECK-NEXT: mov w9, #1 // =0x1
917 ; CHECK-NEXT: fmov d1, x9
918 ; CHECK-NEXT: fmov d0, x8
919 ; CHECK-NEXT: uqrshl d0, d0, d1
920 ; CHECK-NEXT: fmov x0, d0
922 %tmp1 = load i64, ptr %A
923 %tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 1)
; Declarations of the sqrshl and uqrshl intrinsics called by the tests in this
; file, one per operand type: <8 x i8>, <4 x i16>, <2 x i32>, <1 x i64>,
; scalar i64, then <16 x i8>, <8 x i16>, <4 x i32> and <2 x i64>.
927 declare <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
928 declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
929 declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
930 declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
931 declare i64 @llvm.aarch64.neon.sqrshl.i64(i64, i64) nounwind readnone
933 declare <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
934 declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
935 declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
936 declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
937 declare i64 @llvm.aarch64.neon.uqrshl.i64(i64, i64) nounwind readnone
939 declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
940 declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
941 declare <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
942 declare <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
944 declare <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
945 declare <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
946 declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
947 declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; urshl on <8 x i8> with every shift lane set to -1: the assertions expect
; this to be selected as the immediate right shift `urshr.8b v0, v0, #1`.
949 define <8 x i8> @urshr8b(ptr %A) nounwind {
950 ; CHECK-LABEL: urshr8b:
952 ; CHECK-NEXT: ldr d0, [x0]
953 ; CHECK-NEXT: urshr.8b v0, v0, #1
955 %tmp1 = load <8 x i8>, ptr %A
956 %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; urshl on <4 x i16> with every shift lane set to -1: the assertions expect
; this to be selected as the immediate right shift `urshr.4h v0, v0, #1`.
960 define <4 x i16> @urshr4h(ptr %A) nounwind {
961 ; CHECK-LABEL: urshr4h:
963 ; CHECK-NEXT: ldr d0, [x0]
964 ; CHECK-NEXT: urshr.4h v0, v0, #1
966 %tmp1 = load <4 x i16>, ptr %A
967 %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
; urshl on <2 x i32> with every shift lane set to -1: the assertions expect
; this to be selected as the immediate right shift `urshr.2s v0, v0, #1`.
971 define <2 x i32> @urshr2s(ptr %A) nounwind {
972 ; CHECK-LABEL: urshr2s:
974 ; CHECK-NEXT: ldr d0, [x0]
975 ; CHECK-NEXT: urshr.2s v0, v0, #1
977 %tmp1 = load <2 x i32>, ptr %A
978 %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
; urshl on <16 x i8> with every shift lane set to -1: the assertions expect
; this to be selected as the immediate right shift `urshr.16b v0, v0, #1`.
982 define <16 x i8> @urshr16b(ptr %A) nounwind {
983 ; CHECK-LABEL: urshr16b:
985 ; CHECK-NEXT: ldr q0, [x0]
986 ; CHECK-NEXT: urshr.16b v0, v0, #1
988 %tmp1 = load <16 x i8>, ptr %A
989 %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; urshl on <8 x i16> with every shift lane set to -1: the assertions expect
; this to be selected as the immediate right shift `urshr.8h v0, v0, #1`.
993 define <8 x i16> @urshr8h(ptr %A) nounwind {
994 ; CHECK-LABEL: urshr8h:
996 ; CHECK-NEXT: ldr q0, [x0]
997 ; CHECK-NEXT: urshr.8h v0, v0, #1
999 %tmp1 = load <8 x i16>, ptr %A
1000 %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
; urshl on <4 x i32> with every shift lane set to -1: the assertions expect
; this to be selected as the immediate right shift `urshr.4s v0, v0, #1`.
1004 define <4 x i32> @urshr4s(ptr %A) nounwind {
1005 ; CHECK-LABEL: urshr4s:
1007 ; CHECK-NEXT: ldr q0, [x0]
1008 ; CHECK-NEXT: urshr.4s v0, v0, #1
1010 %tmp1 = load <4 x i32>, ptr %A
1011 %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; urshl on <2 x i64> with every shift lane set to -1: the assertions expect
; this to be selected as the immediate right shift `urshr.2d v0, v0, #1`.
1015 define <2 x i64> @urshr2d(ptr %A) nounwind {
1016 ; CHECK-LABEL: urshr2d:
1018 ; CHECK-NEXT: ldr q0, [x0]
1019 ; CHECK-NEXT: urshr.2d v0, v0, #1
1021 %tmp1 = load <2 x i64>, ptr %A
1022 %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
; urshl on <1 x i64> with the constant shift <i64 -1>: the assertions expect
; this to be selected as the immediate right shift `urshr d0, d0, #1`.
1026 define <1 x i64> @urshr1d(ptr %A) nounwind {
1027 ; CHECK-LABEL: urshr1d:
1029 ; CHECK-NEXT: ldr d0, [x0]
1030 ; CHECK-NEXT: urshr d0, d0, #1
1032 %tmp1 = load <1 x i64>, ptr %A
1033 %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
; Scalar i64 urshl with constant shift -1: the expected code loads the value
; straight into d0, uses `urshr d0, d0, #1`, and moves the result to x0.
1037 define i64 @urshr_scalar(ptr %A) nounwind {
1038 ; CHECK-LABEL: urshr_scalar:
1040 ; CHECK-NEXT: ldr d0, [x0]
1041 ; CHECK-NEXT: urshr d0, d0, #1
1042 ; CHECK-NEXT: fmov x0, d0
1044 %tmp1 = load i64, ptr %A
1045 %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1)
; srshl on <8 x i8> with every shift lane set to -1: the assertions expect
; this to be selected as the immediate right shift `srshr.8b v0, v0, #1`.
1049 define <8 x i8> @srshr8b(ptr %A) nounwind {
1050 ; CHECK-LABEL: srshr8b:
1052 ; CHECK-NEXT: ldr d0, [x0]
1053 ; CHECK-NEXT: srshr.8b v0, v0, #1
1055 %tmp1 = load <8 x i8>, ptr %A
1056 %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; srshl on <4 x i16> with every shift lane set to -1: the assertions expect
; this to be selected as the immediate right shift `srshr.4h v0, v0, #1`.
1060 define <4 x i16> @srshr4h(ptr %A) nounwind {
1061 ; CHECK-LABEL: srshr4h:
1063 ; CHECK-NEXT: ldr d0, [x0]
1064 ; CHECK-NEXT: srshr.4h v0, v0, #1
1066 %tmp1 = load <4 x i16>, ptr %A
1067 %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
; srshl on <2 x i32> with every shift lane set to -1: the assertions expect
; this to be selected as the immediate right shift `srshr.2s v0, v0, #1`.
1071 define <2 x i32> @srshr2s(ptr %A) nounwind {
1072 ; CHECK-LABEL: srshr2s:
1074 ; CHECK-NEXT: ldr d0, [x0]
1075 ; CHECK-NEXT: srshr.2s v0, v0, #1
1077 %tmp1 = load <2 x i32>, ptr %A
1078 %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
; srshl on <16 x i8> (128-bit, loaded into q0) with a constant splat shift
; amount of -1: the checks expect srshr.16b #1.
1082 define <16 x i8> @srshr16b(ptr %A) nounwind {
1083 ; CHECK-LABEL: srshr16b:
1085 ; CHECK-NEXT: ldr q0, [x0]
1086 ; CHECK-NEXT: srshr.16b v0, v0, #1
1088 %tmp1 = load <16 x i8>, ptr %A
1089 %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; srshl on <8 x i16> with a constant splat shift amount of -1: the checks
; expect srshr.8h #1.
1093 define <8 x i16> @srshr8h(ptr %A) nounwind {
1094 ; CHECK-LABEL: srshr8h:
1096 ; CHECK-NEXT: ldr q0, [x0]
1097 ; CHECK-NEXT: srshr.8h v0, v0, #1
1099 %tmp1 = load <8 x i16>, ptr %A
1100 %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
; srshl on <4 x i32> with a constant splat shift amount of -1: the checks
; expect srshr.4s #1.
1104 define <4 x i32> @srshr4s(ptr %A) nounwind {
1105 ; CHECK-LABEL: srshr4s:
1107 ; CHECK-NEXT: ldr q0, [x0]
1108 ; CHECK-NEXT: srshr.4s v0, v0, #1
1110 %tmp1 = load <4 x i32>, ptr %A
1111 %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; srshl on <2 x i64> with a constant splat shift amount of -1: the checks
; expect srshr.2d #1.
1115 define <2 x i64> @srshr2d(ptr %A) nounwind {
1116 ; CHECK-LABEL: srshr2d:
1118 ; CHECK-NEXT: ldr q0, [x0]
1119 ; CHECK-NEXT: srshr.2d v0, v0, #1
1121 %tmp1 = load <2 x i64>, ptr %A
1122 %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
; srshl on <1 x i64> with a constant shift amount of -1: the checks expect
; the scalar d-register form srshr d0, d0, #1.
1126 define <1 x i64> @srshr1d(ptr %A) nounwind {
1127 ; CHECK-LABEL: srshr1d:
1129 ; CHECK-NEXT: ldr d0, [x0]
1130 ; CHECK-NEXT: srshr d0, d0, #1
1132 %tmp1 = load <1 x i64>, ptr %A
1133 %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
; Scalar i64 srshl with a constant shift amount of -1: the checks expect the
; value to be loaded directly into d0, shifted with srshr #1, and moved back
; to x0 with fmov.
1137 define i64 @srshr_scalar(ptr %A) nounwind {
1138 ; CHECK-LABEL: srshr_scalar:
1140 ; CHECK-NEXT: ldr d0, [x0]
1141 ; CHECK-NEXT: srshr d0, d0, #1
1142 ; CHECK-NEXT: fmov x0, d0
1144 %tmp1 = load i64, ptr %A
1145 %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1)
; sqshlu on <8 x i8> with a constant splat shift amount of 1: the checks
; expect the immediate form sqshlu.8b #1.
1149 define <8 x i8> @sqshlu8b(ptr %A) nounwind {
1150 ; CHECK-LABEL: sqshlu8b:
1152 ; CHECK-NEXT: ldr d0, [x0]
1153 ; CHECK-NEXT: sqshlu.8b v0, v0, #1
1155 %tmp1 = load <8 x i8>, ptr %A
1156 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; sqshlu on <4 x i16> with a constant splat shift amount of 1: the checks
; expect sqshlu.4h #1.
1160 define <4 x i16> @sqshlu4h(ptr %A) nounwind {
1161 ; CHECK-LABEL: sqshlu4h:
1163 ; CHECK-NEXT: ldr d0, [x0]
1164 ; CHECK-NEXT: sqshlu.4h v0, v0, #1
1166 %tmp1 = load <4 x i16>, ptr %A
1167 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
; sqshlu on <2 x i32> with a constant splat shift amount of 1: the checks
; expect sqshlu.2s #1.
1171 define <2 x i32> @sqshlu2s(ptr %A) nounwind {
1172 ; CHECK-LABEL: sqshlu2s:
1174 ; CHECK-NEXT: ldr d0, [x0]
1175 ; CHECK-NEXT: sqshlu.2s v0, v0, #1
1177 %tmp1 = load <2 x i32>, ptr %A
1178 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
; sqshlu on <16 x i8> (128-bit, loaded into q0) with a constant splat shift
; amount of 1: the checks expect sqshlu.16b #1.
1182 define <16 x i8> @sqshlu16b(ptr %A) nounwind {
1183 ; CHECK-LABEL: sqshlu16b:
1185 ; CHECK-NEXT: ldr q0, [x0]
1186 ; CHECK-NEXT: sqshlu.16b v0, v0, #1
1188 %tmp1 = load <16 x i8>, ptr %A
1189 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; sqshlu on <8 x i16> with a constant splat shift amount of 1: the checks
; expect sqshlu.8h #1.
1193 define <8 x i16> @sqshlu8h(ptr %A) nounwind {
1194 ; CHECK-LABEL: sqshlu8h:
1196 ; CHECK-NEXT: ldr q0, [x0]
1197 ; CHECK-NEXT: sqshlu.8h v0, v0, #1
1199 %tmp1 = load <8 x i16>, ptr %A
1200 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; sqshlu on <4 x i32> with a constant splat shift amount of 1: the checks
; expect sqshlu.4s #1.
1204 define <4 x i32> @sqshlu4s(ptr %A) nounwind {
1205 ; CHECK-LABEL: sqshlu4s:
1207 ; CHECK-NEXT: ldr q0, [x0]
1208 ; CHECK-NEXT: sqshlu.4s v0, v0, #1
1210 %tmp1 = load <4 x i32>, ptr %A
1211 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; sqshlu on <2 x i64> with a constant splat shift amount of 1: the checks
; expect sqshlu.2d #1.
1215 define <2 x i64> @sqshlu2d(ptr %A) nounwind {
1216 ; CHECK-LABEL: sqshlu2d:
1218 ; CHECK-NEXT: ldr q0, [x0]
1219 ; CHECK-NEXT: sqshlu.2d v0, v0, #1
1221 %tmp1 = load <2 x i64>, ptr %A
1222 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
; sqshlu on <1 x i64> with a constant shift amount of 1: the checks expect
; the scalar d-register form sqshlu d0, d0, #1.
1226 define <1 x i64> @sqshlu1d_constant(ptr %A) nounwind {
1227 ; CHECK-LABEL: sqshlu1d_constant:
1229 ; CHECK-NEXT: ldr d0, [x0]
1230 ; CHECK-NEXT: sqshlu d0, d0, #1
1232 %tmp1 = load <1 x i64>, ptr %A
1233 %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; Scalar i64 sqshlu with a constant shift amount of 1: the checks expect the
; value to be loaded directly into d0, shifted with sqshlu #1, and moved back
; to x0 with fmov.
1237 define i64 @sqshlu_i64_constant(ptr %A) nounwind {
1238 ; CHECK-LABEL: sqshlu_i64_constant:
1240 ; CHECK-NEXT: ldr d0, [x0]
1241 ; CHECK-NEXT: sqshlu d0, d0, #1
1242 ; CHECK-NEXT: fmov x0, d0
1244 %tmp1 = load i64, ptr %A
1245 %tmp3 = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %tmp1, i64 1)
; Scalar i32 sqshlu with a constant shift amount of 1. Unlike the i64 variant,
; the expected code loads into a general-purpose register (w8) first and
; transfers to s0 with fmov before the sqshlu s0, s0, #1, then moves the
; result back to w0.
1249 define i32 @sqshlu_i32_constant(ptr %A) nounwind {
1250 ; CHECK-LABEL: sqshlu_i32_constant:
1252 ; CHECK-NEXT: ldr w8, [x0]
1253 ; CHECK-NEXT: fmov s0, w8
1254 ; CHECK-NEXT: sqshlu s0, s0, #1
1255 ; CHECK-NEXT: fmov w0, s0
1257 %tmp1 = load i32, ptr %A
1258 %tmp3 = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %tmp1, i32 1)
; Declarations of the sqshlu intrinsics called by the sqshlu tests above:
; 64-bit vector and scalar forms first, then the 128-bit vector forms.
1262 declare <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
1263 declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
1264 declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
1265 declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
1266 declare i64 @llvm.aarch64.neon.sqshlu.i64(i64, i64) nounwind readnone
1267 declare i32 @llvm.aarch64.neon.sqshlu.i32(i32, i32) nounwind readnone
1269 declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
1270 declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
1271 declare <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
1272 declare <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; rshrn intrinsic narrowing <8 x i16> to <8 x i8> with shift amount 1: the
; checks expect rshrn.8b #1 on the 128-bit source.
1274 define <8 x i8> @rshrn8b(ptr %A) nounwind {
1275 ; CHECK-LABEL: rshrn8b:
1277 ; CHECK-NEXT: ldr q0, [x0]
1278 ; CHECK-NEXT: rshrn.8b v0, v0, #1
1280 %tmp1 = load <8 x i16>, ptr %A
1281 %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
; rshrn intrinsic narrowing <4 x i32> to <4 x i16> with shift amount 1: the
; checks expect rshrn.4h #1.
1285 define <4 x i16> @rshrn4h(ptr %A) nounwind {
1286 ; CHECK-LABEL: rshrn4h:
1288 ; CHECK-NEXT: ldr q0, [x0]
1289 ; CHECK-NEXT: rshrn.4h v0, v0, #1
1291 %tmp1 = load <4 x i32>, ptr %A
1292 %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
; rshrn intrinsic narrowing <2 x i64> to <2 x i32> with shift amount 1: the
; checks expect rshrn.2s #1.
1296 define <2 x i32> @rshrn2s(ptr %A) nounwind {
1297 ; CHECK-LABEL: rshrn2s:
1299 ; CHECK-NEXT: ldr q0, [x0]
1300 ; CHECK-NEXT: rshrn.2s v0, v0, #1
1302 %tmp1 = load <2 x i64>, ptr %A
1303 %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
; rshrn of <8 x i16> by 1 whose narrowed result is concatenated (via the
; shufflevector) as the high half of a <16 x i8>, with the low half loaded
; from %ret: the checks expect the "2" form rshrn2.16b writing into v0.
1307 define <16 x i8> @rshrn16b(ptr %ret, ptr %A) nounwind {
1308 ; CHECK-LABEL: rshrn16b:
1310 ; CHECK-NEXT: ldr d0, [x0]
1311 ; CHECK-NEXT: ldr q1, [x1]
1312 ; CHECK-NEXT: rshrn2.16b v0, v1, #1
1314 %out = load <8 x i8>, ptr %ret
1315 %tmp1 = load <8 x i16>, ptr %A
1316 %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
1317 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; rshrn of <4 x i32> by 1 whose narrowed result is concatenated as the high
; half of an <8 x i16> (low half loaded from %ret): the checks expect
; rshrn2.8h.
1321 define <8 x i16> @rshrn8h(ptr %ret, ptr %A) nounwind {
1322 ; CHECK-LABEL: rshrn8h:
1324 ; CHECK-NEXT: ldr d0, [x0]
1325 ; CHECK-NEXT: ldr q1, [x1]
1326 ; CHECK-NEXT: rshrn2.8h v0, v1, #1
1328 %out = load <4 x i16>, ptr %ret
1329 %tmp1 = load <4 x i32>, ptr %A
1330 %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
1331 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; rshrn of <2 x i64> by 1 whose narrowed result is concatenated as the high
; half of a <4 x i32> (low half loaded from %ret): the checks expect
; rshrn2.4s.
1335 define <4 x i32> @rshrn4s(ptr %ret, ptr %A) nounwind {
1336 ; CHECK-LABEL: rshrn4s:
1338 ; CHECK-NEXT: ldr d0, [x0]
1339 ; CHECK-NEXT: ldr q1, [x1]
1340 ; CHECK-NEXT: rshrn2.4s v0, v1, #1
1342 %out = load <2 x i32>, ptr %ret
1343 %tmp1 = load <2 x i64>, ptr %A
1344 %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
1345 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Declarations of the rshrn intrinsics called by the rshrn tests above.
1349 declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone
1350 declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone
1351 declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone
; Generic IR pattern (lshr by a splat of 1, then trunc <8 x i16> to <8 x i8>),
; no intrinsic involved: the checks expect it to be matched to shrn.8b #1.
1353 define <8 x i8> @shrn8b(ptr %A) nounwind {
1354 ; CHECK-LABEL: shrn8b:
1356 ; CHECK-NEXT: ldr q0, [x0]
1357 ; CHECK-NEXT: shrn.8b v0, v0, #1
1359 %tmp1 = load <8 x i16>, ptr %A
1360 %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1361 %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
; lshr by a splat of 1 followed by trunc <4 x i32> to <4 x i16>: the checks
; expect shrn.4h #1.
1365 define <4 x i16> @shrn4h(ptr %A) nounwind {
1366 ; CHECK-LABEL: shrn4h:
1368 ; CHECK-NEXT: ldr q0, [x0]
1369 ; CHECK-NEXT: shrn.4h v0, v0, #1
1371 %tmp1 = load <4 x i32>, ptr %A
1372 %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
1373 %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
; lshr by a splat of 1 followed by trunc <2 x i64> to <2 x i32>: the checks
; expect shrn.2s #1.
1377 define <2 x i32> @shrn2s(ptr %A) nounwind {
1378 ; CHECK-LABEL: shrn2s:
1380 ; CHECK-NEXT: ldr q0, [x0]
1381 ; CHECK-NEXT: shrn.2s v0, v0, #1
1383 %tmp1 = load <2 x i64>, ptr %A
1384 %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
1385 %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
; lshr + trunc of <8 x i16> whose narrowed result is concatenated (via the
; shufflevector) as the high half of a <16 x i8>, with the low half loaded
; from %ret: the checks expect shrn2.16b.
1389 define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind {
1390 ; CHECK-LABEL: shrn16b:
1392 ; CHECK-NEXT: ldr d0, [x0]
1393 ; CHECK-NEXT: ldr q1, [x1]
1394 ; CHECK-NEXT: shrn2.16b v0, v1, #1
1396 %out = load <8 x i8>, ptr %ret
1397 %tmp1 = load <8 x i16>, ptr %A
1398 %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1399 %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
1400 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; lshr + trunc of <4 x i32> whose narrowed result is concatenated as the high
; half of an <8 x i16> (low half loaded from %ret): the checks expect
; shrn2.8h.
1404 define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind {
1405 ; CHECK-LABEL: shrn8h:
1407 ; CHECK-NEXT: ldr d0, [x0]
1408 ; CHECK-NEXT: ldr q1, [x1]
1409 ; CHECK-NEXT: shrn2.8h v0, v1, #1
1411 %out = load <4 x i16>, ptr %ret
1412 %tmp1 = load <4 x i32>, ptr %A
1413 %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
1414 %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
1415 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; lshr + trunc of <2 x i64> whose narrowed result is concatenated as the high
; half of a <4 x i32> (low half loaded from %ret): the checks expect
; shrn2.4s.
1419 define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind {
1420 ; CHECK-LABEL: shrn4s:
1422 ; CHECK-NEXT: ldr d0, [x0]
1423 ; CHECK-NEXT: ldr q1, [x1]
1424 ; CHECK-NEXT: shrn2.4s v0, v1, #1
1426 %out = load <2 x i32>, ptr %ret
1427 %tmp1 = load <2 x i64>, ptr %A
1428 %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
1429 %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
1430 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Declarations named llvm.aarch64.neon.shrn.*.
; NOTE(review): the shrn tests above use plain lshr + trunc and no call to
; these declarations is visible in this part of the file -- confirm whether
; they are still needed.
1434 declare <8 x i8> @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone
1435 declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone
1436 declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone
; Scalar sqshrn narrowing an i64 argument to i32 with shift amount 1: the
; checks expect the argument to be moved into d0, narrowed with
; sqshrn s0, d0, #1, and the result moved back to w0.
1438 define i32 @sqshrn1s(i64 %A) nounwind {
1439 ; CHECK-LABEL: sqshrn1s:
1441 ; CHECK-NEXT: fmov d0, x0
1442 ; CHECK-NEXT: sqshrn s0, d0, #1
1443 ; CHECK-NEXT: fmov w0, s0
1445 %tmp = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %A, i32 1)
; sqshrn intrinsic narrowing <8 x i16> to <8 x i8> with shift amount 1: the
; checks expect sqshrn.8b #1.
1449 define <8 x i8> @sqshrn8b(ptr %A) nounwind {
1450 ; CHECK-LABEL: sqshrn8b:
1452 ; CHECK-NEXT: ldr q0, [x0]
1453 ; CHECK-NEXT: sqshrn.8b v0, v0, #1
1455 %tmp1 = load <8 x i16>, ptr %A
1456 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
; sqshrn intrinsic narrowing <4 x i32> to <4 x i16> with shift amount 1: the
; checks expect sqshrn.4h #1.
1460 define <4 x i16> @sqshrn4h(ptr %A) nounwind {
1461 ; CHECK-LABEL: sqshrn4h:
1463 ; CHECK-NEXT: ldr q0, [x0]
1464 ; CHECK-NEXT: sqshrn.4h v0, v0, #1
1466 %tmp1 = load <4 x i32>, ptr %A
1467 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
; sqshrn intrinsic narrowing <2 x i64> to <2 x i32> with shift amount 1: the
; checks expect sqshrn.2s #1.
1471 define <2 x i32> @sqshrn2s(ptr %A) nounwind {
1472 ; CHECK-LABEL: sqshrn2s:
1474 ; CHECK-NEXT: ldr q0, [x0]
1475 ; CHECK-NEXT: sqshrn.2s v0, v0, #1
1477 %tmp1 = load <2 x i64>, ptr %A
1478 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
; sqshrn of <8 x i16> by 1 whose narrowed result is concatenated (via the
; shufflevector) as the high half of a <16 x i8>, with the low half loaded
; from %ret: the checks expect sqshrn2.16b.
1483 define <16 x i8> @sqshrn16b(ptr %ret, ptr %A) nounwind {
1484 ; CHECK-LABEL: sqshrn16b:
1486 ; CHECK-NEXT: ldr d0, [x0]
1487 ; CHECK-NEXT: ldr q1, [x1]
1488 ; CHECK-NEXT: sqshrn2.16b v0, v1, #1
1490 %out = load <8 x i8>, ptr %ret
1491 %tmp1 = load <8 x i16>, ptr %A
1492 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
1493 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; sqshrn of <4 x i32> by 1 whose narrowed result is concatenated as the high
; half of an <8 x i16> (low half loaded from %ret): the checks expect
; sqshrn2.8h.
1497 define <8 x i16> @sqshrn8h(ptr %ret, ptr %A) nounwind {
1498 ; CHECK-LABEL: sqshrn8h:
1500 ; CHECK-NEXT: ldr d0, [x0]
1501 ; CHECK-NEXT: ldr q1, [x1]
1502 ; CHECK-NEXT: sqshrn2.8h v0, v1, #1
1504 %out = load <4 x i16>, ptr %ret
1505 %tmp1 = load <4 x i32>, ptr %A
1506 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
1507 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; sqshrn of <2 x i64> by 1 whose narrowed result is concatenated as the high
; half of a <4 x i32> (low half loaded from %ret): the checks expect
; sqshrn2.4s.
1511 define <4 x i32> @sqshrn4s(ptr %ret, ptr %A) nounwind {
1512 ; CHECK-LABEL: sqshrn4s:
1514 ; CHECK-NEXT: ldr d0, [x0]
1515 ; CHECK-NEXT: ldr q1, [x1]
1516 ; CHECK-NEXT: sqshrn2.4s v0, v1, #1
1518 %out = load <2 x i32>, ptr %ret
1519 %tmp1 = load <2 x i64>, ptr %A
1520 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
1521 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Declarations of the sqshrn intrinsics (scalar and vector) called by the
; sqshrn tests above.
1525 declare i32 @llvm.aarch64.neon.sqshrn.i32(i64, i32) nounwind readnone
1526 declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone
1527 declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone
1528 declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone
; Scalar sqshrun narrowing an i64 argument to i32 with shift amount 1: the
; checks expect fmov into d0, sqshrun s0, d0, #1, and fmov of the result back
; to w0.
1530 define i32 @sqshrun1s(i64 %A) nounwind {
1531 ; CHECK-LABEL: sqshrun1s:
1533 ; CHECK-NEXT: fmov d0, x0
1534 ; CHECK-NEXT: sqshrun s0, d0, #1
1535 ; CHECK-NEXT: fmov w0, s0
1537 %tmp = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %A, i32 1)
; sqshrun intrinsic narrowing <8 x i16> to <8 x i8> with shift amount 1: the
; checks expect sqshrun.8b #1.
1541 define <8 x i8> @sqshrun8b(ptr %A) nounwind {
1542 ; CHECK-LABEL: sqshrun8b:
1544 ; CHECK-NEXT: ldr q0, [x0]
1545 ; CHECK-NEXT: sqshrun.8b v0, v0, #1
1547 %tmp1 = load <8 x i16>, ptr %A
1548 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
; sqshrun intrinsic narrowing <4 x i32> to <4 x i16> with shift amount 1: the
; checks expect sqshrun.4h #1.
1552 define <4 x i16> @sqshrun4h(ptr %A) nounwind {
1553 ; CHECK-LABEL: sqshrun4h:
1555 ; CHECK-NEXT: ldr q0, [x0]
1556 ; CHECK-NEXT: sqshrun.4h v0, v0, #1
1558 %tmp1 = load <4 x i32>, ptr %A
1559 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
; sqshrun intrinsic narrowing <2 x i64> to <2 x i32> with shift amount 1: the
; checks expect sqshrun.2s #1.
1563 define <2 x i32> @sqshrun2s(ptr %A) nounwind {
1564 ; CHECK-LABEL: sqshrun2s:
1566 ; CHECK-NEXT: ldr q0, [x0]
1567 ; CHECK-NEXT: sqshrun.2s v0, v0, #1
1569 %tmp1 = load <2 x i64>, ptr %A
1570 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
; sqshrun of <8 x i16> by 1 whose narrowed result is concatenated (via the
; shufflevector) as the high half of a <16 x i8>, with the low half loaded
; from %ret: the checks expect sqshrun2.16b.
1574 define <16 x i8> @sqshrun16b(ptr %ret, ptr %A) nounwind {
1575 ; CHECK-LABEL: sqshrun16b:
1577 ; CHECK-NEXT: ldr d0, [x0]
1578 ; CHECK-NEXT: ldr q1, [x1]
1579 ; CHECK-NEXT: sqshrun2.16b v0, v1, #1
1581 %out = load <8 x i8>, ptr %ret
1582 %tmp1 = load <8 x i16>, ptr %A
1583 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
1584 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; sqshrun of <4 x i32> by 1 whose narrowed result is concatenated as the high
; half of an <8 x i16> (low half loaded from %ret): the checks expect
; sqshrun2.8h.
1588 define <8 x i16> @sqshrun8h(ptr %ret, ptr %A) nounwind {
1589 ; CHECK-LABEL: sqshrun8h:
1591 ; CHECK-NEXT: ldr d0, [x0]
1592 ; CHECK-NEXT: ldr q1, [x1]
1593 ; CHECK-NEXT: sqshrun2.8h v0, v1, #1
1595 %out = load <4 x i16>, ptr %ret
1596 %tmp1 = load <4 x i32>, ptr %A
1597 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
1598 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; sqshrun of <2 x i64> by 1 whose narrowed result is concatenated as the high
; half of a <4 x i32> (low half loaded from %ret): the checks expect
; sqshrun2.4s.
1602 define <4 x i32> @sqshrun4s(ptr %ret, ptr %A) nounwind {
1603 ; CHECK-LABEL: sqshrun4s:
1605 ; CHECK-NEXT: ldr d0, [x0]
1606 ; CHECK-NEXT: ldr q1, [x1]
1607 ; CHECK-NEXT: sqshrun2.4s v0, v1, #1
1609 %out = load <2 x i32>, ptr %ret
1610 %tmp1 = load <2 x i64>, ptr %A
1611 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
1612 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Declarations of the sqshrun intrinsics (scalar and vector) called by the
; sqshrun tests above.
1616 declare i32 @llvm.aarch64.neon.sqshrun.i32(i64, i32) nounwind readnone
1617 declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone
1618 declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone
1619 declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone
; Scalar sqrshrn narrowing an i64 argument to i32 with shift amount 1: the
; checks expect fmov into d0, sqrshrn s0, d0, #1, and fmov of the result back
; to w0.
1621 define i32 @sqrshrn1s(i64 %A) nounwind {
1622 ; CHECK-LABEL: sqrshrn1s:
1624 ; CHECK-NEXT: fmov d0, x0
1625 ; CHECK-NEXT: sqrshrn s0, d0, #1
1626 ; CHECK-NEXT: fmov w0, s0
1628 %tmp = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %A, i32 1)
; sqrshrn intrinsic narrowing <8 x i16> to <8 x i8> with shift amount 1: the
; checks expect sqrshrn.8b #1.
1632 define <8 x i8> @sqrshrn8b(ptr %A) nounwind {
1633 ; CHECK-LABEL: sqrshrn8b:
1635 ; CHECK-NEXT: ldr q0, [x0]
1636 ; CHECK-NEXT: sqrshrn.8b v0, v0, #1
1638 %tmp1 = load <8 x i16>, ptr %A
1639 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
; sqrshrn intrinsic narrowing <4 x i32> to <4 x i16> with shift amount 1: the
; checks expect sqrshrn.4h #1.
1643 define <4 x i16> @sqrshrn4h(ptr %A) nounwind {
1644 ; CHECK-LABEL: sqrshrn4h:
1646 ; CHECK-NEXT: ldr q0, [x0]
1647 ; CHECK-NEXT: sqrshrn.4h v0, v0, #1
1649 %tmp1 = load <4 x i32>, ptr %A
1650 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
; sqrshrn intrinsic narrowing <2 x i64> to <2 x i32> with shift amount 1: the
; checks expect sqrshrn.2s #1.
1654 define <2 x i32> @sqrshrn2s(ptr %A) nounwind {
1655 ; CHECK-LABEL: sqrshrn2s:
1657 ; CHECK-NEXT: ldr q0, [x0]
1658 ; CHECK-NEXT: sqrshrn.2s v0, v0, #1
1660 %tmp1 = load <2 x i64>, ptr %A
1661 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
; sqrshrn of <8 x i16> by 1 whose narrowed result is concatenated (via the
; shufflevector) as the high half of a <16 x i8>, with the low half loaded
; from %ret: the checks expect sqrshrn2.16b.
1665 define <16 x i8> @sqrshrn16b(ptr %ret, ptr %A) nounwind {
1666 ; CHECK-LABEL: sqrshrn16b:
1668 ; CHECK-NEXT: ldr d0, [x0]
1669 ; CHECK-NEXT: ldr q1, [x1]
1670 ; CHECK-NEXT: sqrshrn2.16b v0, v1, #1
1672 %out = load <8 x i8>, ptr %ret
1673 %tmp1 = load <8 x i16>, ptr %A
1674 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
1675 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; sqrshrn of <4 x i32> by 1 whose narrowed result is concatenated as the high
; half of an <8 x i16> (low half loaded from %ret): the checks expect
; sqrshrn2.8h.
1679 define <8 x i16> @sqrshrn8h(ptr %ret, ptr %A) nounwind {
1680 ; CHECK-LABEL: sqrshrn8h:
1682 ; CHECK-NEXT: ldr d0, [x0]
1683 ; CHECK-NEXT: ldr q1, [x1]
1684 ; CHECK-NEXT: sqrshrn2.8h v0, v1, #1
1686 %out = load <4 x i16>, ptr %ret
1687 %tmp1 = load <4 x i32>, ptr %A
1688 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
1689 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; sqrshrn of <2 x i64> by 1 whose narrowed result is concatenated as the high
; half of a <4 x i32> (low half loaded from %ret): the checks expect
; sqrshrn2.4s.
1693 define <4 x i32> @sqrshrn4s(ptr %ret, ptr %A) nounwind {
1694 ; CHECK-LABEL: sqrshrn4s:
1696 ; CHECK-NEXT: ldr d0, [x0]
1697 ; CHECK-NEXT: ldr q1, [x1]
1698 ; CHECK-NEXT: sqrshrn2.4s v0, v1, #1
1700 %out = load <2 x i32>, ptr %ret
1701 %tmp1 = load <2 x i64>, ptr %A
1702 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
1703 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Declarations of the sqrshrn intrinsics (scalar and vector) called by the
; sqrshrn tests above.
1707 declare i32 @llvm.aarch64.neon.sqrshrn.i32(i64, i32) nounwind readnone
1708 declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
1709 declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
1710 declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
; Scalar sqrshrun narrowing an i64 argument to i32 with shift amount 1: the
; checks expect fmov into d0, sqrshrun s0, d0, #1, and fmov of the result
; back to w0.
1712 define i32 @sqrshrun1s(i64 %A) nounwind {
1713 ; CHECK-LABEL: sqrshrun1s:
1715 ; CHECK-NEXT: fmov d0, x0
1716 ; CHECK-NEXT: sqrshrun s0, d0, #1
1717 ; CHECK-NEXT: fmov w0, s0
1719 %tmp = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %A, i32 1)
; sqrshrun intrinsic narrowing <8 x i16> to <8 x i8> with shift amount 1: the
; checks expect sqrshrun.8b #1.
1723 define <8 x i8> @sqrshrun8b(ptr %A) nounwind {
1724 ; CHECK-LABEL: sqrshrun8b:
1726 ; CHECK-NEXT: ldr q0, [x0]
1727 ; CHECK-NEXT: sqrshrun.8b v0, v0, #1
1729 %tmp1 = load <8 x i16>, ptr %A
1730 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
; sqrshrun intrinsic narrowing <4 x i32> to <4 x i16> with shift amount 1:
; the checks expect sqrshrun.4h #1.
1734 define <4 x i16> @sqrshrun4h(ptr %A) nounwind {
1735 ; CHECK-LABEL: sqrshrun4h:
1737 ; CHECK-NEXT: ldr q0, [x0]
1738 ; CHECK-NEXT: sqrshrun.4h v0, v0, #1
1740 %tmp1 = load <4 x i32>, ptr %A
1741 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
; sqrshrun intrinsic narrowing <2 x i64> to <2 x i32> with shift amount 1:
; the checks expect sqrshrun.2s #1.
1745 define <2 x i32> @sqrshrun2s(ptr %A) nounwind {
1746 ; CHECK-LABEL: sqrshrun2s:
1748 ; CHECK-NEXT: ldr q0, [x0]
1749 ; CHECK-NEXT: sqrshrun.2s v0, v0, #1
1751 %tmp1 = load <2 x i64>, ptr %A
1752 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
; sqrshrun of <8 x i16> by 1 whose narrowed result is concatenated (via the
; shufflevector) as the high half of a <16 x i8>, with the low half loaded
; from %ret: the checks expect sqrshrun2.16b.
1756 define <16 x i8> @sqrshrun16b(ptr %ret, ptr %A) nounwind {
1757 ; CHECK-LABEL: sqrshrun16b:
1759 ; CHECK-NEXT: ldr d0, [x0]
1760 ; CHECK-NEXT: ldr q1, [x1]
1761 ; CHECK-NEXT: sqrshrun2.16b v0, v1, #1
1763 %out = load <8 x i8>, ptr %ret
1764 %tmp1 = load <8 x i16>, ptr %A
1765 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
1766 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; sqrshrun of <4 x i32> by 1 whose narrowed result is concatenated as the
; high half of an <8 x i16> (low half loaded from %ret): the checks expect
; sqrshrun2.8h.
1770 define <8 x i16> @sqrshrun8h(ptr %ret, ptr %A) nounwind {
1771 ; CHECK-LABEL: sqrshrun8h:
1773 ; CHECK-NEXT: ldr d0, [x0]
1774 ; CHECK-NEXT: ldr q1, [x1]
1775 ; CHECK-NEXT: sqrshrun2.8h v0, v1, #1
1777 %out = load <4 x i16>, ptr %ret
1778 %tmp1 = load <4 x i32>, ptr %A
1779 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
1780 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; sqrshrun of <2 x i64> by 1 whose narrowed result is concatenated as the
; high half of a <4 x i32> (low half loaded from %ret): the checks expect
; sqrshrun2.4s.
1784 define <4 x i32> @sqrshrun4s(ptr %ret, ptr %A) nounwind {
1785 ; CHECK-LABEL: sqrshrun4s:
1787 ; CHECK-NEXT: ldr d0, [x0]
1788 ; CHECK-NEXT: ldr q1, [x1]
1789 ; CHECK-NEXT: sqrshrun2.4s v0, v1, #1
1791 %out = load <2 x i32>, ptr %ret
1792 %tmp1 = load <2 x i64>, ptr %A
1793 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
1794 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Declarations of the sqrshrun intrinsics (scalar and vector) called by the
; sqrshrun tests above.
1798 declare i32 @llvm.aarch64.neon.sqrshrun.i32(i64, i32) nounwind readnone
1799 declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone
1800 declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone
1801 declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone
; Scalar uqrshrn narrowing an i64 argument to i32 with shift amount 1: the
; checks expect fmov into d0, uqrshrn s0, d0, #1, and fmov of the result back
; to w0.
1803 define i32 @uqrshrn1s(i64 %A) nounwind {
1804 ; CHECK-LABEL: uqrshrn1s:
1806 ; CHECK-NEXT: fmov d0, x0
1807 ; CHECK-NEXT: uqrshrn s0, d0, #1
1808 ; CHECK-NEXT: fmov w0, s0
1810 %tmp = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %A, i32 1)
; uqrshrn intrinsic narrowing <8 x i16> to <8 x i8> with shift amount 1: the
; checks expect uqrshrn.8b #1.
1814 define <8 x i8> @uqrshrn8b(ptr %A) nounwind {
1815 ; CHECK-LABEL: uqrshrn8b:
1817 ; CHECK-NEXT: ldr q0, [x0]
1818 ; CHECK-NEXT: uqrshrn.8b v0, v0, #1
1820 %tmp1 = load <8 x i16>, ptr %A
1821 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
; uqrshrn intrinsic narrowing <4 x i32> to <4 x i16> with shift amount 1: the
; checks expect uqrshrn.4h #1.
1825 define <4 x i16> @uqrshrn4h(ptr %A) nounwind {
1826 ; CHECK-LABEL: uqrshrn4h:
1828 ; CHECK-NEXT: ldr q0, [x0]
1829 ; CHECK-NEXT: uqrshrn.4h v0, v0, #1
1831 %tmp1 = load <4 x i32>, ptr %A
1832 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
; uqrshrn intrinsic narrowing <2 x i64> to <2 x i32> with shift amount 1: the
; checks expect uqrshrn.2s #1.
1836 define <2 x i32> @uqrshrn2s(ptr %A) nounwind {
1837 ; CHECK-LABEL: uqrshrn2s:
1839 ; CHECK-NEXT: ldr q0, [x0]
1840 ; CHECK-NEXT: uqrshrn.2s v0, v0, #1
1842 %tmp1 = load <2 x i64>, ptr %A
1843 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
; uqrshrn of <8 x i16> by 1 whose narrowed result is concatenated (via the
; shufflevector) as the high half of a <16 x i8>, with the low half loaded
; from %ret: the checks expect uqrshrn2.16b.
1847 define <16 x i8> @uqrshrn16b(ptr %ret, ptr %A) nounwind {
1848 ; CHECK-LABEL: uqrshrn16b:
1850 ; CHECK-NEXT: ldr d0, [x0]
1851 ; CHECK-NEXT: ldr q1, [x1]
1852 ; CHECK-NEXT: uqrshrn2.16b v0, v1, #1
1854 %out = load <8 x i8>, ptr %ret
1855 %tmp1 = load <8 x i16>, ptr %A
1856 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
1857 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; uqrshrn of <4 x i32> by 1 whose narrowed result is concatenated as the high
; half of an <8 x i16> (low half loaded from %ret): the checks expect
; uqrshrn2.8h.
1861 define <8 x i16> @uqrshrn8h(ptr %ret, ptr %A) nounwind {
1862 ; CHECK-LABEL: uqrshrn8h:
1864 ; CHECK-NEXT: ldr d0, [x0]
1865 ; CHECK-NEXT: ldr q1, [x1]
1866 ; CHECK-NEXT: uqrshrn2.8h v0, v1, #1
1868 %out = load <4 x i16>, ptr %ret
1869 %tmp1 = load <4 x i32>, ptr %A
1870 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
1871 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; uqrshrn of <2 x i64> by 1 whose narrowed result is concatenated as the high
; half of a <4 x i32> (low half loaded from %ret): the checks expect
; uqrshrn2.4s.
1875 define <4 x i32> @uqrshrn4s(ptr %ret, ptr %A) nounwind {
1876 ; CHECK-LABEL: uqrshrn4s:
1878 ; CHECK-NEXT: ldr d0, [x0]
1879 ; CHECK-NEXT: ldr q1, [x1]
1880 ; CHECK-NEXT: uqrshrn2.4s v0, v1, #1
1882 %out = load <2 x i32>, ptr %ret
1883 %tmp1 = load <2 x i64>, ptr %A
1884 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
1885 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Declarations of the uqrshrn intrinsics (scalar and vector) called by the
; uqrshrn tests above.
1889 declare i32 @llvm.aarch64.neon.uqrshrn.i32(i64, i32) nounwind readnone
1890 declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
1891 declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
1892 declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
; Scalar uqshrn narrowing an i64 argument to i32 with shift amount 1: the
; checks expect fmov into d0, uqshrn s0, d0, #1, and fmov of the result back
; to w0.
1894 define i32 @uqshrn1s(i64 %A) nounwind {
1895 ; CHECK-LABEL: uqshrn1s:
1897 ; CHECK-NEXT: fmov d0, x0
1898 ; CHECK-NEXT: uqshrn s0, d0, #1
1899 ; CHECK-NEXT: fmov w0, s0
1901 %tmp = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %A, i32 1)
; uqshrn intrinsic narrowing <8 x i16> to <8 x i8> with shift amount 1: the
; checks expect uqshrn.8b #1.
1905 define <8 x i8> @uqshrn8b(ptr %A) nounwind {
1906 ; CHECK-LABEL: uqshrn8b:
1908 ; CHECK-NEXT: ldr q0, [x0]
1909 ; CHECK-NEXT: uqshrn.8b v0, v0, #1
1911 %tmp1 = load <8 x i16>, ptr %A
1912 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
; uqshrn intrinsic narrowing <4 x i32> to <4 x i16> with shift amount 1: the
; checks expect uqshrn.4h #1.
1916 define <4 x i16> @uqshrn4h(ptr %A) nounwind {
1917 ; CHECK-LABEL: uqshrn4h:
1919 ; CHECK-NEXT: ldr q0, [x0]
1920 ; CHECK-NEXT: uqshrn.4h v0, v0, #1
1922 %tmp1 = load <4 x i32>, ptr %A
1923 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
; uqshrn intrinsic narrowing <2 x i64> to <2 x i32> with shift amount 1: the
; checks expect uqshrn.2s #1.
1927 define <2 x i32> @uqshrn2s(ptr %A) nounwind {
1928 ; CHECK-LABEL: uqshrn2s:
1930 ; CHECK-NEXT: ldr q0, [x0]
1931 ; CHECK-NEXT: uqshrn.2s v0, v0, #1
1933 %tmp1 = load <2 x i64>, ptr %A
1934 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
; uqshrn of <8 x i16> by 1 whose narrowed result is concatenated (via the
; shufflevector) as the high half of a <16 x i8>, with the low half loaded
; from %ret: the checks expect uqshrn2.16b.
1938 define <16 x i8> @uqshrn16b(ptr %ret, ptr %A) nounwind {
1939 ; CHECK-LABEL: uqshrn16b:
1941 ; CHECK-NEXT: ldr d0, [x0]
1942 ; CHECK-NEXT: ldr q1, [x1]
1943 ; CHECK-NEXT: uqshrn2.16b v0, v1, #1
1945 %out = load <8 x i8>, ptr %ret
1946 %tmp1 = load <8 x i16>, ptr %A
1947 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
1948 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; uqshrn of <4 x i32> by 1 whose narrowed result is concatenated as the high
; half of an <8 x i16> (low half loaded from %ret): the checks expect
; uqshrn2.8h.
1952 define <8 x i16> @uqshrn8h(ptr %ret, ptr %A) nounwind {
1953 ; CHECK-LABEL: uqshrn8h:
1955 ; CHECK-NEXT: ldr d0, [x0]
1956 ; CHECK-NEXT: ldr q1, [x1]
1957 ; CHECK-NEXT: uqshrn2.8h v0, v1, #1
1959 %out = load <4 x i16>, ptr %ret
1960 %tmp1 = load <4 x i32>, ptr %A
1961 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
1962 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; uqshrn of <2 x i64> by 1 whose narrowed result is concatenated as the high
; half of a <4 x i32> (low half loaded from %ret): the checks expect
; uqshrn2.4s.
1966 define <4 x i32> @uqshrn4s(ptr %ret, ptr %A) nounwind {
1967 ; CHECK-LABEL: uqshrn4s:
1969 ; CHECK-NEXT: ldr d0, [x0]
1970 ; CHECK-NEXT: ldr q1, [x1]
1971 ; CHECK-NEXT: uqshrn2.4s v0, v1, #1
1973 %out = load <2 x i32>, ptr %ret
1974 %tmp1 = load <2 x i64>, ptr %A
1975 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
1976 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Declarations of the uqshrn intrinsics (scalar and vector) called by the
; uqshrn tests above.
1980 declare i32 @llvm.aarch64.neon.uqshrn.i32(i64, i32) nounwind readnone
1981 declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone
1982 declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone
1983 declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone
; Generic IR pattern (zext <8 x i8> to <8 x i16>, then shl by a splat of 1):
; the checks expect the widening shift ushll.8h #1.
1985 define <8 x i16> @ushll8h(ptr %A) nounwind {
1986 ; CHECK-LABEL: ushll8h:
1988 ; CHECK-NEXT: ldr d0, [x0]
1989 ; CHECK-NEXT: ushll.8h v0, v0, #1
1991 %tmp1 = load <8 x i8>, ptr %A
1992 %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
1993 %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; zext <4 x i16> to <4 x i32> followed by shl by a splat of 1: the checks
; expect ushll.4s #1.
1997 define <4 x i32> @ushll4s(ptr %A) nounwind {
1998 ; CHECK-LABEL: ushll4s:
2000 ; CHECK-NEXT: ldr d0, [x0]
2001 ; CHECK-NEXT: ushll.4s v0, v0, #1
2003 %tmp1 = load <4 x i16>, ptr %A
2004 %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
2005 %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
; zext <2 x i32> to <2 x i64> followed by shl by a splat of 1: the checks
; expect ushll.2d #1.
2009 define <2 x i64> @ushll2d(ptr %A) nounwind {
2010 ; CHECK-LABEL: ushll2d:
2012 ; CHECK-NEXT: ldr d0, [x0]
2013 ; CHECK-NEXT: ushll.2d v0, v0, #1
2015 %tmp1 = load <2 x i32>, ptr %A
2016 %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
2017 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
; Widening shift of the high half of a loaded <16 x i8> (lanes 8..15 selected
; by the shufflevector, then zext + shl by 1). The expected code does not use
; a "2" form here: it loads only the high 8 bytes with ldr d0, [x0, #8] and
; then applies ushll.8h #1.
2021 define <8 x i16> @ushll2_8h(ptr %A) nounwind {
2022 ; CHECK-LABEL: ushll2_8h:
2024 ; CHECK-NEXT: ldr d0, [x0, #8]
2025 ; CHECK-NEXT: ushll.8h v0, v0, #1
2027 %load1 = load <16 x i8>, ptr %A
2028 %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2029 %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
2030 %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Widening shift of the high half of a loaded <8 x i16> (lanes 4..7 selected
; by the shufflevector, then zext + shl by 1): the checks expect a load of
; only the high 8 bytes (ldr d0, [x0, #8]) followed by ushll.4s #1.
2034 define <4 x i32> @ushll2_4s(ptr %A) nounwind {
2035 ; CHECK-LABEL: ushll2_4s:
2037 ; CHECK-NEXT: ldr d0, [x0, #8]
2038 ; CHECK-NEXT: ushll.4s v0, v0, #1
2040 %load1 = load <8 x i16>, ptr %A
2041 %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2042 %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
2043 %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
; Widening shift of the high half of a loaded <4 x i32> (lanes 2..3 selected
; by the shufflevector, then zext + shl by 1): the checks expect a load of
; only the high 8 bytes (ldr d0, [x0, #8]) followed by ushll.2d #1.
2047 define <2 x i64> @ushll2_2d(ptr %A) nounwind {
2048 ; CHECK-LABEL: ushll2_2d:
2050 ; CHECK-NEXT: ldr d0, [x0, #8]
2051 ; CHECK-NEXT: ushll.2d v0, v0, #1
2053 %load1 = load <4 x i32>, ptr %A
2054 %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2055 %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
2056 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
; Declarations of the ushl intrinsics. The v8i16 and v4i32 forms are called
; by the neon.ushl* tests that follow; callers of the other forms are not
; visible in this part of the file.
2060 declare <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8>, <16 x i8>)
2061 declare <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16>, <8 x i16>)
2062 declare <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32>, <4 x i32>)
2063 declare <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64>, <2 x i64>)
2064 declare <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64>, <1 x i64>)
2065 declare i64 @llvm.aarch64.neon.ushl.i64(i64, i64)
; zext <8 x i8> to <8 x i16> followed by the ushl intrinsic with a constant
; splat shift amount of 1: the checks expect the extend and shift to be
; combined into a single ushll.8h #1.
2067 define <8 x i16> @neon.ushll8h_constant_shift(ptr %A) nounwind {
2068 ; CHECK-LABEL: neon.ushll8h_constant_shift:
2070 ; CHECK-NEXT: ldr d0, [x0]
2071 ; CHECK-NEXT: ushll.8h v0, v0, #1
2073 %tmp1 = load <8 x i8>, ptr %A
2074 %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
2075 %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; Negative case: the shift amount of the ushl intrinsic is not a constant (the
; zero-extended value is used as both operands), so the expected code keeps
; the extend as ushll.8h #0 and uses the register form ushl.8h.
2079 define <8 x i16> @neon.ushl8h_no_constant_shift(ptr %A) nounwind {
2080 ; CHECK-LABEL: neon.ushl8h_no_constant_shift:
2082 ; CHECK-NEXT: ldr d0, [x0]
2083 ; CHECK-NEXT: ushll.8h v0, v0, #0
2084 ; CHECK-NEXT: ushl.8h v0, v0, v0
2086 %tmp1 = load <8 x i8>, ptr %A
2087 %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
2088 %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp2)
; The zext widens each element 4x (<4 x i8> to <4 x i32>) rather than 2x, then
; ushl by a constant splat of 1 is applied. The expected code extends in two
; steps: ushll.8h #0 first, then ushll.4s #1 carrying the shift.
2092 define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind {
2093 ; CHECK-LABEL: neon.ushl8h_constant_shift_extend_not_2x:
2095 ; CHECK-NEXT: ldr s0, [x0]
2096 ; CHECK-NEXT: ushll.8h v0, v0, #0
2097 ; CHECK-NEXT: ushll.4s v0, v0, #1
2099 %tmp1 = load <4 x i8>, ptr %A
2100 %tmp2 = zext <4 x i8> %tmp1 to <4 x i32>
2101 %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; ushl by a splat of 1 on a loaded <8 x i16> with no extend in front.
; The checks expect add.8h v0, v0, v0.
2105 define <8 x i16> @neon.ushl8_noext_constant_shift(ptr %A) nounwind {
2106 ; CHECK-LABEL: neon.ushl8_noext_constant_shift:
2108 ; CHECK-NEXT: ldr q0, [x0]
2109 ; CHECK-NEXT: add.8h v0, v0, v0
2111 %tmp1 = load <8 x i16>, ptr %A
2112 %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; llvm.aarch64.neon.ushl applied to a zero-extended <4 x i16> with a splat
; shift amount of 1. The checks expect a single ushll.4s #1.
2116 define <4 x i32> @neon.ushll4s_constant_shift(ptr %A) nounwind {
2117 ; CHECK-LABEL: neon.ushll4s_constant_shift:
2119 ; CHECK-NEXT: ldr d0, [x0]
2120 ; CHECK-NEXT: ushll.4s v0, v0, #1
2122 %tmp1 = load <4 x i16>, ptr %A
2123 %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
2124 %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; ushl with a splat shift amount of -1 on a zero-extended <4 x i16>.
; The checks expect the extend as ushll.4s #0 followed by ushr.4s #1; the
; FIXME below questions whether the ushll #0 is needed.
2128 ; FIXME: unnecessary ushll.4s v0, v0, #0?
2129 define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind {
2130 ; CHECK-LABEL: neon.ushll4s_neg_constant_shift:
2132 ; CHECK-NEXT: ldr d0, [x0]
2133 ; CHECK-NEXT: ushll.4s v0, v0, #0
2134 ; CHECK-NEXT: ushr.4s v0, v0, #1
2136 %tmp1 = load <4 x i16>, ptr %A
2137 %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
2138 %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; Both ushl operands are constants (<0, 1, 2, 3> shifted by a splat of 1).
; The checks record the current output: the value operand is loaded from the
; constant pool and doubled with add.4s rather than folded away.
2142 ; FIXME: should be constant folded.
2143 define <4 x i32> @neon.ushll4s_constant_fold() nounwind {
2144 ; CHECK-LABEL: neon.ushll4s_constant_fold:
2146 ; CHECK-NEXT: adrp x8, .LCPI160_0
2147 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI160_0]
2148 ; CHECK-NEXT: add.4s v0, v0, v0
2150 %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; llvm.aarch64.neon.ushl applied to a zero-extended <2 x i32> with a splat
; shift amount of 1. The checks expect a single ushll.2d #1.
2154 define <2 x i64> @neon.ushll2d_constant_shift(ptr %A) nounwind {
2155 ; CHECK-LABEL: neon.ushll2d_constant_shift:
2157 ; CHECK-NEXT: ldr d0, [x0]
2158 ; CHECK-NEXT: ushll.2d v0, v0, #1
2160 %tmp1 = load <2 x i32>, ptr %A
2161 %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
2162 %tmp3 = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 1, i64 1>)
; Single-element vector case: <1 x i32> is zero-extended to <1 x i64> and
; passed to ushl.v1i64 with shift <i64 1>. The checks expect the extend as
; movi + zip1.2s and the shift as the scalar immediate form shl d0, d0, #1.
2166 define <1 x i64> @neon.ushl_vscalar_constant_shift(ptr %A) nounwind {
2167 ; CHECK-LABEL: neon.ushl_vscalar_constant_shift:
2169 ; CHECK-NEXT: movi.2d v1, #0000000000000000
2170 ; CHECK-NEXT: ldr s0, [x0]
2171 ; CHECK-NEXT: zip1.2s v0, v0, v1
2172 ; CHECK-NEXT: shl d0, d0, #1
2174 %tmp1 = load <1 x i32>, ptr %A
2175 %tmp2 = zext <1 x i32> %tmp1 to <1 x i64>
2176 %tmp3 = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %tmp2, <1 x i64> <i64 1>)
; Scalar i64 form of the ushl intrinsic with a constant shift of 1 on a
; zero-extended i32 load. The checks expect the value to be moved into d0,
; shifted with shl d0, d0, #1 and moved back to x0.
2180 define i64 @neon.ushl_scalar_constant_shift(ptr %A) nounwind {
2181 ; CHECK-LABEL: neon.ushl_scalar_constant_shift:
2183 ; CHECK-NEXT: ldr w8, [x0]
2184 ; CHECK-NEXT: fmov d0, x8
2185 ; CHECK-NEXT: shl d0, d0, #1
2186 ; CHECK-NEXT: fmov x0, d0
2188 %tmp1 = load i32, ptr %A
2189 %tmp2 = zext i32 %tmp1 to i64
2190 %tmp3 = call i64 @llvm.aarch64.neon.ushl.i64(i64 %tmp2, i64 1)
; Plain IR shl by 1 of a sign-extended <8 x i8>. The checks expect a single
; sshll.8h #1.
2194 define <8 x i16> @sshll8h(ptr %A) nounwind {
2195 ; CHECK-LABEL: sshll8h:
2197 ; CHECK-NEXT: ldr d0, [x0]
2198 ; CHECK-NEXT: sshll.8h v0, v0, #1
2200 %tmp1 = load <8 x i8>, ptr %A
2201 %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
2202 %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Plain IR shl by 1 of a sign-extended <2 x i32>. The checks expect a single
; sshll.2d #1.
2206 define <2 x i64> @sshll2d(ptr %A) nounwind {
2207 ; CHECK-LABEL: sshll2d:
2209 ; CHECK-NEXT: ldr d0, [x0]
2210 ; CHECK-NEXT: sshll.2d v0, v0, #1
2212 %tmp1 = load <2 x i32>, ptr %A
2213 %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
2214 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
2218 declare <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8>, <16 x i8>)
2219 declare <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16>, <8 x i16>)
2220 declare <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32>, <4 x i32>)
2221 declare <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64>, <2 x i64>)
2222 declare <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64>, <1 x i64>)
2223 declare i64 @llvm.aarch64.neon.sshl.i64(i64, i64)
; sshl by a splat of 1 on a loaded <16 x i8>. The checks expect
; add.16b v0, v0, v0.
2225 define <16 x i8> @neon.sshl16b_constant_shift(ptr %A) nounwind {
2226 ; CHECK-LABEL: neon.sshl16b_constant_shift:
2228 ; CHECK-NEXT: ldr q0, [x0]
2229 ; CHECK-NEXT: add.16b v0, v0, v0
2231 %tmp1 = load <16 x i8>, ptr %A
2232 %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; The constant shift vector is not a splat (lane 0 is 6, the rest are 1).
; The checks expect the shift vector to be loaded from the constant pool and
; the register form sshl.16b to be used.
2236 define <16 x i8> @neon.sshl16b_non_splat_constant_shift(ptr %A) nounwind {
2237 ; CHECK-LABEL: neon.sshl16b_non_splat_constant_shift:
2239 ; CHECK-NEXT: adrp x8, .LCPI167_0
2240 ; CHECK-NEXT: ldr q0, [x0]
2241 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI167_0]
2242 ; CHECK-NEXT: sshl.16b v0, v0, v1
2244 %tmp1 = load <16 x i8>, ptr %A
2245 %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 6, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; sshl by a splat of -2 on a loaded <16 x i8>. The checks expect the
; immediate right shift sshr.16b #2.
2249 define <16 x i8> @neon.sshl16b_neg_constant_shift(ptr %A) nounwind {
2250 ; CHECK-LABEL: neon.sshl16b_neg_constant_shift:
2252 ; CHECK-NEXT: ldr q0, [x0]
2253 ; CHECK-NEXT: sshr.16b v0, v0, #2
2255 %tmp1 = load <16 x i8>, ptr %A
2256 %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>)
; llvm.aarch64.neon.sshl applied to a sign-extended <8 x i8> with a splat
; shift amount of 1. The checks expect a single sshll.8h #1.
2260 define <8 x i16> @neon.sshll8h_constant_shift(ptr %A) nounwind {
2261 ; CHECK-LABEL: neon.sshll8h_constant_shift:
2263 ; CHECK-NEXT: ldr d0, [x0]
2264 ; CHECK-NEXT: sshll.8h v0, v0, #1
2266 %tmp1 = load <8 x i8>, ptr %A
2267 %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
2268 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %tmp2, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; The sext widens <4 x i8> to <4 x i32> (4x) before sshl by a splat of 1.
; The checks expect sshll.8h #0 followed by sshll.4s #1.
2272 define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(ptr %A) nounwind {
2273 ; CHECK-LABEL: neon.sshl4s_wrong_ext_constant_shift:
2275 ; CHECK-NEXT: ldr s0, [x0]
2276 ; CHECK-NEXT: sshll.8h v0, v0, #0
2277 ; CHECK-NEXT: sshll.4s v0, v0, #1
2279 %tmp1 = load <4 x i8>, ptr %A
2280 %tmp2 = sext <4 x i8> %tmp1 to <4 x i32>
2281 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; llvm.aarch64.neon.sshl applied to a sign-extended <4 x i16> with a splat
; shift amount of 1. The checks expect a single sshll.4s #1.
2285 define <4 x i32> @neon.sshll4s_constant_shift(ptr %A) nounwind {
2286 ; CHECK-LABEL: neon.sshll4s_constant_shift:
2288 ; CHECK-NEXT: ldr d0, [x0]
2289 ; CHECK-NEXT: sshll.4s v0, v0, #1
2291 %tmp1 = load <4 x i16>, ptr %A
2292 %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
2293 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; sshl with a splat shift amount of -1 on a sign-extended <4 x i16>.
; The checks expect the extend as sshll.4s #0 followed by sshr.4s #1.
2297 define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind {
2298 ; CHECK-LABEL: neon.sshll4s_neg_constant_shift:
2300 ; CHECK-NEXT: ldr d0, [x0]
2301 ; CHECK-NEXT: sshll.4s v0, v0, #0
2302 ; CHECK-NEXT: sshr.4s v0, v0, #1
2304 %tmp1 = load <4 x i16>, ptr %A
2305 %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
2306 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; Both sshl operands are constants (<0, 1, 2, 3> shifted by a splat of 2).
; The checks record the current output: a constant-pool load followed by
; shl.4s #2 rather than a folded constant.
2310 ; FIXME: should be constant folded.
2311 define <4 x i32> @neon.sshl4s_constant_fold() nounwind {
2312 ; CHECK-LABEL: neon.sshl4s_constant_fold:
2314 ; CHECK-NEXT: adrp x8, .LCPI173_0
2315 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI173_0]
2316 ; CHECK-NEXT: shl.4s v0, v0, #2
2318 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
; sshl by a splat of 1 where the value operand is loaded, so there is
; nothing to constant-fold. The checks expect add.4s v0, v0, v0.
2322 define <4 x i32> @neon.sshl4s_no_fold(ptr %A) nounwind {
2323 ; CHECK-LABEL: neon.sshl4s_no_fold:
2325 ; CHECK-NEXT: ldr q0, [x0]
2326 ; CHECK-NEXT: add.4s v0, v0, v0
2328 %tmp1 = load <4 x i32>, ptr %A
2329 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; llvm.aarch64.neon.sshl applied to a sign-extended <2 x i32> with a splat
; shift amount of 1. The checks expect a single sshll.2d #1.
2333 define <2 x i64> @neon.sshll2d_constant_shift(ptr %A) nounwind {
2334 ; CHECK-LABEL: neon.sshll2d_constant_shift:
2336 ; CHECK-NEXT: ldr d0, [x0]
2337 ; CHECK-NEXT: sshll.2d v0, v0, #1
2339 %tmp1 = load <2 x i32>, ptr %A
2340 %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
2341 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 1, i64 1>)
; Single-element vector case: sshl.v1i64 with shift <i64 1>. The checks
; expect movi + zip1.2s for the extend and shl d0, d0, #1 for the shift.
; NOTE(review): the input is widened with zext, not sext, even though this is
; the signed (sshl) variant of the test - confirm that is intended before
; regenerating the checks.
2345 define <1 x i64> @neon.sshll_vscalar_constant_shift(ptr %A) nounwind {
2346 ; CHECK-LABEL: neon.sshll_vscalar_constant_shift:
2348 ; CHECK-NEXT: movi.2d v1, #0000000000000000
2349 ; CHECK-NEXT: ldr s0, [x0]
2350 ; CHECK-NEXT: zip1.2s v0, v0, v1
2351 ; CHECK-NEXT: shl d0, d0, #1
2353 %tmp1 = load <1 x i32>, ptr %A
2354 %tmp2 = zext <1 x i32> %tmp1 to <1 x i64>
2355 %tmp3 = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %tmp2, <1 x i64> <i64 1>)
; Scalar i64 form of the sshl intrinsic with a constant shift of 1. The
; checks expect fmov into d0, shl d0, d0, #1, and fmov back to x0.
; NOTE(review): the i32 load is widened with zext, not sext, even though this
; is the signed (sshl) variant of the test - confirm that is intended.
2359 define i64 @neon.sshll_scalar_constant_shift(ptr %A) nounwind {
2360 ; CHECK-LABEL: neon.sshll_scalar_constant_shift:
2362 ; CHECK-NEXT: ldr w8, [x0]
2363 ; CHECK-NEXT: fmov d0, x8
2364 ; CHECK-NEXT: shl d0, d0, #1
2365 ; CHECK-NEXT: fmov x0, d0
2367 %tmp1 = load i32, ptr %A
2368 %tmp2 = zext i32 %tmp1 to i64
2369 %tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 1)
; Scalar i64 sshl with a constant shift of -1. The checks expect the
; immediate right shift sshr d0, d0, #1 between the two fmovs.
; NOTE(review): the i32 load is widened with zext, not sext, even though this
; is the signed (sshl) variant of the test - confirm that is intended.
2373 define i64 @neon.sshll_scalar_constant_shift_m1(ptr %A) nounwind {
2374 ; CHECK-LABEL: neon.sshll_scalar_constant_shift_m1:
2376 ; CHECK-NEXT: ldr w8, [x0]
2377 ; CHECK-NEXT: fmov d0, x8
2378 ; CHECK-NEXT: sshr d0, d0, #1
2379 ; CHECK-NEXT: fmov x0, d0
2381 %tmp1 = load i32, ptr %A
2382 %tmp2 = zext i32 %tmp1 to i64
2383 %tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 -1)
; Both sshl operands are constants (<99, 1000> shifted by a splat of 1).
; The checks record the current output: a constant-pool load followed by
; add.2d rather than a folded constant.
2387 ; FIXME: should be constant folded.
2388 define <2 x i64> @neon.sshl2d_constant_fold() nounwind {
2389 ; CHECK-LABEL: neon.sshl2d_constant_fold:
2391 ; CHECK-NEXT: adrp x8, .LCPI179_0
2392 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI179_0]
2393 ; CHECK-NEXT: add.2d v0, v0, v0
2395 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> <i64 99, i64 1000>, <2 x i64> <i64 1, i64 1>)
; sshl by a splat of 2 where the value operand is loaded, so there is
; nothing to constant-fold. The checks expect shl.2d #2.
2399 define <2 x i64> @neon.sshl2d_no_fold(ptr %A) nounwind {
2400 ; CHECK-LABEL: neon.sshl2d_no_fold:
2402 ; CHECK-NEXT: ldr q0, [x0]
2403 ; CHECK-NEXT: shl.2d v0, v0, #2
2405 %tmp2 = load <2 x i64>, ptr %A
2406 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 2, i64 2>)
; Takes the high eight lanes of a loaded <16 x i8>, sign-extends them to
; <8 x i16> and shifts left by 1. The checks expect a d-register load from
; [x0, #8] followed by a single sshll.8h #1.
2410 define <8 x i16> @sshll2_8h(ptr %A) nounwind {
2411 ; CHECK-LABEL: sshll2_8h:
2413 ; CHECK-NEXT: ldr d0, [x0, #8]
2414 ; CHECK-NEXT: sshll.8h v0, v0, #1
2416 %load1 = load <16 x i8>, ptr %A
2417 %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2418 %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
2419 %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Takes the high four lanes of a loaded <8 x i16>, sign-extends them to
; <4 x i32> and shifts left by 1. The checks expect a d-register load from
; [x0, #8] followed by a single sshll.4s #1.
2423 define <4 x i32> @sshll2_4s(ptr %A) nounwind {
2424 ; CHECK-LABEL: sshll2_4s:
2426 ; CHECK-NEXT: ldr d0, [x0, #8]
2427 ; CHECK-NEXT: sshll.4s v0, v0, #1
2429 %load1 = load <8 x i16>, ptr %A
2430 %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2431 %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
2432 %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
; Takes the high two lanes of a loaded <4 x i32>, sign-extends them to
; <2 x i64> and shifts left by 1. The checks expect a d-register load from
; [x0, #8] followed by a single sshll.2d #1.
2436 define <2 x i64> @sshll2_2d(ptr %A) nounwind {
2437 ; CHECK-LABEL: sshll2_2d:
2439 ; CHECK-NEXT: ldr d0, [x0, #8]
2440 ; CHECK-NEXT: sshll.2d v0, v0, #1
2442 %load1 = load <4 x i32>, ptr %A
2443 %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2444 %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
2445 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
; llvm.aarch64.neon.sqshl on <8 x i8> with a splat shift amount of 1.
; The checks expect the immediate form sqshl.8b #1.
2449 define <8 x i8> @sqshli8b(ptr %A) nounwind {
2450 ; CHECK-LABEL: sqshli8b:
2452 ; CHECK-NEXT: ldr d0, [x0]
2453 ; CHECK-NEXT: sqshl.8b v0, v0, #1
2455 %tmp1 = load <8 x i8>, ptr %A
2456 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; llvm.aarch64.neon.sqshl on <4 x i16> with a splat shift amount of 1.
; The checks expect the immediate form sqshl.4h #1.
2460 define <4 x i16> @sqshli4h(ptr %A) nounwind {
2461 ; CHECK-LABEL: sqshli4h:
2463 ; CHECK-NEXT: ldr d0, [x0]
2464 ; CHECK-NEXT: sqshl.4h v0, v0, #1
2466 %tmp1 = load <4 x i16>, ptr %A
2467 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
; llvm.aarch64.neon.sqshl on <2 x i32> with a splat shift amount of 1.
; The checks expect the immediate form sqshl.2s #1.
2471 define <2 x i32> @sqshli2s(ptr %A) nounwind {
2472 ; CHECK-LABEL: sqshli2s:
2474 ; CHECK-NEXT: ldr d0, [x0]
2475 ; CHECK-NEXT: sqshl.2s v0, v0, #1
2477 %tmp1 = load <2 x i32>, ptr %A
2478 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
; llvm.aarch64.neon.sqshl on <16 x i8> with a splat shift amount of 1.
; The checks expect the immediate form sqshl.16b #1.
2482 define <16 x i8> @sqshli16b(ptr %A) nounwind {
2483 ; CHECK-LABEL: sqshli16b:
2485 ; CHECK-NEXT: ldr q0, [x0]
2486 ; CHECK-NEXT: sqshl.16b v0, v0, #1
2488 %tmp1 = load <16 x i8>, ptr %A
2489 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; llvm.aarch64.neon.sqshl on <8 x i16> with a splat shift amount of 1.
; The checks expect the immediate form sqshl.8h #1.
2493 define <8 x i16> @sqshli8h(ptr %A) nounwind {
2494 ; CHECK-LABEL: sqshli8h:
2496 ; CHECK-NEXT: ldr q0, [x0]
2497 ; CHECK-NEXT: sqshl.8h v0, v0, #1
2499 %tmp1 = load <8 x i16>, ptr %A
2500 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; llvm.aarch64.neon.sqshl on <4 x i32> with a splat shift amount of 1.
; The checks expect the immediate form sqshl.4s #1.
2504 define <4 x i32> @sqshli4s(ptr %A) nounwind {
2505 ; CHECK-LABEL: sqshli4s:
2507 ; CHECK-NEXT: ldr q0, [x0]
2508 ; CHECK-NEXT: sqshl.4s v0, v0, #1
2510 %tmp1 = load <4 x i32>, ptr %A
2511 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; llvm.aarch64.neon.sqshl on <2 x i64> with a splat shift amount of 1.
; The checks expect the immediate form sqshl.2d #1.
2515 define <2 x i64> @sqshli2d(ptr %A) nounwind {
2516 ; CHECK-LABEL: sqshli2d:
2518 ; CHECK-NEXT: ldr q0, [x0]
2519 ; CHECK-NEXT: sqshl.2d v0, v0, #1
2521 %tmp1 = load <2 x i64>, ptr %A
2522 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
; llvm.aarch64.neon.uqshl on <8 x i8> with a splat shift amount of 1.
; The checks expect the immediate form uqshl.8b #1.
2526 define <8 x i8> @uqshli8b(ptr %A) nounwind {
2527 ; CHECK-LABEL: uqshli8b:
2529 ; CHECK-NEXT: ldr d0, [x0]
2530 ; CHECK-NEXT: uqshl.8b v0, v0, #1
2532 %tmp1 = load <8 x i8>, ptr %A
2533 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; uqshl on <8 x i8> with a splat shift amount of 8, i.e. equal to the
; element width. The checks expect the shift vector to be materialised with
; movi.8b #8 and the register form uqshl.8b to be used, not the immediate
; form.
2537 define <8 x i8> @uqshli8b_1(ptr %A) nounwind {
2538 ; CHECK-LABEL: uqshli8b_1:
2540 ; CHECK-NEXT: movi.8b v1, #8
2541 ; CHECK-NEXT: ldr d0, [x0]
2542 ; CHECK-NEXT: uqshl.8b v0, v0, v1
2544 %tmp1 = load <8 x i8>, ptr %A
2545 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
; llvm.aarch64.neon.uqshl on <4 x i16> with a splat shift amount of 1.
; The checks expect the immediate form uqshl.4h #1.
2549 define <4 x i16> @uqshli4h(ptr %A) nounwind {
2550 ; CHECK-LABEL: uqshli4h:
2552 ; CHECK-NEXT: ldr d0, [x0]
2553 ; CHECK-NEXT: uqshl.4h v0, v0, #1
2555 %tmp1 = load <4 x i16>, ptr %A
2556 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
; llvm.aarch64.neon.uqshl on <2 x i32> with a splat shift amount of 1.
; The checks expect the immediate form uqshl.2s #1.
2560 define <2 x i32> @uqshli2s(ptr %A) nounwind {
2561 ; CHECK-LABEL: uqshli2s:
2563 ; CHECK-NEXT: ldr d0, [x0]
2564 ; CHECK-NEXT: uqshl.2s v0, v0, #1
2566 %tmp1 = load <2 x i32>, ptr %A
2567 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
; llvm.aarch64.neon.uqshl on <16 x i8> with a splat shift amount of 1.
; The checks expect the immediate form uqshl.16b #1.
2571 define <16 x i8> @uqshli16b(ptr %A) nounwind {
2572 ; CHECK-LABEL: uqshli16b:
2574 ; CHECK-NEXT: ldr q0, [x0]
2575 ; CHECK-NEXT: uqshl.16b v0, v0, #1
2577 %tmp1 = load <16 x i8>, ptr %A
2578 %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; llvm.aarch64.neon.uqshl on <8 x i16> with a splat shift amount of 1.
; The checks expect the immediate form uqshl.8h #1.
2582 define <8 x i16> @uqshli8h(ptr %A) nounwind {
2583 ; CHECK-LABEL: uqshli8h:
2585 ; CHECK-NEXT: ldr q0, [x0]
2586 ; CHECK-NEXT: uqshl.8h v0, v0, #1
2588 %tmp1 = load <8 x i16>, ptr %A
2589 %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; llvm.aarch64.neon.uqshl on <4 x i32> with a splat shift amount of 1.
; The checks expect the immediate form uqshl.4s #1.
2593 define <4 x i32> @uqshli4s(ptr %A) nounwind {
2594 ; CHECK-LABEL: uqshli4s:
2596 ; CHECK-NEXT: ldr q0, [x0]
2597 ; CHECK-NEXT: uqshl.4s v0, v0, #1
2599 %tmp1 = load <4 x i32>, ptr %A
2600 %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; llvm.aarch64.neon.uqshl on <2 x i64> with a splat shift amount of 1.
; The checks expect the immediate form uqshl.2d #1.
2604 define <2 x i64> @uqshli2d(ptr %A) nounwind {
2605 ; CHECK-LABEL: uqshli2d:
2607 ; CHECK-NEXT: ldr q0, [x0]
2608 ; CHECK-NEXT: uqshl.2d v0, v0, #1
2610 %tmp1 = load <2 x i64>, ptr %A
2611 %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
; urshl by a splat of -1 on the value loaded from %A, then added to the
; value loaded from %B. The checks expect a single ursra.8b #1 accumulating
; into the register that holds the %B value.
2615 define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind {
2616 ; CHECK-LABEL: ursra8b:
2618 ; CHECK-NEXT: ldr d1, [x0]
2619 ; CHECK-NEXT: ldr d0, [x1]
2620 ; CHECK-NEXT: ursra.8b v0, v1, #1
2622 %tmp1 = load <8 x i8>, ptr %A
2623 %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
2624 %tmp4 = load <8 x i8>, ptr %B
2625 %tmp5 = add <8 x i8> %tmp3, %tmp4
; urshl by a splat of -1 on the value loaded from %A, then added to the
; value loaded from %B. The checks expect a single ursra.4h #1 accumulating
; into the register that holds the %B value.
2629 define <4 x i16> @ursra4h(ptr %A, ptr %B) nounwind {
2630 ; CHECK-LABEL: ursra4h:
2632 ; CHECK-NEXT: ldr d1, [x0]
2633 ; CHECK-NEXT: ldr d0, [x1]
2634 ; CHECK-NEXT: ursra.4h v0, v1, #1
2636 %tmp1 = load <4 x i16>, ptr %A
2637 %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
2638 %tmp4 = load <4 x i16>, ptr %B
2639 %tmp5 = add <4 x i16> %tmp3, %tmp4
; urshl by a splat of -1 on the value loaded from %A, then added to the
; value loaded from %B. The checks expect a single ursra.2s #1 accumulating
; into the register that holds the %B value.
2643 define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind {
2644 ; CHECK-LABEL: ursra2s:
2646 ; CHECK-NEXT: ldr d1, [x0]
2647 ; CHECK-NEXT: ldr d0, [x1]
2648 ; CHECK-NEXT: ursra.2s v0, v1, #1
2650 %tmp1 = load <2 x i32>, ptr %A
2651 %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
2652 %tmp4 = load <2 x i32>, ptr %B
2653 %tmp5 = add <2 x i32> %tmp3, %tmp4
; urshl by a splat of -1 on the value loaded from %A, then added to the
; value loaded from %B. The checks expect a single ursra.16b #1 accumulating
; into the register that holds the %B value.
2657 define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind {
2658 ; CHECK-LABEL: ursra16b:
2660 ; CHECK-NEXT: ldr q1, [x0]
2661 ; CHECK-NEXT: ldr q0, [x1]
2662 ; CHECK-NEXT: ursra.16b v0, v1, #1
2664 %tmp1 = load <16 x i8>, ptr %A
2665 %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
2666 %tmp4 = load <16 x i8>, ptr %B
2667 %tmp5 = add <16 x i8> %tmp3, %tmp4
; urshl by a splat of -1 on the value loaded from %A, then added to the
; value loaded from %B. The checks expect a single ursra.8h #1 accumulating
; into the register that holds the %B value.
2671 define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind {
2672 ; CHECK-LABEL: ursra8h:
2674 ; CHECK-NEXT: ldr q1, [x0]
2675 ; CHECK-NEXT: ldr q0, [x1]
2676 ; CHECK-NEXT: ursra.8h v0, v1, #1
2678 %tmp1 = load <8 x i16>, ptr %A
2679 %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
2680 %tmp4 = load <8 x i16>, ptr %B
2681 %tmp5 = add <8 x i16> %tmp3, %tmp4
; urshl by a splat of -1 on the value loaded from %A, then added to the
; value loaded from %B. The checks expect a single ursra.4s #1 accumulating
; into the register that holds the %B value.
2685 define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind {
2686 ; CHECK-LABEL: ursra4s:
2688 ; CHECK-NEXT: ldr q1, [x0]
2689 ; CHECK-NEXT: ldr q0, [x1]
2690 ; CHECK-NEXT: ursra.4s v0, v1, #1
2692 %tmp1 = load <4 x i32>, ptr %A
2693 %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
2694 %tmp4 = load <4 x i32>, ptr %B
2695 %tmp5 = add <4 x i32> %tmp3, %tmp4
; urshl by a splat of -1 on the value loaded from %A, then added to the
; value loaded from %B. The checks expect a single ursra.2d #1 accumulating
; into the register that holds the %B value.
2699 define <2 x i64> @ursra2d(ptr %A, ptr %B) nounwind {
2700 ; CHECK-LABEL: ursra2d:
2702 ; CHECK-NEXT: ldr q1, [x0]
2703 ; CHECK-NEXT: ldr q0, [x1]
2704 ; CHECK-NEXT: ursra.2d v0, v1, #1
2706 %tmp1 = load <2 x i64>, ptr %A
2707 %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
2708 %tmp4 = load <2 x i64>, ptr %B
2709 %tmp5 = add <2 x i64> %tmp3, %tmp4
; <1 x i64> case: urshl by <i64 -1> on the %A value, then added to the %B
; value. The checks expect the scalar form ursra d0, d1, #1.
2713 define <1 x i64> @ursra1d(ptr %A, ptr %B) nounwind {
2714 ; CHECK-LABEL: ursra1d:
2716 ; CHECK-NEXT: ldr d1, [x0]
2717 ; CHECK-NEXT: ldr d0, [x1]
2718 ; CHECK-NEXT: ursra d0, d1, #1
2720 %tmp1 = load <1 x i64>, ptr %A
2721 %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
2722 %tmp4 = load <1 x i64>, ptr %B
2723 %tmp5 = add <1 x i64> %tmp3, %tmp4
; Scalar i64 case: urshl.i64 by -1 on the %A value, then added to the %B
; value. The checks expect both values to be loaded into d registers,
; combined with ursra d1, d0, #1, and the result moved to x0.
2727 define i64 @ursra_scalar(ptr %A, ptr %B) nounwind {
2728 ; CHECK-LABEL: ursra_scalar:
2730 ; CHECK-NEXT: ldr d0, [x0]
2731 ; CHECK-NEXT: ldr d1, [x1]
2732 ; CHECK-NEXT: ursra d1, d0, #1
2733 ; CHECK-NEXT: fmov x0, d1
2735 %tmp1 = load i64, ptr %A
2736 %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1)
2737 %tmp4 = load i64, ptr %B
2738 %tmp5 = add i64 %tmp3, %tmp4
; srshl by a splat of -1 on the value loaded from %A, then added to the
; value loaded from %B. The checks expect a single srsra.8b #1 accumulating
; into the register that holds the %B value.
2742 define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind {
2743 ; CHECK-LABEL: srsra8b:
2745 ; CHECK-NEXT: ldr d1, [x0]
2746 ; CHECK-NEXT: ldr d0, [x1]
2747 ; CHECK-NEXT: srsra.8b v0, v1, #1
2749 %tmp1 = load <8 x i8>, ptr %A
2750 %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
2751 %tmp4 = load <8 x i8>, ptr %B
2752 %tmp5 = add <8 x i8> %tmp3, %tmp4
; srshl by a splat of -1 on the value loaded from %A, then added to the
; value loaded from %B. The checks expect a single srsra.4h #1 accumulating
; into the register that holds the %B value.
2756 define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind {
2757 ; CHECK-LABEL: srsra4h:
2759 ; CHECK-NEXT: ldr d1, [x0]
2760 ; CHECK-NEXT: ldr d0, [x1]
2761 ; CHECK-NEXT: srsra.4h v0, v1, #1
2763 %tmp1 = load <4 x i16>, ptr %A
2764 %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
2765 %tmp4 = load <4 x i16>, ptr %B
2766 %tmp5 = add <4 x i16> %tmp3, %tmp4
; srshl by a splat of -1 on the value loaded from %A, then added to the
; value loaded from %B. The checks expect a single srsra.2s #1 accumulating
; into the register that holds the %B value.
2770 define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind {
2771 ; CHECK-LABEL: srsra2s:
2773 ; CHECK-NEXT: ldr d1, [x0]
2774 ; CHECK-NEXT: ldr d0, [x1]
2775 ; CHECK-NEXT: srsra.2s v0, v1, #1
2777 %tmp1 = load <2 x i32>, ptr %A
2778 %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
2779 %tmp4 = load <2 x i32>, ptr %B
2780 %tmp5 = add <2 x i32> %tmp3, %tmp4
; srshl by a splat of -1 on the value loaded from %A, then added to the
; value loaded from %B. The checks expect a single srsra.16b #1 accumulating
; into the register that holds the %B value.
2784 define <16 x i8> @srsra16b(ptr %A, ptr %B) nounwind {
2785 ; CHECK-LABEL: srsra16b:
2787 ; CHECK-NEXT: ldr q1, [x0]
2788 ; CHECK-NEXT: ldr q0, [x1]
2789 ; CHECK-NEXT: srsra.16b v0, v1, #1
2791 %tmp1 = load <16 x i8>, ptr %A
2792 %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
2793 %tmp4 = load <16 x i8>, ptr %B
2794 %tmp5 = add <16 x i8> %tmp3, %tmp4
; srshl by a splat of -1 on the value loaded from %A, then added to the
; value loaded from %B. The checks expect a single srsra.8h #1 accumulating
; into the register that holds the %B value.
2798 define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind {
2799 ; CHECK-LABEL: srsra8h:
2801 ; CHECK-NEXT: ldr q1, [x0]
2802 ; CHECK-NEXT: ldr q0, [x1]
2803 ; CHECK-NEXT: srsra.8h v0, v1, #1
2805 %tmp1 = load <8 x i16>, ptr %A
2806 %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
2807 %tmp4 = load <8 x i16>, ptr %B
2808 %tmp5 = add <8 x i16> %tmp3, %tmp4
; srshl by a splat of -1 on the value loaded from %A, then added to the
; value loaded from %B. The checks expect a single srsra.4s #1 accumulating
; into the register that holds the %B value.
2812 define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind {
2813 ; CHECK-LABEL: srsra4s:
2815 ; CHECK-NEXT: ldr q1, [x0]
2816 ; CHECK-NEXT: ldr q0, [x1]
2817 ; CHECK-NEXT: srsra.4s v0, v1, #1
2819 %tmp1 = load <4 x i32>, ptr %A
2820 %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
2821 %tmp4 = load <4 x i32>, ptr %B
2822 %tmp5 = add <4 x i32> %tmp3, %tmp4
; srshl by a splat of -1 on the value loaded from %A, then added to the
; value loaded from %B. The checks expect a single srsra.2d #1 accumulating
; into the register that holds the %B value.
2826 define <2 x i64> @srsra2d(ptr %A, ptr %B) nounwind {
2827 ; CHECK-LABEL: srsra2d:
2829 ; CHECK-NEXT: ldr q1, [x0]
2830 ; CHECK-NEXT: ldr q0, [x1]
2831 ; CHECK-NEXT: srsra.2d v0, v1, #1
2833 %tmp1 = load <2 x i64>, ptr %A
2834 %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
2835 %tmp4 = load <2 x i64>, ptr %B
2836 %tmp5 = add <2 x i64> %tmp3, %tmp4
; <1 x i64> case: srshl by <i64 -1> on the %A value, then added to the %B
; value. The checks expect the scalar form srsra d0, d1, #1.
2840 define <1 x i64> @srsra1d(ptr %A, ptr %B) nounwind {
2841 ; CHECK-LABEL: srsra1d:
2843 ; CHECK-NEXT: ldr d1, [x0]
2844 ; CHECK-NEXT: ldr d0, [x1]
2845 ; CHECK-NEXT: srsra d0, d1, #1
2847 %tmp1 = load <1 x i64>, ptr %A
2848 %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
2849 %tmp4 = load <1 x i64>, ptr %B
2850 %tmp5 = add <1 x i64> %tmp3, %tmp4
; Scalar i64 case: srshl.i64 by -1 on the %A value, then added to the %B
; value. The checks expect both values to be loaded into d registers,
; combined with srsra d1, d0, #1, and the result moved to x0.
2854 define i64 @srsra_scalar(ptr %A, ptr %B) nounwind {
2855 ; CHECK-LABEL: srsra_scalar:
2857 ; CHECK-NEXT: ldr d0, [x0]
2858 ; CHECK-NEXT: ldr d1, [x1]
2859 ; CHECK-NEXT: srsra d1, d0, #1
2860 ; CHECK-NEXT: fmov x0, d1
2862 %tmp1 = load i64, ptr %A
2863 %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1)
2864 %tmp4 = load i64, ptr %B
2865 %tmp5 = add i64 %tmp3, %tmp4
; Plain IR lshr by 1 of the %A value, then added to the %B value. The checks
; expect a single usra.8b #1 accumulating into the register holding %B.
2869 define <8 x i8> @usra8b(ptr %A, ptr %B) nounwind {
2870 ; CHECK-LABEL: usra8b:
2872 ; CHECK-NEXT: ldr d1, [x0]
2873 ; CHECK-NEXT: ldr d0, [x1]
2874 ; CHECK-NEXT: usra.8b v0, v1, #1
2876 %tmp1 = load <8 x i8>, ptr %A
2877 %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
2878 %tmp4 = load <8 x i8>, ptr %B
2879 %tmp5 = add <8 x i8> %tmp3, %tmp4
; Plain IR lshr by 1 of the %A value, then added to the %B value. The checks
; expect a single usra.4h #1 accumulating into the register holding %B.
2883 define <4 x i16> @usra4h(ptr %A, ptr %B) nounwind {
2884 ; CHECK-LABEL: usra4h:
2886 ; CHECK-NEXT: ldr d1, [x0]
2887 ; CHECK-NEXT: ldr d0, [x1]
2888 ; CHECK-NEXT: usra.4h v0, v1, #1
2890 %tmp1 = load <4 x i16>, ptr %A
2891 %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
2892 %tmp4 = load <4 x i16>, ptr %B
2893 %tmp5 = add <4 x i16> %tmp3, %tmp4
; Plain IR lshr by 1 of the %A value, then added to the %B value. The checks
; expect a single usra.2s #1 accumulating into the register holding %B.
2897 define <2 x i32> @usra2s(ptr %A, ptr %B) nounwind {
2898 ; CHECK-LABEL: usra2s:
2900 ; CHECK-NEXT: ldr d1, [x0]
2901 ; CHECK-NEXT: ldr d0, [x1]
2902 ; CHECK-NEXT: usra.2s v0, v1, #1
2904 %tmp1 = load <2 x i32>, ptr %A
2905 %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
2906 %tmp4 = load <2 x i32>, ptr %B
2907 %tmp5 = add <2 x i32> %tmp3, %tmp4
; Plain IR lshr by 1 of the %A value, then added to the %B value. The checks
; expect a single usra.16b #1 accumulating into the register holding %B.
2911 define <16 x i8> @usra16b(ptr %A, ptr %B) nounwind {
2912 ; CHECK-LABEL: usra16b:
2914 ; CHECK-NEXT: ldr q1, [x0]
2915 ; CHECK-NEXT: ldr q0, [x1]
2916 ; CHECK-NEXT: usra.16b v0, v1, #1
2918 %tmp1 = load <16 x i8>, ptr %A
2919 %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
2920 %tmp4 = load <16 x i8>, ptr %B
2921 %tmp5 = add <16 x i8> %tmp3, %tmp4
; Plain IR lshr by 1 of the %A value, then added to the %B value. The checks
; expect a single usra.8h #1 accumulating into the register holding %B.
2925 define <8 x i16> @usra8h(ptr %A, ptr %B) nounwind {
2926 ; CHECK-LABEL: usra8h:
2928 ; CHECK-NEXT: ldr q1, [x0]
2929 ; CHECK-NEXT: ldr q0, [x1]
2930 ; CHECK-NEXT: usra.8h v0, v1, #1
2932 %tmp1 = load <8 x i16>, ptr %A
2933 %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
2934 %tmp4 = load <8 x i16>, ptr %B
2935 %tmp5 = add <8 x i16> %tmp3, %tmp4
; Plain IR lshr by 1 of the %A value, then added to the %B value. The checks
; expect a single usra.4s #1 accumulating into the register holding %B.
2939 define <4 x i32> @usra4s(ptr %A, ptr %B) nounwind {
2940 ; CHECK-LABEL: usra4s:
2942 ; CHECK-NEXT: ldr q1, [x0]
2943 ; CHECK-NEXT: ldr q0, [x1]
2944 ; CHECK-NEXT: usra.4s v0, v1, #1
2946 %tmp1 = load <4 x i32>, ptr %A
2947 %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
2948 %tmp4 = load <4 x i32>, ptr %B
2949 %tmp5 = add <4 x i32> %tmp3, %tmp4
; Plain IR lshr by 1 of the %A value, then added to the %B value. The checks
; expect a single usra.2d #1 accumulating into the register holding %B.
2953 define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind {
2954 ; CHECK-LABEL: usra2d:
2956 ; CHECK-NEXT: ldr q1, [x0]
2957 ; CHECK-NEXT: ldr q0, [x1]
2958 ; CHECK-NEXT: usra.2d v0, v1, #1
2960 %tmp1 = load <2 x i64>, ptr %A
2961 %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
2962 %tmp4 = load <2 x i64>, ptr %B
2963 %tmp5 = add <2 x i64> %tmp3, %tmp4
; <1 x i64> case: lshr by 1 of the %A value, then added to the %B value.
; The checks expect the scalar form usra d0, d1, #1.
2967 define <1 x i64> @usra1d(ptr %A, ptr %B) nounwind {
2968 ; CHECK-LABEL: usra1d:
2970 ; CHECK-NEXT: ldr d1, [x0]
2971 ; CHECK-NEXT: ldr d0, [x1]
2972 ; CHECK-NEXT: usra d0, d1, #1
2974 %tmp1 = load <1 x i64>, ptr %A
2975 %tmp3 = lshr <1 x i64> %tmp1, <i64 1>
2976 %tmp4 = load <1 x i64>, ptr %B
2977 %tmp5 = add <1 x i64> %tmp3, %tmp4
; Plain IR ashr by 1 of the %A value, then added to the %B value. The checks
; expect a single ssra.8b #1 accumulating into the register holding %B.
2981 define <8 x i8> @ssra8b(ptr %A, ptr %B) nounwind {
2982 ; CHECK-LABEL: ssra8b:
2984 ; CHECK-NEXT: ldr d1, [x0]
2985 ; CHECK-NEXT: ldr d0, [x1]
2986 ; CHECK-NEXT: ssra.8b v0, v1, #1
2988 %tmp1 = load <8 x i8>, ptr %A
2989 %tmp3 = ashr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
2990 %tmp4 = load <8 x i8>, ptr %B
2991 %tmp5 = add <8 x i8> %tmp3, %tmp4
; Plain IR ashr by 1 of the %A value, then added to the %B value. The checks
; expect a single ssra.4h #1 accumulating into the register holding %B.
2995 define <4 x i16> @ssra4h(ptr %A, ptr %B) nounwind {
2996 ; CHECK-LABEL: ssra4h:
2998 ; CHECK-NEXT: ldr d1, [x0]
2999 ; CHECK-NEXT: ldr d0, [x1]
3000 ; CHECK-NEXT: ssra.4h v0, v1, #1
3002 %tmp1 = load <4 x i16>, ptr %A
3003 %tmp3 = ashr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
3004 %tmp4 = load <4 x i16>, ptr %B
3005 %tmp5 = add <4 x i16> %tmp3, %tmp4
; Plain IR ashr by 1 of the %A value, then added to the %B value. The checks
; expect a single ssra.2s #1 accumulating into the register holding %B.
3009 define <2 x i32> @ssra2s(ptr %A, ptr %B) nounwind {
3010 ; CHECK-LABEL: ssra2s:
3012 ; CHECK-NEXT: ldr d1, [x0]
3013 ; CHECK-NEXT: ldr d0, [x1]
3014 ; CHECK-NEXT: ssra.2s v0, v1, #1
3016 %tmp1 = load <2 x i32>, ptr %A
3017 %tmp3 = ashr <2 x i32> %tmp1, <i32 1, i32 1>
3018 %tmp4 = load <2 x i32>, ptr %B
3019 %tmp5 = add <2 x i32> %tmp3, %tmp4
; Plain IR ashr by 1 of the %A value, then added to the %B value. The checks
; expect a single ssra.16b #1 accumulating into the register holding %B.
3023 define <16 x i8> @ssra16b(ptr %A, ptr %B) nounwind {
3024 ; CHECK-LABEL: ssra16b:
3026 ; CHECK-NEXT: ldr q1, [x0]
3027 ; CHECK-NEXT: ldr q0, [x1]
3028 ; CHECK-NEXT: ssra.16b v0, v1, #1
3030 %tmp1 = load <16 x i8>, ptr %A
3031 %tmp3 = ashr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
3032 %tmp4 = load <16 x i8>, ptr %B
3033 %tmp5 = add <16 x i8> %tmp3, %tmp4
; Plain IR ashr by 1 of the %A value, then added to the %B value. The checks
; expect a single ssra.8h #1 accumulating into the register holding %B.
3037 define <8 x i16> @ssra8h(ptr %A, ptr %B) nounwind {
3038 ; CHECK-LABEL: ssra8h:
3040 ; CHECK-NEXT: ldr q1, [x0]
3041 ; CHECK-NEXT: ldr q0, [x1]
3042 ; CHECK-NEXT: ssra.8h v0, v1, #1
3044 %tmp1 = load <8 x i16>, ptr %A
3045 %tmp3 = ashr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
3046 %tmp4 = load <8 x i16>, ptr %B
3047 %tmp5 = add <8 x i16> %tmp3, %tmp4
; Plain IR ashr by 1 of the %A value, then added to the %B value. The checks
; expect a single ssra.4s #1 accumulating into the register holding %B.
3051 define <4 x i32> @ssra4s(ptr %A, ptr %B) nounwind {
3052 ; CHECK-LABEL: ssra4s:
3054 ; CHECK-NEXT: ldr q1, [x0]
3055 ; CHECK-NEXT: ldr q0, [x1]
3056 ; CHECK-NEXT: ssra.4s v0, v1, #1
3058 %tmp1 = load <4 x i32>, ptr %A
3059 %tmp3 = ashr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
3060 %tmp4 = load <4 x i32>, ptr %B
3061 %tmp5 = add <4 x i32> %tmp3, %tmp4
; Plain IR ashr by 1 of the %A value, then added to the %B value. The checks
; expect a single ssra.2d #1 accumulating into the register holding %B.
3065 define <2 x i64> @ssra2d(ptr %A, ptr %B) nounwind {
3066 ; CHECK-LABEL: ssra2d:
3068 ; CHECK-NEXT: ldr q1, [x0]
3069 ; CHECK-NEXT: ldr q0, [x1]
3070 ; CHECK-NEXT: ssra.2d v0, v1, #1
3072 %tmp1 = load <2 x i64>, ptr %A
3073 %tmp3 = ashr <2 x i64> %tmp1, <i64 1, i64 1>
3074 %tmp4 = load <2 x i64>, ptr %B
3075 %tmp5 = add <2 x i64> %tmp3, %tmp4
; lshr by 1 of the %A value OR-ed with the %B value. The checks expect two
; separate instructions: ushr.8b #1 followed by orr.8b.
3079 define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind {
3080 ; CHECK-LABEL: shr_orr8b:
3082 ; CHECK-NEXT: ldr d0, [x0]
3083 ; CHECK-NEXT: ldr d1, [x1]
3084 ; CHECK-NEXT: ushr.8b v0, v0, #1
3085 ; CHECK-NEXT: orr.8b v0, v0, v1
3087 %tmp1 = load <8 x i8>, ptr %A
3088 %tmp4 = load <8 x i8>, ptr %B
3089 %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
3090 %tmp5 = or <8 x i8> %tmp3, %tmp4
; lshr by 1 of the %A value OR-ed with the %B value. The checks expect two
; separate instructions: ushr.4h #1 followed by orr.8b.
3094 define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind {
3095 ; CHECK-LABEL: shr_orr4h:
3097 ; CHECK-NEXT: ldr d0, [x0]
3098 ; CHECK-NEXT: ldr d1, [x1]
3099 ; CHECK-NEXT: ushr.4h v0, v0, #1
3100 ; CHECK-NEXT: orr.8b v0, v0, v1
3102 %tmp1 = load <4 x i16>, ptr %A
3103 %tmp4 = load <4 x i16>, ptr %B
3104 %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
3105 %tmp5 = or <4 x i16> %tmp3, %tmp4
; lshr by 1 of the %A value OR-ed with the %B value. The checks expect two
; separate instructions: ushr.2s #1 followed by orr.8b.
3109 define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind {
3110 ; CHECK-LABEL: shr_orr2s:
3112 ; CHECK-NEXT: ldr d0, [x0]
3113 ; CHECK-NEXT: ldr d1, [x1]
3114 ; CHECK-NEXT: ushr.2s v0, v0, #1
3115 ; CHECK-NEXT: orr.8b v0, v0, v1
3117 %tmp1 = load <2 x i32>, ptr %A
3118 %tmp4 = load <2 x i32>, ptr %B
3119 %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
3120 %tmp5 = or <2 x i32> %tmp3, %tmp4
; lshr by 1 of the %A value OR-ed with the %B value. The checks expect two
; separate instructions: ushr.16b #1 followed by orr.16b.
3124 define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind {
3125 ; CHECK-LABEL: shr_orr16b:
3127 ; CHECK-NEXT: ldr q0, [x0]
3128 ; CHECK-NEXT: ldr q1, [x1]
3129 ; CHECK-NEXT: ushr.16b v0, v0, #1
3130 ; CHECK-NEXT: orr.16b v0, v0, v1
3132 %tmp1 = load <16 x i8>, ptr %A
3133 %tmp4 = load <16 x i8>, ptr %B
3134 %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
3135 %tmp5 = or <16 x i8> %tmp3, %tmp4
; lshr by 1 of the %A value OR-ed with the %B value. The checks expect two
; separate instructions: ushr.8h #1 followed by orr.16b.
3139 define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind {
3140 ; CHECK-LABEL: shr_orr8h:
3142 ; CHECK-NEXT: ldr q0, [x0]
3143 ; CHECK-NEXT: ldr q1, [x1]
3144 ; CHECK-NEXT: ushr.8h v0, v0, #1
3145 ; CHECK-NEXT: orr.16b v0, v0, v1
3147 %tmp1 = load <8 x i16>, ptr %A
3148 %tmp4 = load <8 x i16>, ptr %B
3149 %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
3150 %tmp5 = or <8 x i16> %tmp3, %tmp4
; lshr by 1 of the %A value OR-ed with the %B value. The checks expect two
; separate instructions: ushr.4s #1 followed by orr.16b.
3154 define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind {
3155 ; CHECK-LABEL: shr_orr4s:
3157 ; CHECK-NEXT: ldr q0, [x0]
3158 ; CHECK-NEXT: ldr q1, [x1]
3159 ; CHECK-NEXT: ushr.4s v0, v0, #1
3160 ; CHECK-NEXT: orr.16b v0, v0, v1
3162 %tmp1 = load <4 x i32>, ptr %A
3163 %tmp4 = load <4 x i32>, ptr %B
3164 %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
3165 %tmp5 = or <4 x i32> %tmp3, %tmp4
; 128-bit variant: loads two <2 x i64> vectors (q registers), logically shifts
; every lane of the first right by 1 and ORs the result with the second. The
; expected assembly is ushr.2d by #1 followed by orr.16b.
3169 define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind {
3170 ; CHECK-LABEL: shr_orr2d:
3172 ; CHECK-NEXT: ldr q0, [x0]
3173 ; CHECK-NEXT: ldr q1, [x1]
3174 ; CHECK-NEXT: ushr.2d v0, v0, #1
3175 ; CHECK-NEXT: orr.16b v0, v0, v1
3177 %tmp1 = load <2 x i64>, ptr %A
3178 %tmp4 = load <2 x i64>, ptr %B
3179 %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
3180 %tmp5 = or <2 x i64> %tmp3, %tmp4
; Loads two <8 x i8> vectors, shifts every lane of the first left by 1 and ORs
; the result with the second. The expected assembly emits the shift-by-one as
; add.8b v0, v0, v0 (the value added to itself), followed by orr.8b.
3184 define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind {
3185 ; CHECK-LABEL: shl_orr8b:
3187 ; CHECK-NEXT: ldr d0, [x0]
3188 ; CHECK-NEXT: ldr d1, [x1]
3189 ; CHECK-NEXT: add.8b v0, v0, v0
3190 ; CHECK-NEXT: orr.8b v0, v0, v1
3192 %tmp1 = load <8 x i8>, ptr %A
3193 %tmp4 = load <8 x i8>, ptr %B
3194 %tmp3 = shl <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
3195 %tmp5 = or <8 x i8> %tmp3, %tmp4
; Loads two <4 x i16> vectors, shifts every lane of the first left by 1 and ORs
; the result with the second. The expected assembly emits the shift-by-one as
; add.4h v0, v0, v0 (the value added to itself), followed by orr.8b.
3199 define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind {
3200 ; CHECK-LABEL: shl_orr4h:
3202 ; CHECK-NEXT: ldr d0, [x0]
3203 ; CHECK-NEXT: ldr d1, [x1]
3204 ; CHECK-NEXT: add.4h v0, v0, v0
3205 ; CHECK-NEXT: orr.8b v0, v0, v1
3207 %tmp1 = load <4 x i16>, ptr %A
3208 %tmp4 = load <4 x i16>, ptr %B
3209 %tmp3 = shl <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
3210 %tmp5 = or <4 x i16> %tmp3, %tmp4
; Loads two <2 x i32> vectors, shifts every lane of the first left by 1 and ORs
; the result with the second. The expected assembly emits the shift-by-one as
; add.2s v0, v0, v0 (the value added to itself), followed by orr.8b.
3214 define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind {
3215 ; CHECK-LABEL: shl_orr2s:
3217 ; CHECK-NEXT: ldr d0, [x0]
3218 ; CHECK-NEXT: ldr d1, [x1]
3219 ; CHECK-NEXT: add.2s v0, v0, v0
3220 ; CHECK-NEXT: orr.8b v0, v0, v1
3222 %tmp1 = load <2 x i32>, ptr %A
3223 %tmp4 = load <2 x i32>, ptr %B
3224 %tmp3 = shl <2 x i32> %tmp1, <i32 1, i32 1>
3225 %tmp5 = or <2 x i32> %tmp3, %tmp4
; 128-bit variant: loads two <16 x i8> vectors (q registers), shifts every lane
; of the first left by 1 and ORs the result with the second. The expected
; assembly emits the shift-by-one as add.16b v0, v0, v0, followed by orr.16b.
3229 define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind {
3230 ; CHECK-LABEL: shl_orr16b:
3232 ; CHECK-NEXT: ldr q0, [x0]
3233 ; CHECK-NEXT: ldr q1, [x1]
3234 ; CHECK-NEXT: add.16b v0, v0, v0
3235 ; CHECK-NEXT: orr.16b v0, v0, v1
3237 %tmp1 = load <16 x i8>, ptr %A
3238 %tmp4 = load <16 x i8>, ptr %B
3239 %tmp3 = shl <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
3240 %tmp5 = or <16 x i8> %tmp3, %tmp4
; 128-bit variant: loads two <8 x i16> vectors (q registers), shifts every lane
; of the first left by 1 and ORs the result with the second. The expected
; assembly emits the shift-by-one as add.8h v0, v0, v0, followed by orr.16b.
3244 define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind {
3245 ; CHECK-LABEL: shl_orr8h:
3247 ; CHECK-NEXT: ldr q0, [x0]
3248 ; CHECK-NEXT: ldr q1, [x1]
3249 ; CHECK-NEXT: add.8h v0, v0, v0
3250 ; CHECK-NEXT: orr.16b v0, v0, v1
3252 %tmp1 = load <8 x i16>, ptr %A
3253 %tmp4 = load <8 x i16>, ptr %B
3254 %tmp3 = shl <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
3255 %tmp5 = or <8 x i16> %tmp3, %tmp4
; 128-bit variant: loads two <4 x i32> vectors (q registers), shifts every lane
; of the first left by 1 and ORs the result with the second. The expected
; assembly emits the shift-by-one as add.4s v0, v0, v0, followed by orr.16b.
3259 define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind {
3260 ; CHECK-LABEL: shl_orr4s:
3262 ; CHECK-NEXT: ldr q0, [x0]
3263 ; CHECK-NEXT: ldr q1, [x1]
3264 ; CHECK-NEXT: add.4s v0, v0, v0
3265 ; CHECK-NEXT: orr.16b v0, v0, v1
3267 %tmp1 = load <4 x i32>, ptr %A
3268 %tmp4 = load <4 x i32>, ptr %B
3269 %tmp3 = shl <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
3270 %tmp5 = or <4 x i32> %tmp3, %tmp4
; 128-bit variant: loads two <2 x i64> vectors (q registers), shifts every lane
; of the first left by 1 and ORs the result with the second. The expected
; assembly emits the shift-by-one as add.2d v0, v0, v0, followed by orr.16b.
3274 define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind {
3275 ; CHECK-LABEL: shl_orr2d:
3277 ; CHECK-NEXT: ldr q0, [x0]
3278 ; CHECK-NEXT: ldr q1, [x1]
3279 ; CHECK-NEXT: add.2d v0, v0, v0
3280 ; CHECK-NEXT: orr.16b v0, v0, v1
3282 %tmp1 = load <2 x i64>, ptr %A
3283 %tmp4 = load <2 x i64>, ptr %B
3284 %tmp3 = shl <2 x i64> %tmp1, <i64 1, i64 1>
3285 %tmp5 = or <2 x i64> %tmp3, %tmp4
; Zero-extends an <8 x i8> argument to <8 x i16> and shifts each lane left by 8,
; i.e. by the width of the source element. The expected assembly is a single
; shll.8h v0, v0, #8 covering both the extension and the shift.
3289 define <8 x i16> @shll(<8 x i8> %in) {
3290 ; CHECK-LABEL: shll:
3292 ; CHECK-NEXT: shll.8h v0, v0, #8
3294 %ext = zext <8 x i8> %in to <8 x i16>
3295 %res = shl <8 x i16> %ext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; Takes lanes 4..7 of an <8 x i16> argument via shufflevector, zero-extends
; them to <4 x i32> and shifts each lane left by 16 (the source element width).
; The expected assembly is a single shll2.4s v0, v0, #16.
; NOTE(review): the unused second shuffle operand is `undef`; newer LLVM tests
; appear to prefer `poison` for such placeholders - confirm before changing,
; since the assertions would need to be regenerated.
3299 define <4 x i32> @shll_high(<8 x i16> %in) {
3300 ; CHECK-LABEL: shll_high:
3302 ; CHECK-NEXT: shll2.4s v0, v0, #16
3304 %extract = shufflevector <8 x i16> %in, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3305 %ext = zext <4 x i16> %extract to <4 x i32>
3306 %res = shl <4 x i32> %ext, <i32 16, i32 16, i32 16, i32 16>
; Loads two <8 x i8> vectors and passes them to the llvm.aarch64.neon.vsli
; intrinsic with an i32 shift argument of 1. The expected assembly after the
; two loads is a single sli.8b v0, v1, #1.
3310 define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind {
3311 ; CHECK-LABEL: sli8b:
3313 ; CHECK-NEXT: ldr d0, [x0]
3314 ; CHECK-NEXT: ldr d1, [x1]
3315 ; CHECK-NEXT: sli.8b v0, v1, #1
3317 %tmp1 = load <8 x i8>, ptr %A
3318 %tmp2 = load <8 x i8>, ptr %B
3319 %tmp3 = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1)
; Loads two <4 x i16> vectors and passes them to the llvm.aarch64.neon.vsli
; intrinsic with an i32 shift argument of 1. The expected assembly after the
; two loads is a single sli.4h v0, v1, #1.
3323 define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind {
3324 ; CHECK-LABEL: sli4h:
3326 ; CHECK-NEXT: ldr d0, [x0]
3327 ; CHECK-NEXT: ldr d1, [x1]
3328 ; CHECK-NEXT: sli.4h v0, v1, #1
3330 %tmp1 = load <4 x i16>, ptr %A
3331 %tmp2 = load <4 x i16>, ptr %B
3332 %tmp3 = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1)
; Loads two <2 x i32> vectors and passes them to the llvm.aarch64.neon.vsli
; intrinsic with an i32 shift argument of 1. The expected assembly after the
; two loads is a single sli.2s v0, v1, #1.
3336 define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind {
3337 ; CHECK-LABEL: sli2s:
3339 ; CHECK-NEXT: ldr d0, [x0]
3340 ; CHECK-NEXT: ldr d1, [x1]
3341 ; CHECK-NEXT: sli.2s v0, v1, #1
3343 %tmp1 = load <2 x i32>, ptr %A
3344 %tmp2 = load <2 x i32>, ptr %B
3345 %tmp3 = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1)
; Loads two <1 x i64> vectors and passes them to the llvm.aarch64.neon.vsli
; intrinsic with an i32 shift argument of 1. The expected assembly uses the
; scalar d-register form: sli d0, d1, #1.
3349 define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind {
3350 ; CHECK-LABEL: sli1d:
3352 ; CHECK-NEXT: ldr d0, [x0]
3353 ; CHECK-NEXT: ldr d1, [x1]
3354 ; CHECK-NEXT: sli d0, d1, #1
3356 %tmp1 = load <1 x i64>, ptr %A
3357 %tmp2 = load <1 x i64>, ptr %B
3358 %tmp3 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1)
; 128-bit variant: loads two <16 x i8> vectors (q registers) and passes them to
; the llvm.aarch64.neon.vsli intrinsic with an i32 shift argument of 1. The
; expected assembly after the two loads is a single sli.16b v0, v1, #1.
3362 define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind {
3363 ; CHECK-LABEL: sli16b:
3365 ; CHECK-NEXT: ldr q0, [x0]
3366 ; CHECK-NEXT: ldr q1, [x1]
3367 ; CHECK-NEXT: sli.16b v0, v1, #1
3369 %tmp1 = load <16 x i8>, ptr %A
3370 %tmp2 = load <16 x i8>, ptr %B
3371 %tmp3 = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1)
; 128-bit variant: loads two <8 x i16> vectors (q registers) and passes them to
; the llvm.aarch64.neon.vsli intrinsic with an i32 shift argument of 1. The
; expected assembly after the two loads is a single sli.8h v0, v1, #1.
3375 define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind {
3376 ; CHECK-LABEL: sli8h:
3378 ; CHECK-NEXT: ldr q0, [x0]
3379 ; CHECK-NEXT: ldr q1, [x1]
3380 ; CHECK-NEXT: sli.8h v0, v1, #1
3382 %tmp1 = load <8 x i16>, ptr %A
3383 %tmp2 = load <8 x i16>, ptr %B
3384 %tmp3 = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1)
; 128-bit variant: loads two <4 x i32> vectors (q registers) and passes them to
; the llvm.aarch64.neon.vsli intrinsic with an i32 shift argument of 1. The
; expected assembly after the two loads is a single sli.4s v0, v1, #1.
3388 define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind {
3389 ; CHECK-LABEL: sli4s:
3391 ; CHECK-NEXT: ldr q0, [x0]
3392 ; CHECK-NEXT: ldr q1, [x1]
3393 ; CHECK-NEXT: sli.4s v0, v1, #1
3395 %tmp1 = load <4 x i32>, ptr %A
3396 %tmp2 = load <4 x i32>, ptr %B
3397 %tmp3 = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1)
; 128-bit variant: loads two <2 x i64> vectors (q registers) and passes them to
; the llvm.aarch64.neon.vsli intrinsic with an i32 shift argument of 1. The
; expected assembly after the two loads is a single sli.2d v0, v1, #1.
3401 define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind {
3402 ; CHECK-LABEL: sli2d:
3404 ; CHECK-NEXT: ldr q0, [x0]
3405 ; CHECK-NEXT: ldr q1, [x1]
3406 ; CHECK-NEXT: sli.2d v0, v1, #1
3408 %tmp1 = load <2 x i64>, ptr %A
3409 %tmp2 = load <2 x i64>, ptr %B
3410 %tmp3 = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1)
3414 declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone
3415 declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone
3416 declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone
3417 declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone
3419 declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone
3420 declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone
3421 declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone
3422 declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone
; Arithmetic shift right of a <1 x i64> by a variable <1 x i64> amount, both
; passed as arguments. The expected assembly negates the shift amount
; (neg d1, d1) and then uses sshl d0, d0, d1.
3424 define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) {
3425 ; CHECK-LABEL: ashr_v1i64:
3427 ; CHECK-NEXT: neg d1, d1
3428 ; CHECK-NEXT: sshl d0, d0, d1
3430 %c = ashr <1 x i64> %a, %b
; Feeds the result of the addp intrinsic into llvm.aarch64.neon.sqshl with an
; all-zero shift vector and stores the result to %dst. The expected assembly
; goes straight from addp.2d to the str, with no shift instruction in between.
3434 define void @sqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
3435 ; CHECK-LABEL: sqshl_zero_shift_amount:
3436 ; CHECK: // %bb.0: // %entry
3437 ; CHECK-NEXT: addp.2d v0, v0, v1
3438 ; CHECK-NEXT: str q0, [x0]
3441 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
3442 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
3443 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; Feeds the result of the addp intrinsic into llvm.aarch64.neon.uqshl with an
; all-zero shift vector and stores the result to %dst. The expected assembly
; goes straight from addp.2d to the str, with no shift instruction in between.
3447 define void @uqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
3448 ; CHECK-LABEL: uqshl_zero_shift_amount:
3449 ; CHECK: // %bb.0: // %entry
3450 ; CHECK-NEXT: addp.2d v0, v0, v1
3451 ; CHECK-NEXT: str q0, [x0]
3454 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
3455 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
3456 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; Feeds the result of the addp intrinsic into llvm.aarch64.neon.srshl with an
; all-zero shift vector and stores the result to %dst. The expected assembly
; goes straight from addp.2d to the str, with no shift instruction in between.
3460 define void @srshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
3461 ; CHECK-LABEL: srshl_zero_shift_amount:
3462 ; CHECK: // %bb.0: // %entry
3463 ; CHECK-NEXT: addp.2d v0, v0, v1
3464 ; CHECK-NEXT: str q0, [x0]
3467 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
3468 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
3469 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; Feeds the result of the addp intrinsic into llvm.aarch64.neon.urshl with an
; all-zero shift vector and stores the result to %dst. The expected assembly
; goes straight from addp.2d to the str, with no shift instruction in between.
3473 define void @urshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
3474 ; CHECK-LABEL: urshl_zero_shift_amount:
3475 ; CHECK: // %bb.0: // %entry
3476 ; CHECK-NEXT: addp.2d v0, v0, v1
3477 ; CHECK-NEXT: str q0, [x0]
3480 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
3481 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
3482 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; Feeds the result of the addp intrinsic into llvm.aarch64.neon.sqshlu with an
; all-zero shift vector and stores the result to %dst. Unlike the sibling
; zero-shift tests, the expected assembly keeps a sqshlu.2d v0, v0, #0 between
; the addp.2d and the str.
; NOTE(review): presumably the shift is kept because the signed-to-unsigned
; saturation can still change negative lanes at shift 0 - confirm against the
; Arm ISA description of SQSHLU.
3486 define void @sqshlu_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
3487 ; CHECK-LABEL: sqshlu_zero_shift_amount:
3488 ; CHECK: // %bb.0: // %entry
3489 ; CHECK-NEXT: addp.2d v0, v0, v1
3490 ; CHECK-NEXT: sqshlu.2d v0, v0, #0
3491 ; CHECK-NEXT: str q0, [x0]
3494 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
3495 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
3496 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; Feeds the result of the addp intrinsic into llvm.aarch64.neon.sshl with an
; all-zero shift vector and stores the result to %dst. The expected assembly
; goes straight from addp.2d to the str, with no shift instruction in between.
3500 define void @sshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
3501 ; CHECK-LABEL: sshl_zero_shift_amount:
3502 ; CHECK: // %bb.0: // %entry
3503 ; CHECK-NEXT: addp.2d v0, v0, v1
3504 ; CHECK-NEXT: str q0, [x0]
3507 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
3508 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
3509 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; Feeds the result of the addp intrinsic into llvm.aarch64.neon.ushl with an
; all-zero shift vector and stores the result to %dst. The expected assembly
; goes straight from addp.2d to the str, with no shift instruction in between.
3513 define void @ushl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
3514 ; CHECK-LABEL: ushl_zero_shift_amount:
3515 ; CHECK: // %bb.0: // %entry
3516 ; CHECK-NEXT: addp.2d v0, v0, v1
3517 ; CHECK-NEXT: str q0, [x0]
3520 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
3521 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
3522 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; Narrows a <4 x i32> argument to <4 x i16> with the llvm.aarch64.neon.rshrn
; intrinsic (shift argument 13), then sign-extends the result back to
; <4 x i32> and returns it. The expected assembly is rshrn.4h by #13 followed
; by sshll.4s by #0.
3526 define <4 x i32> @sext_rshrn(<4 x i32> noundef %a) {
3527 ; CHECK-LABEL: sext_rshrn:
3528 ; CHECK: // %bb.0: // %entry
3529 ; CHECK-NEXT: rshrn.4h v0, v0, #13
3530 ; CHECK-NEXT: sshll.4s v0, v0, #0
3533 %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13)
3534 %vmovl.i = sext <4 x i16> %vrshrn_n1 to <4 x i32>
3535 ret <4 x i32> %vmovl.i
; Narrows a <4 x i32> argument to <4 x i16> with the llvm.aarch64.neon.rshrn
; intrinsic (shift argument 13), then zero-extends the result back to
; <4 x i32> and returns it. The expected assembly is rshrn.4h by #13 followed
; by ushll.4s by #0.
3538 define <4 x i32> @zext_rshrn(<4 x i32> noundef %a) {
3539 ; CHECK-LABEL: zext_rshrn:
3540 ; CHECK: // %bb.0: // %entry
3541 ; CHECK-NEXT: rshrn.4h v0, v0, #13
3542 ; CHECK-NEXT: ushll.4s v0, v0, #0
3545 %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13)
3546 %vmovl.i = zext <4 x i16> %vrshrn_n1 to <4 x i32>
3547 ret <4 x i32> %vmovl.i
; Adds a splat of 3 to a <4 x i32> argument, narrows the sum to <4 x i16> with
; the llvm.aarch64.neon.rshrn intrinsic (shift argument 13) and returns it.
; The expected assembly materialises the constant (movi.4s #3), performs the
; add.4s, and then does rshrn.4h by #13.
; NOTE(review): the function is named "mul" but the visible body uses `add` -
; confirm whether the name or the operation is the intended one.
3550 define <4 x i16> @mul_rshrn(<4 x i32> noundef %a) {
3551 ; CHECK-LABEL: mul_rshrn:
3552 ; CHECK: // %bb.0: // %entry
3553 ; CHECK-NEXT: movi.4s v1, #3
3554 ; CHECK-NEXT: add.4s v0, v0, v1
3555 ; CHECK-NEXT: rshrn.4h v0, v0, #13
3558 %b = add <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
3559 %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 13)
3560 ret <4 x i16> %vrshrn_n1
3563 declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>)