1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3 ; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
5 ; CHECK-GI: warning: Instruction selection used fallback path for sqshl1d
6 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshl1d_constant
7 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshl_scalar
8 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshl_scalar_constant
9 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl1d
10 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl1d_constant
11 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl_scalar
12 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl_scalar_constant
13 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl1d
14 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl1d_constant
15 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl_scalar
16 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl_scalar_constant
17 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl1d
18 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl1d_constant
19 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl_scalar
20 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl_scalar_constant
21 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl1d
22 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl1d_constant
23 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl_scalar
24 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl_scalar_constant
25 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl1d
26 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl1d_constant
27 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl_scalar
28 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl_scalar_constant
29 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshr1d
30 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshr_scalar
31 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshr1d
32 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshr_scalar
33 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu8b
34 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu4h
35 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu2s
36 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu16b
37 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu8h
38 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu4s
39 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu2d
40 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu1d_constant
41 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu_i64_constant
42 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu_i32_constant
43 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn1s
44 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn8b
45 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn4h
46 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn2s
47 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn16b
48 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn8h
49 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn4s
50 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun1s
51 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun8b
52 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun4h
53 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun2s
54 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun16b
55 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun8h
56 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun4s
57 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn1s
58 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn8b
59 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn4h
60 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn2s
61 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn16b
62 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn8h
63 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn4s
64 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun1s
65 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun8b
66 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun4h
67 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun2s
68 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun16b
69 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun8h
70 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun4s
71 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn1s
72 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn8b
73 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn4h
74 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn2s
75 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn16b
76 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn8h
77 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn4s
78 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn1s
79 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn8b
80 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn4h
81 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn2s
82 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn16b
83 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn8h
84 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn4s
85 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_ushl_vscalar_constant_shift
86 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_ushl_scalar_constant_shift
87 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_sshll_vscalar_constant_shift
88 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_sshll_scalar_constant_shift
89 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_sshll_scalar_constant_shift_m1
90 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ursra1d
91 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ursra_scalar
92 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srsra1d
93 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srsra_scalar
94 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli8b
95 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli4h
96 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli2s
97 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli1d
98 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli16b
99 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli8h
100 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli4s
101 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli2d
102 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu_zero_shift_amount
103 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lshr_trunc_v2i64_v2i8
104 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ashr_trunc_v2i64_v2i8
; Applies @llvm.aarch64.neon.sqshl.v8i8 to two <8 x i8> vectors loaded from %A and %B.
; Expected code: two d-register loads followed by `sqshl v0.8b, v0.8b, v1.8b`.
106 define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind {
107 ; CHECK-LABEL: sqshl8b:
109 ; CHECK-NEXT: ldr d0, [x0]
110 ; CHECK-NEXT: ldr d1, [x1]
111 ; CHECK-NEXT: sqshl v0.8b, v0.8b, v1.8b
113 %tmp1 = load <8 x i8>, ptr %A
114 %tmp2 = load <8 x i8>, ptr %B
115 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; Applies @llvm.aarch64.neon.sqshl.v4i16 to two <4 x i16> vectors loaded from %A and %B.
; Expected code: two d-register loads followed by `sqshl v0.4h, v0.4h, v1.4h`.
119 define <4 x i16> @sqshl4h(ptr %A, ptr %B) nounwind {
120 ; CHECK-LABEL: sqshl4h:
122 ; CHECK-NEXT: ldr d0, [x0]
123 ; CHECK-NEXT: ldr d1, [x1]
124 ; CHECK-NEXT: sqshl v0.4h, v0.4h, v1.4h
126 %tmp1 = load <4 x i16>, ptr %A
127 %tmp2 = load <4 x i16>, ptr %B
128 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; Applies @llvm.aarch64.neon.sqshl.v2i32 to two <2 x i32> vectors loaded from %A and %B.
; Expected code: two d-register loads followed by `sqshl v0.2s, v0.2s, v1.2s`.
132 define <2 x i32> @sqshl2s(ptr %A, ptr %B) nounwind {
133 ; CHECK-LABEL: sqshl2s:
135 ; CHECK-NEXT: ldr d0, [x0]
136 ; CHECK-NEXT: ldr d1, [x1]
137 ; CHECK-NEXT: sqshl v0.2s, v0.2s, v1.2s
139 %tmp1 = load <2 x i32>, ptr %A
140 %tmp2 = load <2 x i32>, ptr %B
141 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; Applies @llvm.aarch64.neon.sqshl.v1i64 to two <1 x i64> values loaded from %A and %B.
; Expected code uses the d-register form `sqshl d0, d0, d1`.
; The GlobalISel invocation expects a fallback-path warning for this function (see file header).
145 define <1 x i64> @sqshl1d(ptr %A, ptr %B) nounwind {
146 ; CHECK-LABEL: sqshl1d:
148 ; CHECK-NEXT: ldr d0, [x0]
149 ; CHECK-NEXT: ldr d1, [x1]
150 ; CHECK-NEXT: sqshl d0, d0, d1
152 %tmp1 = load <1 x i64>, ptr %A
153 %tmp2 = load <1 x i64>, ptr %B
154 %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
; Applies @llvm.aarch64.neon.sqshl.v1i64 to a <1 x i64> loaded from %A with the constant
; shift operand <i64 1>. Expected code uses the immediate form `sqshl d0, d0, #1`.
; The GlobalISel invocation expects a fallback-path warning for this function (see file header).
158 define <1 x i64> @sqshl1d_constant(ptr %A) nounwind {
159 ; CHECK-LABEL: sqshl1d_constant:
161 ; CHECK-NEXT: ldr d0, [x0]
162 ; CHECK-NEXT: sqshl d0, d0, #1
164 %tmp1 = load <1 x i64>, ptr %A
165 %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; Applies the scalar overload @llvm.aarch64.neon.sqshl.i64 to two i64 values loaded from %A and %B.
; Expected code: both operands are loaded into x8/x9, moved to d0/d1 with fmov,
; combined with `sqshl d0, d0, d1`, and the result is moved back to x0.
; The GlobalISel invocation expects a fallback-path warning for this function (see file header).
169 define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind {
170 ; CHECK-LABEL: sqshl_scalar:
172 ; CHECK-NEXT: ldr x8, [x0]
173 ; CHECK-NEXT: ldr x9, [x1]
174 ; CHECK-NEXT: fmov d0, x8
175 ; CHECK-NEXT: fmov d1, x9
176 ; CHECK-NEXT: sqshl d0, d0, d1
177 ; CHECK-NEXT: fmov x0, d0
179 %tmp1 = load i64, ptr %A
180 %tmp2 = load i64, ptr %B
181 %tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 %tmp2)
; Applies @llvm.aarch64.neon.sqshl.i64 to an i64 loaded from %A with the constant shift 1.
; Expected code loads the operand straight into d0, uses the immediate form
; `sqshl d0, d0, #1`, and moves the result to x0.
; The GlobalISel invocation expects a fallback-path warning for this function (see file header).
185 define i64 @sqshl_scalar_constant(ptr %A) nounwind {
186 ; CHECK-LABEL: sqshl_scalar_constant:
188 ; CHECK-NEXT: ldr d0, [x0]
189 ; CHECK-NEXT: sqshl d0, d0, #1
190 ; CHECK-NEXT: fmov x0, d0
192 %tmp1 = load i64, ptr %A
193 %tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 1)
; Applies @llvm.aarch64.neon.uqshl.v8i8 to two <8 x i8> vectors loaded from %A and %B.
; Expected code: two d-register loads followed by `uqshl v0.8b, v0.8b, v1.8b`.
197 define <8 x i8> @uqshl8b(ptr %A, ptr %B) nounwind {
198 ; CHECK-LABEL: uqshl8b:
200 ; CHECK-NEXT: ldr d0, [x0]
201 ; CHECK-NEXT: ldr d1, [x1]
202 ; CHECK-NEXT: uqshl v0.8b, v0.8b, v1.8b
204 %tmp1 = load <8 x i8>, ptr %A
205 %tmp2 = load <8 x i8>, ptr %B
206 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; Applies @llvm.aarch64.neon.uqshl.v4i16 to two <4 x i16> vectors loaded from %A and %B.
; Expected code: two d-register loads followed by `uqshl v0.4h, v0.4h, v1.4h`.
210 define <4 x i16> @uqshl4h(ptr %A, ptr %B) nounwind {
211 ; CHECK-LABEL: uqshl4h:
213 ; CHECK-NEXT: ldr d0, [x0]
214 ; CHECK-NEXT: ldr d1, [x1]
215 ; CHECK-NEXT: uqshl v0.4h, v0.4h, v1.4h
217 %tmp1 = load <4 x i16>, ptr %A
218 %tmp2 = load <4 x i16>, ptr %B
219 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; Applies @llvm.aarch64.neon.uqshl.v2i32 to two <2 x i32> vectors loaded from %A and %B.
; Expected code: two d-register loads followed by `uqshl v0.2s, v0.2s, v1.2s`.
223 define <2 x i32> @uqshl2s(ptr %A, ptr %B) nounwind {
224 ; CHECK-LABEL: uqshl2s:
226 ; CHECK-NEXT: ldr d0, [x0]
227 ; CHECK-NEXT: ldr d1, [x1]
228 ; CHECK-NEXT: uqshl v0.2s, v0.2s, v1.2s
230 %tmp1 = load <2 x i32>, ptr %A
231 %tmp2 = load <2 x i32>, ptr %B
232 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; Applies @llvm.aarch64.neon.sqshl.v16i8 to two <16 x i8> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `sqshl v0.16b, v0.16b, v1.16b`.
236 define <16 x i8> @sqshl16b(ptr %A, ptr %B) nounwind {
237 ; CHECK-LABEL: sqshl16b:
239 ; CHECK-NEXT: ldr q0, [x0]
240 ; CHECK-NEXT: ldr q1, [x1]
241 ; CHECK-NEXT: sqshl v0.16b, v0.16b, v1.16b
243 %tmp1 = load <16 x i8>, ptr %A
244 %tmp2 = load <16 x i8>, ptr %B
245 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; Applies @llvm.aarch64.neon.sqshl.v8i16 to two <8 x i16> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `sqshl v0.8h, v0.8h, v1.8h`.
249 define <8 x i16> @sqshl8h(ptr %A, ptr %B) nounwind {
250 ; CHECK-LABEL: sqshl8h:
252 ; CHECK-NEXT: ldr q0, [x0]
253 ; CHECK-NEXT: ldr q1, [x1]
254 ; CHECK-NEXT: sqshl v0.8h, v0.8h, v1.8h
256 %tmp1 = load <8 x i16>, ptr %A
257 %tmp2 = load <8 x i16>, ptr %B
258 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; Applies @llvm.aarch64.neon.sqshl.v4i32 to two <4 x i32> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `sqshl v0.4s, v0.4s, v1.4s`.
262 define <4 x i32> @sqshl4s(ptr %A, ptr %B) nounwind {
263 ; CHECK-LABEL: sqshl4s:
265 ; CHECK-NEXT: ldr q0, [x0]
266 ; CHECK-NEXT: ldr q1, [x1]
267 ; CHECK-NEXT: sqshl v0.4s, v0.4s, v1.4s
269 %tmp1 = load <4 x i32>, ptr %A
270 %tmp2 = load <4 x i32>, ptr %B
271 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Applies @llvm.aarch64.neon.sqshl.v2i64 to two <2 x i64> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `sqshl v0.2d, v0.2d, v1.2d`.
275 define <2 x i64> @sqshl2d(ptr %A, ptr %B) nounwind {
276 ; CHECK-LABEL: sqshl2d:
278 ; CHECK-NEXT: ldr q0, [x0]
279 ; CHECK-NEXT: ldr q1, [x1]
280 ; CHECK-NEXT: sqshl v0.2d, v0.2d, v1.2d
282 %tmp1 = load <2 x i64>, ptr %A
283 %tmp2 = load <2 x i64>, ptr %B
284 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; Applies @llvm.aarch64.neon.uqshl.v16i8 to two <16 x i8> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `uqshl v0.16b, v0.16b, v1.16b`.
288 define <16 x i8> @uqshl16b(ptr %A, ptr %B) nounwind {
289 ; CHECK-LABEL: uqshl16b:
291 ; CHECK-NEXT: ldr q0, [x0]
292 ; CHECK-NEXT: ldr q1, [x1]
293 ; CHECK-NEXT: uqshl v0.16b, v0.16b, v1.16b
295 %tmp1 = load <16 x i8>, ptr %A
296 %tmp2 = load <16 x i8>, ptr %B
297 %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; Applies @llvm.aarch64.neon.uqshl.v8i16 to two <8 x i16> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `uqshl v0.8h, v0.8h, v1.8h`.
301 define <8 x i16> @uqshl8h(ptr %A, ptr %B) nounwind {
302 ; CHECK-LABEL: uqshl8h:
304 ; CHECK-NEXT: ldr q0, [x0]
305 ; CHECK-NEXT: ldr q1, [x1]
306 ; CHECK-NEXT: uqshl v0.8h, v0.8h, v1.8h
308 %tmp1 = load <8 x i16>, ptr %A
309 %tmp2 = load <8 x i16>, ptr %B
310 %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; Applies @llvm.aarch64.neon.uqshl.v4i32 to two <4 x i32> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `uqshl v0.4s, v0.4s, v1.4s`.
314 define <4 x i32> @uqshl4s(ptr %A, ptr %B) nounwind {
315 ; CHECK-LABEL: uqshl4s:
317 ; CHECK-NEXT: ldr q0, [x0]
318 ; CHECK-NEXT: ldr q1, [x1]
319 ; CHECK-NEXT: uqshl v0.4s, v0.4s, v1.4s
321 %tmp1 = load <4 x i32>, ptr %A
322 %tmp2 = load <4 x i32>, ptr %B
323 %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Applies @llvm.aarch64.neon.uqshl.v2i64 to two <2 x i64> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `uqshl v0.2d, v0.2d, v1.2d`.
327 define <2 x i64> @uqshl2d(ptr %A, ptr %B) nounwind {
328 ; CHECK-LABEL: uqshl2d:
330 ; CHECK-NEXT: ldr q0, [x0]
331 ; CHECK-NEXT: ldr q1, [x1]
332 ; CHECK-NEXT: uqshl v0.2d, v0.2d, v1.2d
334 %tmp1 = load <2 x i64>, ptr %A
335 %tmp2 = load <2 x i64>, ptr %B
336 %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; Applies @llvm.aarch64.neon.uqshl.v1i64 to two <1 x i64> values loaded from %A and %B.
; Expected code uses the d-register form `uqshl d0, d0, d1`.
; The GlobalISel invocation expects a fallback-path warning for this function (see file header).
340 define <1 x i64> @uqshl1d(ptr %A, ptr %B) nounwind {
341 ; CHECK-LABEL: uqshl1d:
343 ; CHECK-NEXT: ldr d0, [x0]
344 ; CHECK-NEXT: ldr d1, [x1]
345 ; CHECK-NEXT: uqshl d0, d0, d1
347 %tmp1 = load <1 x i64>, ptr %A
348 %tmp2 = load <1 x i64>, ptr %B
349 %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
; Applies @llvm.aarch64.neon.uqshl.v1i64 to a <1 x i64> loaded from %A with the constant
; shift operand <i64 1>. Expected code uses the immediate form `uqshl d0, d0, #1`.
; The GlobalISel invocation expects a fallback-path warning for this function (see file header).
353 define <1 x i64> @uqshl1d_constant(ptr %A) nounwind {
354 ; CHECK-LABEL: uqshl1d_constant:
356 ; CHECK-NEXT: ldr d0, [x0]
357 ; CHECK-NEXT: uqshl d0, d0, #1
359 %tmp1 = load <1 x i64>, ptr %A
360 %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; Applies the scalar overload @llvm.aarch64.neon.uqshl.i64 to two i64 values loaded from %A and %B.
; Expected code: both operands are loaded into x8/x9, moved to d0/d1 with fmov,
; combined with `uqshl d0, d0, d1`, and the result is moved back to x0.
; The GlobalISel invocation expects a fallback-path warning for this function (see file header).
364 define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind {
365 ; CHECK-LABEL: uqshl_scalar:
367 ; CHECK-NEXT: ldr x8, [x0]
368 ; CHECK-NEXT: ldr x9, [x1]
369 ; CHECK-NEXT: fmov d0, x8
370 ; CHECK-NEXT: fmov d1, x9
371 ; CHECK-NEXT: uqshl d0, d0, d1
372 ; CHECK-NEXT: fmov x0, d0
374 %tmp1 = load i64, ptr %A
375 %tmp2 = load i64, ptr %B
376 %tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 %tmp2)
; Applies @llvm.aarch64.neon.uqshl.i64 to an i64 loaded from %A with the constant shift 1.
; Expected code loads the operand straight into d0, uses the immediate form
; `uqshl d0, d0, #1`, and moves the result to x0.
; The GlobalISel invocation expects a fallback-path warning for this function (see file header).
380 define i64 @uqshl_scalar_constant(ptr %A) nounwind {
381 ; CHECK-LABEL: uqshl_scalar_constant:
383 ; CHECK-NEXT: ldr d0, [x0]
384 ; CHECK-NEXT: uqshl d0, d0, #1
385 ; CHECK-NEXT: fmov x0, d0
387 %tmp1 = load i64, ptr %A
388 %tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 1)
; Declarations of the @llvm.aarch64.neon.sqshl and @llvm.aarch64.neon.uqshl intrinsics
; called by the tests above. Each intrinsic is declared for the 64-bit vector types
; (v8i8, v4i16, v2i32, v1i64), the scalar i64 type, and the 128-bit vector types
; (v16i8, v8i16, v4i32, v2i64). Both operands and the result share one type.
392 declare <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
393 declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
394 declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
395 declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
396 declare i64 @llvm.aarch64.neon.sqshl.i64(i64, i64) nounwind readnone
399 declare <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
400 declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
401 declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
402 declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
403 declare i64 @llvm.aarch64.neon.uqshl.i64(i64, i64) nounwind readnone
405 declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
406 declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
407 declare <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
408 declare <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
410 declare <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
411 declare <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
412 declare <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
413 declare <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; Applies @llvm.aarch64.neon.srshl.v8i8 to two <8 x i8> vectors loaded from %A and %B.
; Expected code: two d-register loads followed by `srshl v0.8b, v0.8b, v1.8b`.
415 define <8 x i8> @srshl8b(ptr %A, ptr %B) nounwind {
416 ; CHECK-LABEL: srshl8b:
418 ; CHECK-NEXT: ldr d0, [x0]
419 ; CHECK-NEXT: ldr d1, [x1]
420 ; CHECK-NEXT: srshl v0.8b, v0.8b, v1.8b
422 %tmp1 = load <8 x i8>, ptr %A
423 %tmp2 = load <8 x i8>, ptr %B
424 %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; Applies @llvm.aarch64.neon.srshl.v4i16 to two <4 x i16> vectors loaded from %A and %B.
; Expected code: two d-register loads followed by `srshl v0.4h, v0.4h, v1.4h`.
428 define <4 x i16> @srshl4h(ptr %A, ptr %B) nounwind {
429 ; CHECK-LABEL: srshl4h:
431 ; CHECK-NEXT: ldr d0, [x0]
432 ; CHECK-NEXT: ldr d1, [x1]
433 ; CHECK-NEXT: srshl v0.4h, v0.4h, v1.4h
435 %tmp1 = load <4 x i16>, ptr %A
436 %tmp2 = load <4 x i16>, ptr %B
437 %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; Applies @llvm.aarch64.neon.srshl.v2i32 to two <2 x i32> vectors loaded from %A and %B.
; Expected code: two d-register loads followed by `srshl v0.2s, v0.2s, v1.2s`.
441 define <2 x i32> @srshl2s(ptr %A, ptr %B) nounwind {
442 ; CHECK-LABEL: srshl2s:
444 ; CHECK-NEXT: ldr d0, [x0]
445 ; CHECK-NEXT: ldr d1, [x1]
446 ; CHECK-NEXT: srshl v0.2s, v0.2s, v1.2s
448 %tmp1 = load <2 x i32>, ptr %A
449 %tmp2 = load <2 x i32>, ptr %B
450 %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; Applies @llvm.aarch64.neon.srshl.v1i64 to two <1 x i64> values loaded from %A and %B.
; Expected code uses the d-register form `srshl d0, d0, d1`.
; The GlobalISel invocation expects a fallback-path warning for this function (see file header).
454 define <1 x i64> @srshl1d(ptr %A, ptr %B) nounwind {
455 ; CHECK-LABEL: srshl1d:
457 ; CHECK-NEXT: ldr d0, [x0]
458 ; CHECK-NEXT: ldr d1, [x1]
459 ; CHECK-NEXT: srshl d0, d0, d1
461 %tmp1 = load <1 x i64>, ptr %A
462 %tmp2 = load <1 x i64>, ptr %B
463 %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
; Applies @llvm.aarch64.neon.srshl.v1i64 to a <1 x i64> loaded from %A with the constant
; shift operand <i64 1>. The expected output has no immediate form here: the constant
; is materialized in w8, moved into d1 with fmov, and `srshl d0, d0, d1` is used.
; The GlobalISel invocation expects a fallback-path warning for this function (see file header).
467 define <1 x i64> @srshl1d_constant(ptr %A) nounwind {
468 ; CHECK-LABEL: srshl1d_constant:
470 ; CHECK-NEXT: mov w8, #1 // =0x1
471 ; CHECK-NEXT: ldr d0, [x0]
472 ; CHECK-NEXT: fmov d1, x8
473 ; CHECK-NEXT: srshl d0, d0, d1
475 %tmp1 = load <1 x i64>, ptr %A
476 %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; Applies the scalar overload @llvm.aarch64.neon.srshl.i64 to two i64 values loaded from %A and %B.
; Expected code: both operands are loaded into x8/x9, moved to d0/d1 with fmov,
; combined with `srshl d0, d0, d1`, and the result is moved back to x0.
; The GlobalISel invocation expects a fallback-path warning for this function (see file header).
480 define i64 @srshl_scalar(ptr %A, ptr %B) nounwind {
481 ; CHECK-LABEL: srshl_scalar:
483 ; CHECK-NEXT: ldr x8, [x0]
484 ; CHECK-NEXT: ldr x9, [x1]
485 ; CHECK-NEXT: fmov d0, x8
486 ; CHECK-NEXT: fmov d1, x9
487 ; CHECK-NEXT: srshl d0, d0, d1
488 ; CHECK-NEXT: fmov x0, d0
490 %tmp1 = load i64, ptr %A
491 %tmp2 = load i64, ptr %B
492 %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 %tmp2)
; Applies @llvm.aarch64.neon.srshl.i64 to an i64 loaded from %A with the constant shift 1.
; Expected code: the operand is loaded into x9 and the constant is materialized in w8,
; both are moved to d0/d1 with fmov, `srshl d0, d0, d1` is used (no immediate form),
; and the result is moved back to x0.
; The GlobalISel invocation expects a fallback-path warning for this function (see file header).
496 define i64 @srshl_scalar_constant(ptr %A) nounwind {
497 ; CHECK-LABEL: srshl_scalar_constant:
499 ; CHECK-NEXT: ldr x9, [x0]
500 ; CHECK-NEXT: mov w8, #1 // =0x1
501 ; CHECK-NEXT: fmov d1, x8
502 ; CHECK-NEXT: fmov d0, x9
503 ; CHECK-NEXT: srshl d0, d0, d1
504 ; CHECK-NEXT: fmov x0, d0
506 %tmp1 = load i64, ptr %A
507 %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 1)
; Applies @llvm.aarch64.neon.urshl.v8i8 to two <8 x i8> vectors loaded from %A and %B.
; Expected code: two d-register loads followed by `urshl v0.8b, v0.8b, v1.8b`.
511 define <8 x i8> @urshl8b(ptr %A, ptr %B) nounwind {
512 ; CHECK-LABEL: urshl8b:
514 ; CHECK-NEXT: ldr d0, [x0]
515 ; CHECK-NEXT: ldr d1, [x1]
516 ; CHECK-NEXT: urshl v0.8b, v0.8b, v1.8b
518 %tmp1 = load <8 x i8>, ptr %A
519 %tmp2 = load <8 x i8>, ptr %B
520 %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; Applies @llvm.aarch64.neon.urshl.v4i16 to two <4 x i16> vectors loaded from %A and %B.
; Expected code: two d-register loads followed by `urshl v0.4h, v0.4h, v1.4h`.
524 define <4 x i16> @urshl4h(ptr %A, ptr %B) nounwind {
525 ; CHECK-LABEL: urshl4h:
527 ; CHECK-NEXT: ldr d0, [x0]
528 ; CHECK-NEXT: ldr d1, [x1]
529 ; CHECK-NEXT: urshl v0.4h, v0.4h, v1.4h
531 %tmp1 = load <4 x i16>, ptr %A
532 %tmp2 = load <4 x i16>, ptr %B
533 %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; Applies @llvm.aarch64.neon.urshl.v2i32 to two <2 x i32> vectors loaded from %A and %B.
; Expected code: two d-register loads followed by `urshl v0.2s, v0.2s, v1.2s`.
537 define <2 x i32> @urshl2s(ptr %A, ptr %B) nounwind {
538 ; CHECK-LABEL: urshl2s:
540 ; CHECK-NEXT: ldr d0, [x0]
541 ; CHECK-NEXT: ldr d1, [x1]
542 ; CHECK-NEXT: urshl v0.2s, v0.2s, v1.2s
544 %tmp1 = load <2 x i32>, ptr %A
545 %tmp2 = load <2 x i32>, ptr %B
546 %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; Applies @llvm.aarch64.neon.urshl.v1i64 to two <1 x i64> values loaded from %A and %B.
; Expected code uses the d-register form `urshl d0, d0, d1`.
; The GlobalISel invocation expects a fallback-path warning for this function (see file header).
550 define <1 x i64> @urshl1d(ptr %A, ptr %B) nounwind {
551 ; CHECK-LABEL: urshl1d:
553 ; CHECK-NEXT: ldr d0, [x0]
554 ; CHECK-NEXT: ldr d1, [x1]
555 ; CHECK-NEXT: urshl d0, d0, d1
557 %tmp1 = load <1 x i64>, ptr %A
558 %tmp2 = load <1 x i64>, ptr %B
559 %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
; Applies @llvm.aarch64.neon.urshl.v1i64 to a <1 x i64> loaded from %A with the constant
; shift operand <i64 1>. The expected output has no immediate form here: the constant
; is materialized in w8, moved into d1 with fmov, and `urshl d0, d0, d1` is used.
; The GlobalISel invocation expects a fallback-path warning for this function (see file header).
563 define <1 x i64> @urshl1d_constant(ptr %A) nounwind {
564 ; CHECK-LABEL: urshl1d_constant:
566 ; CHECK-NEXT: mov w8, #1 // =0x1
567 ; CHECK-NEXT: ldr d0, [x0]
568 ; CHECK-NEXT: fmov d1, x8
569 ; CHECK-NEXT: urshl d0, d0, d1
571 %tmp1 = load <1 x i64>, ptr %A
572 %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; Applies the scalar overload @llvm.aarch64.neon.urshl.i64 to two i64 values loaded from %A and %B.
; Expected code: both operands are loaded into x8/x9, moved to d0/d1 with fmov,
; combined with `urshl d0, d0, d1`, and the result is moved back to x0.
; The GlobalISel invocation expects a fallback-path warning for this function (see file header).
576 define i64 @urshl_scalar(ptr %A, ptr %B) nounwind {
577 ; CHECK-LABEL: urshl_scalar:
579 ; CHECK-NEXT: ldr x8, [x0]
580 ; CHECK-NEXT: ldr x9, [x1]
581 ; CHECK-NEXT: fmov d0, x8
582 ; CHECK-NEXT: fmov d1, x9
583 ; CHECK-NEXT: urshl d0, d0, d1
584 ; CHECK-NEXT: fmov x0, d0
586 %tmp1 = load i64, ptr %A
587 %tmp2 = load i64, ptr %B
588 %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 %tmp2)
; Applies @llvm.aarch64.neon.urshl.i64 to an i64 loaded from %A with the constant shift 1.
; Expected code: the operand is loaded into x9 and the constant is materialized in w8,
; both are moved to d0/d1 with fmov, `urshl d0, d0, d1` is used (no immediate form),
; and the result is moved back to x0.
; The GlobalISel invocation expects a fallback-path warning for this function (see file header).
592 define i64 @urshl_scalar_constant(ptr %A) nounwind {
593 ; CHECK-LABEL: urshl_scalar_constant:
595 ; CHECK-NEXT: ldr x9, [x0]
596 ; CHECK-NEXT: mov w8, #1 // =0x1
597 ; CHECK-NEXT: fmov d1, x8
598 ; CHECK-NEXT: fmov d0, x9
599 ; CHECK-NEXT: urshl d0, d0, d1
600 ; CHECK-NEXT: fmov x0, d0
602 %tmp1 = load i64, ptr %A
603 %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 1)
; Applies @llvm.aarch64.neon.srshl.v16i8 to two <16 x i8> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `srshl v0.16b, v0.16b, v1.16b`.
607 define <16 x i8> @srshl16b(ptr %A, ptr %B) nounwind {
608 ; CHECK-LABEL: srshl16b:
610 ; CHECK-NEXT: ldr q0, [x0]
611 ; CHECK-NEXT: ldr q1, [x1]
612 ; CHECK-NEXT: srshl v0.16b, v0.16b, v1.16b
614 %tmp1 = load <16 x i8>, ptr %A
615 %tmp2 = load <16 x i8>, ptr %B
616 %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; Applies @llvm.aarch64.neon.srshl.v8i16 to two <8 x i16> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `srshl v0.8h, v0.8h, v1.8h`.
620 define <8 x i16> @srshl8h(ptr %A, ptr %B) nounwind {
621 ; CHECK-LABEL: srshl8h:
623 ; CHECK-NEXT: ldr q0, [x0]
624 ; CHECK-NEXT: ldr q1, [x1]
625 ; CHECK-NEXT: srshl v0.8h, v0.8h, v1.8h
627 %tmp1 = load <8 x i16>, ptr %A
628 %tmp2 = load <8 x i16>, ptr %B
629 %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; Applies @llvm.aarch64.neon.srshl.v4i32 to two <4 x i32> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `srshl v0.4s, v0.4s, v1.4s`.
633 define <4 x i32> @srshl4s(ptr %A, ptr %B) nounwind {
634 ; CHECK-LABEL: srshl4s:
636 ; CHECK-NEXT: ldr q0, [x0]
637 ; CHECK-NEXT: ldr q1, [x1]
638 ; CHECK-NEXT: srshl v0.4s, v0.4s, v1.4s
640 %tmp1 = load <4 x i32>, ptr %A
641 %tmp2 = load <4 x i32>, ptr %B
642 %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Applies @llvm.aarch64.neon.srshl.v2i64 to two <2 x i64> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `srshl v0.2d, v0.2d, v1.2d`.
646 define <2 x i64> @srshl2d(ptr %A, ptr %B) nounwind {
647 ; CHECK-LABEL: srshl2d:
649 ; CHECK-NEXT: ldr q0, [x0]
650 ; CHECK-NEXT: ldr q1, [x1]
651 ; CHECK-NEXT: srshl v0.2d, v0.2d, v1.2d
653 %tmp1 = load <2 x i64>, ptr %A
654 %tmp2 = load <2 x i64>, ptr %B
655 %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; Applies @llvm.aarch64.neon.urshl.v16i8 to two <16 x i8> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `urshl v0.16b, v0.16b, v1.16b`.
659 define <16 x i8> @urshl16b(ptr %A, ptr %B) nounwind {
660 ; CHECK-LABEL: urshl16b:
662 ; CHECK-NEXT: ldr q0, [x0]
663 ; CHECK-NEXT: ldr q1, [x1]
664 ; CHECK-NEXT: urshl v0.16b, v0.16b, v1.16b
666 %tmp1 = load <16 x i8>, ptr %A
667 %tmp2 = load <16 x i8>, ptr %B
668 %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; Applies @llvm.aarch64.neon.urshl.v8i16 to two <8 x i16> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `urshl v0.8h, v0.8h, v1.8h`.
672 define <8 x i16> @urshl8h(ptr %A, ptr %B) nounwind {
673 ; CHECK-LABEL: urshl8h:
675 ; CHECK-NEXT: ldr q0, [x0]
676 ; CHECK-NEXT: ldr q1, [x1]
677 ; CHECK-NEXT: urshl v0.8h, v0.8h, v1.8h
679 %tmp1 = load <8 x i16>, ptr %A
680 %tmp2 = load <8 x i16>, ptr %B
681 %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; Applies @llvm.aarch64.neon.urshl.v4i32 to two <4 x i32> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `urshl v0.4s, v0.4s, v1.4s`.
685 define <4 x i32> @urshl4s(ptr %A, ptr %B) nounwind {
686 ; CHECK-LABEL: urshl4s:
688 ; CHECK-NEXT: ldr q0, [x0]
689 ; CHECK-NEXT: ldr q1, [x1]
690 ; CHECK-NEXT: urshl v0.4s, v0.4s, v1.4s
692 %tmp1 = load <4 x i32>, ptr %A
693 %tmp2 = load <4 x i32>, ptr %B
694 %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Applies @llvm.aarch64.neon.urshl.v2i64 to two <2 x i64> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `urshl v0.2d, v0.2d, v1.2d`.
698 define <2 x i64> @urshl2d(ptr %A, ptr %B) nounwind {
699 ; CHECK-LABEL: urshl2d:
701 ; CHECK-NEXT: ldr q0, [x0]
702 ; CHECK-NEXT: ldr q1, [x1]
703 ; CHECK-NEXT: urshl v0.2d, v0.2d, v1.2d
705 %tmp1 = load <2 x i64>, ptr %A
706 %tmp2 = load <2 x i64>, ptr %B
707 %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; Declarations of the @llvm.aarch64.neon.srshl and @llvm.aarch64.neon.urshl intrinsics
; called by the tests above. Each intrinsic is declared for the 64-bit vector types
; (v8i8, v4i16, v2i32, v1i64), the scalar i64 type, and the 128-bit vector types
; (v16i8, v8i16, v4i32, v2i64). Both operands and the result share one type.
711 declare <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
712 declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
713 declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
714 declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
715 declare i64 @llvm.aarch64.neon.srshl.i64(i64, i64) nounwind readnone
717 declare <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
718 declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
719 declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
720 declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
721 declare i64 @llvm.aarch64.neon.urshl.i64(i64, i64) nounwind readnone
723 declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
724 declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
725 declare <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
726 declare <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
728 declare <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
729 declare <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
730 declare <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
731 declare <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; Applies @llvm.aarch64.neon.sqrshl.v8i8 to two <8 x i8> vectors loaded from %A and %B.
; Expected code: two d-register loads followed by `sqrshl v0.8b, v0.8b, v1.8b`.
733 define <8 x i8> @sqrshl8b(ptr %A, ptr %B) nounwind {
734 ; CHECK-LABEL: sqrshl8b:
736 ; CHECK-NEXT: ldr d0, [x0]
737 ; CHECK-NEXT: ldr d1, [x1]
738 ; CHECK-NEXT: sqrshl v0.8b, v0.8b, v1.8b
740 %tmp1 = load <8 x i8>, ptr %A
741 %tmp2 = load <8 x i8>, ptr %B
742 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; Applies @llvm.aarch64.neon.sqrshl.v4i16 to two <4 x i16> vectors loaded from %A and %B.
; Expected code: two d-register loads followed by `sqrshl v0.4h, v0.4h, v1.4h`.
746 define <4 x i16> @sqrshl4h(ptr %A, ptr %B) nounwind {
747 ; CHECK-LABEL: sqrshl4h:
749 ; CHECK-NEXT: ldr d0, [x0]
750 ; CHECK-NEXT: ldr d1, [x1]
751 ; CHECK-NEXT: sqrshl v0.4h, v0.4h, v1.4h
753 %tmp1 = load <4 x i16>, ptr %A
754 %tmp2 = load <4 x i16>, ptr %B
755 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; Applies @llvm.aarch64.neon.sqrshl.v2i32 to two <2 x i32> vectors loaded from %A and %B.
; Expected code: two d-register loads followed by `sqrshl v0.2s, v0.2s, v1.2s`.
759 define <2 x i32> @sqrshl2s(ptr %A, ptr %B) nounwind {
760 ; CHECK-LABEL: sqrshl2s:
762 ; CHECK-NEXT: ldr d0, [x0]
763 ; CHECK-NEXT: ldr d1, [x1]
764 ; CHECK-NEXT: sqrshl v0.2s, v0.2s, v1.2s
766 %tmp1 = load <2 x i32>, ptr %A
767 %tmp2 = load <2 x i32>, ptr %B
768 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; Applies @llvm.aarch64.neon.uqrshl.v8i8 to two <8 x i8> vectors loaded from %A and %B.
; Expected code: two d-register loads followed by `uqrshl v0.8b, v0.8b, v1.8b`.
772 define <8 x i8> @uqrshl8b(ptr %A, ptr %B) nounwind {
773 ; CHECK-LABEL: uqrshl8b:
775 ; CHECK-NEXT: ldr d0, [x0]
776 ; CHECK-NEXT: ldr d1, [x1]
777 ; CHECK-NEXT: uqrshl v0.8b, v0.8b, v1.8b
779 %tmp1 = load <8 x i8>, ptr %A
780 %tmp2 = load <8 x i8>, ptr %B
781 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; Applies @llvm.aarch64.neon.uqrshl.v4i16 to two <4 x i16> vectors loaded from %A and %B.
; Expected code: two d-register loads followed by `uqrshl v0.4h, v0.4h, v1.4h`.
785 define <4 x i16> @uqrshl4h(ptr %A, ptr %B) nounwind {
786 ; CHECK-LABEL: uqrshl4h:
788 ; CHECK-NEXT: ldr d0, [x0]
789 ; CHECK-NEXT: ldr d1, [x1]
790 ; CHECK-NEXT: uqrshl v0.4h, v0.4h, v1.4h
792 %tmp1 = load <4 x i16>, ptr %A
793 %tmp2 = load <4 x i16>, ptr %B
794 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; Applies @llvm.aarch64.neon.uqrshl.v2i32 to two <2 x i32> vectors loaded from %A and %B.
; Expected code: two d-register loads followed by `uqrshl v0.2s, v0.2s, v1.2s`.
798 define <2 x i32> @uqrshl2s(ptr %A, ptr %B) nounwind {
799 ; CHECK-LABEL: uqrshl2s:
801 ; CHECK-NEXT: ldr d0, [x0]
802 ; CHECK-NEXT: ldr d1, [x1]
803 ; CHECK-NEXT: uqrshl v0.2s, v0.2s, v1.2s
805 %tmp1 = load <2 x i32>, ptr %A
806 %tmp2 = load <2 x i32>, ptr %B
807 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; Applies @llvm.aarch64.neon.sqrshl.v16i8 to two <16 x i8> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `sqrshl v0.16b, v0.16b, v1.16b`.
811 define <16 x i8> @sqrshl16b(ptr %A, ptr %B) nounwind {
812 ; CHECK-LABEL: sqrshl16b:
814 ; CHECK-NEXT: ldr q0, [x0]
815 ; CHECK-NEXT: ldr q1, [x1]
816 ; CHECK-NEXT: sqrshl v0.16b, v0.16b, v1.16b
818 %tmp1 = load <16 x i8>, ptr %A
819 %tmp2 = load <16 x i8>, ptr %B
820 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; Applies @llvm.aarch64.neon.sqrshl.v8i16 to two <8 x i16> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `sqrshl v0.8h, v0.8h, v1.8h`.
824 define <8 x i16> @sqrshl8h(ptr %A, ptr %B) nounwind {
825 ; CHECK-LABEL: sqrshl8h:
827 ; CHECK-NEXT: ldr q0, [x0]
828 ; CHECK-NEXT: ldr q1, [x1]
829 ; CHECK-NEXT: sqrshl v0.8h, v0.8h, v1.8h
831 %tmp1 = load <8 x i16>, ptr %A
832 %tmp2 = load <8 x i16>, ptr %B
833 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; Applies @llvm.aarch64.neon.sqrshl.v4i32 to two <4 x i32> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `sqrshl v0.4s, v0.4s, v1.4s`.
837 define <4 x i32> @sqrshl4s(ptr %A, ptr %B) nounwind {
838 ; CHECK-LABEL: sqrshl4s:
840 ; CHECK-NEXT: ldr q0, [x0]
841 ; CHECK-NEXT: ldr q1, [x1]
842 ; CHECK-NEXT: sqrshl v0.4s, v0.4s, v1.4s
844 %tmp1 = load <4 x i32>, ptr %A
845 %tmp2 = load <4 x i32>, ptr %B
846 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Applies @llvm.aarch64.neon.sqrshl.v2i64 to two <2 x i64> vectors loaded from %A and %B.
; Expected code: two q-register loads followed by `sqrshl v0.2d, v0.2d, v1.2d`.
850 define <2 x i64> @sqrshl2d(ptr %A, ptr %B) nounwind {
851 ; CHECK-LABEL: sqrshl2d:
853 ; CHECK-NEXT: ldr q0, [x0]
854 ; CHECK-NEXT: ldr q1, [x1]
855 ; CHECK-NEXT: sqrshl v0.2d, v0.2d, v1.2d
857 %tmp1 = load <2 x i64>, ptr %A
858 %tmp2 = load <2 x i64>, ptr %B
859 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; Applies @llvm.aarch64.neon.sqrshl.v1i64 to two <1 x i64> values loaded from %A and %B.
; Expected code uses the d-register form `sqrshl d0, d0, d1`.
; The GlobalISel invocation expects a fallback-path warning for this function (see file header).
863 define <1 x i64> @sqrshl1d(ptr %A, ptr %B) nounwind {
864 ; CHECK-LABEL: sqrshl1d:
866 ; CHECK-NEXT: ldr d0, [x0]
867 ; CHECK-NEXT: ldr d1, [x1]
868 ; CHECK-NEXT: sqrshl d0, d0, d1
870 %tmp1 = load <1 x i64>, ptr %A
871 %tmp2 = load <1 x i64>, ptr %B
872 %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
; Applies @llvm.aarch64.neon.sqrshl.v1i64 to a <1 x i64> loaded from %A with the constant
; shift operand <i64 1>. The expected output has no immediate form here: the constant
; is materialized in w8, moved into d1 with fmov, and `sqrshl d0, d0, d1` is used.
; The GlobalISel invocation expects a fallback-path warning for this function (see file header).
876 define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind {
877 ; CHECK-LABEL: sqrshl1d_constant:
879 ; CHECK-NEXT: mov w8, #1 // =0x1
880 ; CHECK-NEXT: ldr d0, [x0]
881 ; CHECK-NEXT: fmov d1, x8
882 ; CHECK-NEXT: sqrshl d0, d0, d1
884 %tmp1 = load <1 x i64>, ptr %A
885 %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; sqrshl_scalar: loads two i64 values and calls the scalar @llvm.aarch64.neon.sqrshl.i64.
; Expected code loads into x8/x9, moves both to d-registers with fmov, emits
; "sqrshl d0, d0, d1", and moves the result back with "fmov x0, d0".
; The file header expects the -global-isel=1 run to report a fallback-path warning for this function.
889 define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind {
890 ; CHECK-LABEL: sqrshl_scalar:
892 ; CHECK-NEXT: ldr x8, [x0]
893 ; CHECK-NEXT: ldr x9, [x1]
894 ; CHECK-NEXT: fmov d0, x8
895 ; CHECK-NEXT: fmov d1, x9
896 ; CHECK-NEXT: sqrshl d0, d0, d1
897 ; CHECK-NEXT: fmov x0, d0
899 %tmp1 = load i64, ptr %A
900 %tmp2 = load i64, ptr %B
901 %tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 %tmp2)
; sqrshl_scalar_constant: scalar @llvm.aarch64.neon.sqrshl.i64 with a constant shift of 1.
; Expected code materializes 1 in w8, moves both operands to d-registers with fmov,
; emits the register form "sqrshl d0, d0, d1", and returns the result via "fmov x0, d0".
905 define i64 @sqrshl_scalar_constant(ptr %A) nounwind {
906 ; CHECK-LABEL: sqrshl_scalar_constant:
908 ; CHECK-NEXT: ldr x9, [x0]
909 ; CHECK-NEXT: mov w8, #1 // =0x1
910 ; CHECK-NEXT: fmov d1, x8
911 ; CHECK-NEXT: fmov d0, x9
912 ; CHECK-NEXT: sqrshl d0, d0, d1
913 ; CHECK-NEXT: fmov x0, d0
915 %tmp1 = load i64, ptr %A
916 %tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 1)
; uqrshl16b: loads two <16 x i8> operands and calls @llvm.aarch64.neon.uqrshl.v16i8.
; Both llc runs are expected to emit "uqrshl v0.16b, v0.16b, v1.16b" after the two q-register loads.
920 define <16 x i8> @uqrshl16b(ptr %A, ptr %B) nounwind {
921 ; CHECK-LABEL: uqrshl16b:
923 ; CHECK-NEXT: ldr q0, [x0]
924 ; CHECK-NEXT: ldr q1, [x1]
925 ; CHECK-NEXT: uqrshl v0.16b, v0.16b, v1.16b
927 %tmp1 = load <16 x i8>, ptr %A
928 %tmp2 = load <16 x i8>, ptr %B
929 %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; uqrshl8h: loads two <8 x i16> operands and calls @llvm.aarch64.neon.uqrshl.v8i16.
; Both llc runs are expected to emit "uqrshl v0.8h, v0.8h, v1.8h" after the two q-register loads.
933 define <8 x i16> @uqrshl8h(ptr %A, ptr %B) nounwind {
934 ; CHECK-LABEL: uqrshl8h:
936 ; CHECK-NEXT: ldr q0, [x0]
937 ; CHECK-NEXT: ldr q1, [x1]
938 ; CHECK-NEXT: uqrshl v0.8h, v0.8h, v1.8h
940 %tmp1 = load <8 x i16>, ptr %A
941 %tmp2 = load <8 x i16>, ptr %B
942 %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; uqrshl4s: loads two <4 x i32> operands and calls @llvm.aarch64.neon.uqrshl.v4i32.
; Both llc runs are expected to emit "uqrshl v0.4s, v0.4s, v1.4s" after the two q-register loads.
946 define <4 x i32> @uqrshl4s(ptr %A, ptr %B) nounwind {
947 ; CHECK-LABEL: uqrshl4s:
949 ; CHECK-NEXT: ldr q0, [x0]
950 ; CHECK-NEXT: ldr q1, [x1]
951 ; CHECK-NEXT: uqrshl v0.4s, v0.4s, v1.4s
953 %tmp1 = load <4 x i32>, ptr %A
954 %tmp2 = load <4 x i32>, ptr %B
955 %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; uqrshl2d: loads two <2 x i64> operands and calls @llvm.aarch64.neon.uqrshl.v2i64.
; Both llc runs are expected to emit "uqrshl v0.2d, v0.2d, v1.2d" after the two q-register loads.
959 define <2 x i64> @uqrshl2d(ptr %A, ptr %B) nounwind {
960 ; CHECK-LABEL: uqrshl2d:
962 ; CHECK-NEXT: ldr q0, [x0]
963 ; CHECK-NEXT: ldr q1, [x1]
964 ; CHECK-NEXT: uqrshl v0.2d, v0.2d, v1.2d
966 %tmp1 = load <2 x i64>, ptr %A
967 %tmp2 = load <2 x i64>, ptr %B
968 %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; uqrshl1d: loads two <1 x i64> operands and calls @llvm.aarch64.neon.uqrshl.v1i64.
; Expected output is the scalar d-register form "uqrshl d0, d0, d1".
972 define <1 x i64> @uqrshl1d(ptr %A, ptr %B) nounwind {
973 ; CHECK-LABEL: uqrshl1d:
975 ; CHECK-NEXT: ldr d0, [x0]
976 ; CHECK-NEXT: ldr d1, [x1]
977 ; CHECK-NEXT: uqrshl d0, d0, d1
979 %tmp1 = load <1 x i64>, ptr %A
980 %tmp2 = load <1 x i64>, ptr %B
981 %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
; uqrshl1d_constant: same as uqrshl1d but the shift operand is the constant <i64 1>.
; The expected code materializes the constant (mov w8 / fmov d1, x8) and uses the
; register form "uqrshl d0, d0, d1"; no immediate form is expected here.
985 define <1 x i64> @uqrshl1d_constant(ptr %A) nounwind {
986 ; CHECK-LABEL: uqrshl1d_constant:
988 ; CHECK-NEXT: mov w8, #1 // =0x1
989 ; CHECK-NEXT: ldr d0, [x0]
990 ; CHECK-NEXT: fmov d1, x8
991 ; CHECK-NEXT: uqrshl d0, d0, d1
993 %tmp1 = load <1 x i64>, ptr %A
994 %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; uqrshl_scalar: loads two i64 values and calls the scalar @llvm.aarch64.neon.uqrshl.i64.
; Expected code loads into x8/x9, moves both to d-registers with fmov, emits
; "uqrshl d0, d0, d1", and moves the result back with "fmov x0, d0".
998 define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind {
999 ; CHECK-LABEL: uqrshl_scalar:
1001 ; CHECK-NEXT: ldr x8, [x0]
1002 ; CHECK-NEXT: ldr x9, [x1]
1003 ; CHECK-NEXT: fmov d0, x8
1004 ; CHECK-NEXT: fmov d1, x9
1005 ; CHECK-NEXT: uqrshl d0, d0, d1
1006 ; CHECK-NEXT: fmov x0, d0
1008 %tmp1 = load i64, ptr %A
1009 %tmp2 = load i64, ptr %B
1010 %tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 %tmp2)
; uqrshl_scalar_constant: scalar @llvm.aarch64.neon.uqrshl.i64 with a constant shift of 1.
; Expected code materializes 1 in w8, moves both operands to d-registers with fmov,
; emits the register form "uqrshl d0, d0, d1", and returns the result via "fmov x0, d0".
1014 define i64 @uqrshl_scalar_constant(ptr %A) nounwind {
1015 ; CHECK-LABEL: uqrshl_scalar_constant:
1017 ; CHECK-NEXT: ldr x9, [x0]
1018 ; CHECK-NEXT: mov w8, #1 // =0x1
1019 ; CHECK-NEXT: fmov d1, x8
1020 ; CHECK-NEXT: fmov d0, x9
1021 ; CHECK-NEXT: uqrshl d0, d0, d1
1022 ; CHECK-NEXT: fmov x0, d0
1024 %tmp1 = load i64, ptr %A
1025 %tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 1)
; Declarations of the sqrshl / uqrshl intrinsics called by the sqrshl* and uqrshl* tests:
; 64-bit vector, <1 x i64>, scalar i64 and 128-bit vector overloads, each taking
; (value, shift) operands of the same type.
1029 declare <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
1030 declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
1031 declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
1032 declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
1033 declare i64 @llvm.aarch64.neon.sqrshl.i64(i64, i64) nounwind readnone
1035 declare <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
1036 declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
1037 declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
1038 declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
1039 declare i64 @llvm.aarch64.neon.uqrshl.i64(i64, i64) nounwind readnone
1041 declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
1042 declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
1043 declare <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
1044 declare <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
1046 declare <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
1047 declare <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
1048 declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
1049 declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; urshr8b: @llvm.aarch64.neon.urshl.v8i8 with a constant shift vector of all i8 -1.
; With -global-isel=0 the expected code is the immediate form "urshr v0.8b, v0.8b, #1";
; with -global-isel=1 the expected code builds the all-ones vector with movi and emits urshl.
1051 define <8 x i8> @urshr8b(ptr %A) nounwind {
1052 ; CHECK-SD-LABEL: urshr8b:
1053 ; CHECK-SD: // %bb.0:
1054 ; CHECK-SD-NEXT: ldr d0, [x0]
1055 ; CHECK-SD-NEXT: urshr v0.8b, v0.8b, #1
1056 ; CHECK-SD-NEXT: ret
1058 ; CHECK-GI-LABEL: urshr8b:
1059 ; CHECK-GI: // %bb.0:
1060 ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
1061 ; CHECK-GI-NEXT: ldr d1, [x0]
1062 ; CHECK-GI-NEXT: urshl v0.8b, v1.8b, v0.8b
1063 ; CHECK-GI-NEXT: ret
1064 %tmp1 = load <8 x i8>, ptr %A
1065 %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; urshr4h: @llvm.aarch64.neon.urshl.v4i16 with a constant shift vector of all i16 -1.
; With -global-isel=0 the expected code is the immediate form "urshr v0.4h, v0.4h, #1";
; with -global-isel=1 the expected code builds the all-ones vector with movi and emits urshl.
1069 define <4 x i16> @urshr4h(ptr %A) nounwind {
1070 ; CHECK-SD-LABEL: urshr4h:
1071 ; CHECK-SD: // %bb.0:
1072 ; CHECK-SD-NEXT: ldr d0, [x0]
1073 ; CHECK-SD-NEXT: urshr v0.4h, v0.4h, #1
1074 ; CHECK-SD-NEXT: ret
1076 ; CHECK-GI-LABEL: urshr4h:
1077 ; CHECK-GI: // %bb.0:
1078 ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
1079 ; CHECK-GI-NEXT: ldr d1, [x0]
1080 ; CHECK-GI-NEXT: urshl v0.4h, v1.4h, v0.4h
1081 ; CHECK-GI-NEXT: ret
1082 %tmp1 = load <4 x i16>, ptr %A
1083 %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
; urshr2s: @llvm.aarch64.neon.urshl.v2i32 with a constant shift vector of all i32 -1.
; With -global-isel=0 the expected code is the immediate form "urshr v0.2s, v0.2s, #1";
; with -global-isel=1 the expected code builds the all-ones vector with movi and emits urshl.
1087 define <2 x i32> @urshr2s(ptr %A) nounwind {
1088 ; CHECK-SD-LABEL: urshr2s:
1089 ; CHECK-SD: // %bb.0:
1090 ; CHECK-SD-NEXT: ldr d0, [x0]
1091 ; CHECK-SD-NEXT: urshr v0.2s, v0.2s, #1
1092 ; CHECK-SD-NEXT: ret
1094 ; CHECK-GI-LABEL: urshr2s:
1095 ; CHECK-GI: // %bb.0:
1096 ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
1097 ; CHECK-GI-NEXT: ldr d1, [x0]
1098 ; CHECK-GI-NEXT: urshl v0.2s, v1.2s, v0.2s
1099 ; CHECK-GI-NEXT: ret
1100 %tmp1 = load <2 x i32>, ptr %A
1101 %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
; urshr16b: @llvm.aarch64.neon.urshl.v16i8 with a constant shift vector of all i8 -1.
; With -global-isel=0 the expected code is the immediate form "urshr v0.16b, v0.16b, #1";
; with -global-isel=1 the expected code builds the all-ones vector with movi and emits urshl.
1105 define <16 x i8> @urshr16b(ptr %A) nounwind {
1106 ; CHECK-SD-LABEL: urshr16b:
1107 ; CHECK-SD: // %bb.0:
1108 ; CHECK-SD-NEXT: ldr q0, [x0]
1109 ; CHECK-SD-NEXT: urshr v0.16b, v0.16b, #1
1110 ; CHECK-SD-NEXT: ret
1112 ; CHECK-GI-LABEL: urshr16b:
1113 ; CHECK-GI: // %bb.0:
1114 ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
1115 ; CHECK-GI-NEXT: ldr q1, [x0]
1116 ; CHECK-GI-NEXT: urshl v0.16b, v1.16b, v0.16b
1117 ; CHECK-GI-NEXT: ret
1118 %tmp1 = load <16 x i8>, ptr %A
1119 %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; urshr8h: @llvm.aarch64.neon.urshl.v8i16 with a constant shift vector of all i16 -1.
; With -global-isel=0 the expected code is the immediate form "urshr v0.8h, v0.8h, #1";
; with -global-isel=1 the expected code builds the all-ones vector with movi and emits urshl.
1123 define <8 x i16> @urshr8h(ptr %A) nounwind {
1124 ; CHECK-SD-LABEL: urshr8h:
1125 ; CHECK-SD: // %bb.0:
1126 ; CHECK-SD-NEXT: ldr q0, [x0]
1127 ; CHECK-SD-NEXT: urshr v0.8h, v0.8h, #1
1128 ; CHECK-SD-NEXT: ret
1130 ; CHECK-GI-LABEL: urshr8h:
1131 ; CHECK-GI: // %bb.0:
1132 ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
1133 ; CHECK-GI-NEXT: ldr q1, [x0]
1134 ; CHECK-GI-NEXT: urshl v0.8h, v1.8h, v0.8h
1135 ; CHECK-GI-NEXT: ret
1136 %tmp1 = load <8 x i16>, ptr %A
1137 %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
; urshr4s: @llvm.aarch64.neon.urshl.v4i32 with a constant shift vector of all i32 -1.
; With -global-isel=0 the expected code is the immediate form "urshr v0.4s, v0.4s, #1";
; with -global-isel=1 the expected code builds the all-ones vector with movi and emits urshl.
1141 define <4 x i32> @urshr4s(ptr %A) nounwind {
1142 ; CHECK-SD-LABEL: urshr4s:
1143 ; CHECK-SD: // %bb.0:
1144 ; CHECK-SD-NEXT: ldr q0, [x0]
1145 ; CHECK-SD-NEXT: urshr v0.4s, v0.4s, #1
1146 ; CHECK-SD-NEXT: ret
1148 ; CHECK-GI-LABEL: urshr4s:
1149 ; CHECK-GI: // %bb.0:
1150 ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
1151 ; CHECK-GI-NEXT: ldr q1, [x0]
1152 ; CHECK-GI-NEXT: urshl v0.4s, v1.4s, v0.4s
1153 ; CHECK-GI-NEXT: ret
1154 %tmp1 = load <4 x i32>, ptr %A
1155 %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; urshr2d: @llvm.aarch64.neon.urshl.v2i64 with a constant shift vector of all i64 -1.
; With -global-isel=0 the expected code is the immediate form "urshr v0.2d, v0.2d, #1";
; with -global-isel=1 the expected code builds the all-ones vector with movi and emits urshl.
1159 define <2 x i64> @urshr2d(ptr %A) nounwind {
1160 ; CHECK-SD-LABEL: urshr2d:
1161 ; CHECK-SD: // %bb.0:
1162 ; CHECK-SD-NEXT: ldr q0, [x0]
1163 ; CHECK-SD-NEXT: urshr v0.2d, v0.2d, #1
1164 ; CHECK-SD-NEXT: ret
1166 ; CHECK-GI-LABEL: urshr2d:
1167 ; CHECK-GI: // %bb.0:
1168 ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
1169 ; CHECK-GI-NEXT: ldr q1, [x0]
1170 ; CHECK-GI-NEXT: urshl v0.2d, v1.2d, v0.2d
1171 ; CHECK-GI-NEXT: ret
1172 %tmp1 = load <2 x i64>, ptr %A
1173 %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
; urshr1d: @llvm.aarch64.neon.urshl.v1i64 with the constant shift <i64 -1>.
; Both llc runs share one expectation: the immediate form "urshr d0, d0, #1".
1177 define <1 x i64> @urshr1d(ptr %A) nounwind {
1178 ; CHECK-LABEL: urshr1d:
1180 ; CHECK-NEXT: ldr d0, [x0]
1181 ; CHECK-NEXT: urshr d0, d0, #1
1183 %tmp1 = load <1 x i64>, ptr %A
1184 %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
; urshr_scalar: scalar @llvm.aarch64.neon.urshl.i64 with the constant shift -1.
; Expected code loads the i64 straight into d0, emits "urshr d0, d0, #1",
; and moves the result to x0 with fmov.
1188 define i64 @urshr_scalar(ptr %A) nounwind {
1189 ; CHECK-LABEL: urshr_scalar:
1191 ; CHECK-NEXT: ldr d0, [x0]
1192 ; CHECK-NEXT: urshr d0, d0, #1
1193 ; CHECK-NEXT: fmov x0, d0
1195 %tmp1 = load i64, ptr %A
1196 %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1)
; srshr8b: @llvm.aarch64.neon.srshl.v8i8 with a constant shift vector of all i8 -1.
; With -global-isel=0 the expected code is the immediate form "srshr v0.8b, v0.8b, #1";
; with -global-isel=1 the expected code builds the all-ones vector with movi and emits srshl.
1200 define <8 x i8> @srshr8b(ptr %A) nounwind {
1201 ; CHECK-SD-LABEL: srshr8b:
1202 ; CHECK-SD: // %bb.0:
1203 ; CHECK-SD-NEXT: ldr d0, [x0]
1204 ; CHECK-SD-NEXT: srshr v0.8b, v0.8b, #1
1205 ; CHECK-SD-NEXT: ret
1207 ; CHECK-GI-LABEL: srshr8b:
1208 ; CHECK-GI: // %bb.0:
1209 ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
1210 ; CHECK-GI-NEXT: ldr d1, [x0]
1211 ; CHECK-GI-NEXT: srshl v0.8b, v1.8b, v0.8b
1212 ; CHECK-GI-NEXT: ret
1213 %tmp1 = load <8 x i8>, ptr %A
1214 %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; srshr4h: @llvm.aarch64.neon.srshl.v4i16 with a constant shift vector of all i16 -1.
; With -global-isel=0 the expected code is the immediate form "srshr v0.4h, v0.4h, #1";
; with -global-isel=1 the expected code builds the all-ones vector with movi and emits srshl.
1218 define <4 x i16> @srshr4h(ptr %A) nounwind {
1219 ; CHECK-SD-LABEL: srshr4h:
1220 ; CHECK-SD: // %bb.0:
1221 ; CHECK-SD-NEXT: ldr d0, [x0]
1222 ; CHECK-SD-NEXT: srshr v0.4h, v0.4h, #1
1223 ; CHECK-SD-NEXT: ret
1225 ; CHECK-GI-LABEL: srshr4h:
1226 ; CHECK-GI: // %bb.0:
1227 ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
1228 ; CHECK-GI-NEXT: ldr d1, [x0]
1229 ; CHECK-GI-NEXT: srshl v0.4h, v1.4h, v0.4h
1230 ; CHECK-GI-NEXT: ret
1231 %tmp1 = load <4 x i16>, ptr %A
1232 %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
; srshr2s: @llvm.aarch64.neon.srshl.v2i32 with a constant shift vector of all i32 -1.
; With -global-isel=0 the expected code is the immediate form "srshr v0.2s, v0.2s, #1";
; with -global-isel=1 the expected code builds the all-ones vector with movi and emits srshl.
1236 define <2 x i32> @srshr2s(ptr %A) nounwind {
1237 ; CHECK-SD-LABEL: srshr2s:
1238 ; CHECK-SD: // %bb.0:
1239 ; CHECK-SD-NEXT: ldr d0, [x0]
1240 ; CHECK-SD-NEXT: srshr v0.2s, v0.2s, #1
1241 ; CHECK-SD-NEXT: ret
1243 ; CHECK-GI-LABEL: srshr2s:
1244 ; CHECK-GI: // %bb.0:
1245 ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
1246 ; CHECK-GI-NEXT: ldr d1, [x0]
1247 ; CHECK-GI-NEXT: srshl v0.2s, v1.2s, v0.2s
1248 ; CHECK-GI-NEXT: ret
1249 %tmp1 = load <2 x i32>, ptr %A
1250 %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
; srshr16b: @llvm.aarch64.neon.srshl.v16i8 with a constant shift vector of all i8 -1.
; With -global-isel=0 the expected code is the immediate form "srshr v0.16b, v0.16b, #1";
; with -global-isel=1 the expected code builds the all-ones vector with movi and emits srshl.
1254 define <16 x i8> @srshr16b(ptr %A) nounwind {
1255 ; CHECK-SD-LABEL: srshr16b:
1256 ; CHECK-SD: // %bb.0:
1257 ; CHECK-SD-NEXT: ldr q0, [x0]
1258 ; CHECK-SD-NEXT: srshr v0.16b, v0.16b, #1
1259 ; CHECK-SD-NEXT: ret
1261 ; CHECK-GI-LABEL: srshr16b:
1262 ; CHECK-GI: // %bb.0:
1263 ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
1264 ; CHECK-GI-NEXT: ldr q1, [x0]
1265 ; CHECK-GI-NEXT: srshl v0.16b, v1.16b, v0.16b
1266 ; CHECK-GI-NEXT: ret
1267 %tmp1 = load <16 x i8>, ptr %A
1268 %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; srshr8h: @llvm.aarch64.neon.srshl.v8i16 with a constant shift vector of all i16 -1.
; With -global-isel=0 the expected code is the immediate form "srshr v0.8h, v0.8h, #1";
; with -global-isel=1 the expected code builds the all-ones vector with movi and emits srshl.
1272 define <8 x i16> @srshr8h(ptr %A) nounwind {
1273 ; CHECK-SD-LABEL: srshr8h:
1274 ; CHECK-SD: // %bb.0:
1275 ; CHECK-SD-NEXT: ldr q0, [x0]
1276 ; CHECK-SD-NEXT: srshr v0.8h, v0.8h, #1
1277 ; CHECK-SD-NEXT: ret
1279 ; CHECK-GI-LABEL: srshr8h:
1280 ; CHECK-GI: // %bb.0:
1281 ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
1282 ; CHECK-GI-NEXT: ldr q1, [x0]
1283 ; CHECK-GI-NEXT: srshl v0.8h, v1.8h, v0.8h
1284 ; CHECK-GI-NEXT: ret
1285 %tmp1 = load <8 x i16>, ptr %A
1286 %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
; srshr4s: @llvm.aarch64.neon.srshl.v4i32 with a constant shift vector of all i32 -1.
; With -global-isel=0 the expected code is the immediate form "srshr v0.4s, v0.4s, #1";
; with -global-isel=1 the expected code builds the all-ones vector with movi and emits srshl.
1290 define <4 x i32> @srshr4s(ptr %A) nounwind {
1291 ; CHECK-SD-LABEL: srshr4s:
1292 ; CHECK-SD: // %bb.0:
1293 ; CHECK-SD-NEXT: ldr q0, [x0]
1294 ; CHECK-SD-NEXT: srshr v0.4s, v0.4s, #1
1295 ; CHECK-SD-NEXT: ret
1297 ; CHECK-GI-LABEL: srshr4s:
1298 ; CHECK-GI: // %bb.0:
1299 ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
1300 ; CHECK-GI-NEXT: ldr q1, [x0]
1301 ; CHECK-GI-NEXT: srshl v0.4s, v1.4s, v0.4s
1302 ; CHECK-GI-NEXT: ret
1303 %tmp1 = load <4 x i32>, ptr %A
1304 %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; srshr2d: @llvm.aarch64.neon.srshl.v2i64 with a constant shift vector of all i64 -1.
; With -global-isel=0 the expected code is the immediate form "srshr v0.2d, v0.2d, #1";
; with -global-isel=1 the expected code builds the all-ones vector with movi and emits srshl.
1308 define <2 x i64> @srshr2d(ptr %A) nounwind {
1309 ; CHECK-SD-LABEL: srshr2d:
1310 ; CHECK-SD: // %bb.0:
1311 ; CHECK-SD-NEXT: ldr q0, [x0]
1312 ; CHECK-SD-NEXT: srshr v0.2d, v0.2d, #1
1313 ; CHECK-SD-NEXT: ret
1315 ; CHECK-GI-LABEL: srshr2d:
1316 ; CHECK-GI: // %bb.0:
1317 ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
1318 ; CHECK-GI-NEXT: ldr q1, [x0]
1319 ; CHECK-GI-NEXT: srshl v0.2d, v1.2d, v0.2d
1320 ; CHECK-GI-NEXT: ret
1321 %tmp1 = load <2 x i64>, ptr %A
1322 %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
; srshr1d: @llvm.aarch64.neon.srshl.v1i64 with the constant shift <i64 -1>.
; Both llc runs share one expectation: the immediate form "srshr d0, d0, #1".
1326 define <1 x i64> @srshr1d(ptr %A) nounwind {
1327 ; CHECK-LABEL: srshr1d:
1329 ; CHECK-NEXT: ldr d0, [x0]
1330 ; CHECK-NEXT: srshr d0, d0, #1
1332 %tmp1 = load <1 x i64>, ptr %A
1333 %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
; srshr_scalar: scalar @llvm.aarch64.neon.srshl.i64 with the constant shift -1.
; Expected code loads the i64 straight into d0, emits "srshr d0, d0, #1",
; and moves the result to x0 with fmov.
1337 define i64 @srshr_scalar(ptr %A) nounwind {
1338 ; CHECK-LABEL: srshr_scalar:
1340 ; CHECK-NEXT: ldr d0, [x0]
1341 ; CHECK-NEXT: srshr d0, d0, #1
1342 ; CHECK-NEXT: fmov x0, d0
1344 %tmp1 = load i64, ptr %A
1345 %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1)
; sqshlu8b: @llvm.aarch64.neon.sqshlu.v8i8 with a constant shift vector of all i8 1.
; Both llc runs are expected to emit the immediate form "sqshlu v0.8b, v0.8b, #1".
1349 define <8 x i8> @sqshlu8b(ptr %A) nounwind {
1350 ; CHECK-LABEL: sqshlu8b:
1352 ; CHECK-NEXT: ldr d0, [x0]
1353 ; CHECK-NEXT: sqshlu v0.8b, v0.8b, #1
1355 %tmp1 = load <8 x i8>, ptr %A
1356 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; sqshlu4h: @llvm.aarch64.neon.sqshlu.v4i16 with a constant shift vector of all i16 1.
; Both llc runs are expected to emit the immediate form "sqshlu v0.4h, v0.4h, #1".
1360 define <4 x i16> @sqshlu4h(ptr %A) nounwind {
1361 ; CHECK-LABEL: sqshlu4h:
1363 ; CHECK-NEXT: ldr d0, [x0]
1364 ; CHECK-NEXT: sqshlu v0.4h, v0.4h, #1
1366 %tmp1 = load <4 x i16>, ptr %A
1367 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
; sqshlu2s: @llvm.aarch64.neon.sqshlu.v2i32 with a constant shift vector of all i32 1.
; Both llc runs are expected to emit the immediate form "sqshlu v0.2s, v0.2s, #1".
1371 define <2 x i32> @sqshlu2s(ptr %A) nounwind {
1372 ; CHECK-LABEL: sqshlu2s:
1374 ; CHECK-NEXT: ldr d0, [x0]
1375 ; CHECK-NEXT: sqshlu v0.2s, v0.2s, #1
1377 %tmp1 = load <2 x i32>, ptr %A
1378 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
; sqshlu16b: @llvm.aarch64.neon.sqshlu.v16i8 with a constant shift vector of all i8 1.
; Both llc runs are expected to emit the immediate form "sqshlu v0.16b, v0.16b, #1".
1382 define <16 x i8> @sqshlu16b(ptr %A) nounwind {
1383 ; CHECK-LABEL: sqshlu16b:
1385 ; CHECK-NEXT: ldr q0, [x0]
1386 ; CHECK-NEXT: sqshlu v0.16b, v0.16b, #1
1388 %tmp1 = load <16 x i8>, ptr %A
1389 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; sqshlu8h: @llvm.aarch64.neon.sqshlu.v8i16 with a constant shift vector of all i16 1.
; Both llc runs are expected to emit the immediate form "sqshlu v0.8h, v0.8h, #1".
1393 define <8 x i16> @sqshlu8h(ptr %A) nounwind {
1394 ; CHECK-LABEL: sqshlu8h:
1396 ; CHECK-NEXT: ldr q0, [x0]
1397 ; CHECK-NEXT: sqshlu v0.8h, v0.8h, #1
1399 %tmp1 = load <8 x i16>, ptr %A
1400 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; sqshlu4s: @llvm.aarch64.neon.sqshlu.v4i32 with a constant shift vector of all i32 1.
; Both llc runs are expected to emit the immediate form "sqshlu v0.4s, v0.4s, #1".
1404 define <4 x i32> @sqshlu4s(ptr %A) nounwind {
1405 ; CHECK-LABEL: sqshlu4s:
1407 ; CHECK-NEXT: ldr q0, [x0]
1408 ; CHECK-NEXT: sqshlu v0.4s, v0.4s, #1
1410 %tmp1 = load <4 x i32>, ptr %A
1411 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; sqshlu2d: @llvm.aarch64.neon.sqshlu.v2i64 with a constant shift vector of all i64 1.
; Both llc runs are expected to emit the immediate form "sqshlu v0.2d, v0.2d, #1".
1415 define <2 x i64> @sqshlu2d(ptr %A) nounwind {
1416 ; CHECK-LABEL: sqshlu2d:
1418 ; CHECK-NEXT: ldr q0, [x0]
1419 ; CHECK-NEXT: sqshlu v0.2d, v0.2d, #1
1421 %tmp1 = load <2 x i64>, ptr %A
1422 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
; sqshlu1d_constant: @llvm.aarch64.neon.sqshlu.v1i64 with the constant shift <i64 1>.
; Both llc runs are expected to emit the scalar immediate form "sqshlu d0, d0, #1".
1426 define <1 x i64> @sqshlu1d_constant(ptr %A) nounwind {
1427 ; CHECK-LABEL: sqshlu1d_constant:
1429 ; CHECK-NEXT: ldr d0, [x0]
1430 ; CHECK-NEXT: sqshlu d0, d0, #1
1432 %tmp1 = load <1 x i64>, ptr %A
1433 %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
; sqshlu_i64_constant: scalar @llvm.aarch64.neon.sqshlu.i64 with the constant shift 1.
; Expected code loads the i64 straight into d0, emits "sqshlu d0, d0, #1",
; and moves the result to x0 with fmov.
1437 define i64 @sqshlu_i64_constant(ptr %A) nounwind {
1438 ; CHECK-LABEL: sqshlu_i64_constant:
1440 ; CHECK-NEXT: ldr d0, [x0]
1441 ; CHECK-NEXT: sqshlu d0, d0, #1
1442 ; CHECK-NEXT: fmov x0, d0
1444 %tmp1 = load i64, ptr %A
1445 %tmp3 = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %tmp1, i64 1)
; sqshlu_i32_constant: scalar @llvm.aarch64.neon.sqshlu.i32 with the constant shift 1.
; Unlike the i64 variant, the expected code loads into w8 first and moves it to s0 with
; fmov before "sqshlu s0, s0, #1"; the result is moved back with "fmov w0, s0".
1449 define i32 @sqshlu_i32_constant(ptr %A) nounwind {
1450 ; CHECK-LABEL: sqshlu_i32_constant:
1452 ; CHECK-NEXT: ldr w8, [x0]
1453 ; CHECK-NEXT: fmov s0, w8
1454 ; CHECK-NEXT: sqshlu s0, s0, #1
1455 ; CHECK-NEXT: fmov w0, s0
1457 %tmp1 = load i32, ptr %A
1458 %tmp3 = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %tmp1, i32 1)
; Declarations of the sqshlu intrinsic overloads called by the sqshlu* tests:
; 64-bit vector, <1 x i64>, scalar i64/i32 and 128-bit vector forms.
1462 declare <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
1463 declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
1464 declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
1465 declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
1466 declare i64 @llvm.aarch64.neon.sqshlu.i64(i64, i64) nounwind readnone
1467 declare i32 @llvm.aarch64.neon.sqshlu.i32(i32, i32) nounwind readnone
1469 declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
1470 declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
1471 declare <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
1472 declare <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; rshrn8b: narrows a loaded <8 x i16> to <8 x i8> via @llvm.aarch64.neon.rshrn.v8i8 with shift 1.
; Both llc runs are expected to emit "rshrn v0.8b, v0.8h, #1".
1474 define <8 x i8> @rshrn8b(ptr %A) nounwind {
1475 ; CHECK-LABEL: rshrn8b:
1477 ; CHECK-NEXT: ldr q0, [x0]
1478 ; CHECK-NEXT: rshrn v0.8b, v0.8h, #1
1480 %tmp1 = load <8 x i16>, ptr %A
1481 %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
; rshrn4h: narrows a loaded <4 x i32> to <4 x i16> via @llvm.aarch64.neon.rshrn.v4i16 with shift 1.
; Both llc runs are expected to emit "rshrn v0.4h, v0.4s, #1".
1485 define <4 x i16> @rshrn4h(ptr %A) nounwind {
1486 ; CHECK-LABEL: rshrn4h:
1488 ; CHECK-NEXT: ldr q0, [x0]
1489 ; CHECK-NEXT: rshrn v0.4h, v0.4s, #1
1491 %tmp1 = load <4 x i32>, ptr %A
1492 %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
; rshrn2s: narrows a loaded <2 x i64> to <2 x i32> via @llvm.aarch64.neon.rshrn.v2i32 with shift 1.
; Both llc runs are expected to emit "rshrn v0.2s, v0.2d, #1".
1496 define <2 x i32> @rshrn2s(ptr %A) nounwind {
1497 ; CHECK-LABEL: rshrn2s:
1499 ; CHECK-NEXT: ldr q0, [x0]
1500 ; CHECK-NEXT: rshrn v0.2s, v0.2d, #1
1502 %tmp1 = load <2 x i64>, ptr %A
1503 %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
; rshrn16b: loads an <8 x i8> value from %ret, narrows an <8 x i16> from %A with
; @llvm.aarch64.neon.rshrn.v8i8 (shift 1), and concatenates the two with a shufflevector
; (mask 0..15). The narrowing plus concatenation is expected to become a single
; "rshrn2 v0.16b, v1.8h, #1".
1507 define <16 x i8> @rshrn16b(ptr %ret, ptr %A) nounwind {
1508 ; CHECK-LABEL: rshrn16b:
1510 ; CHECK-NEXT: ldr d0, [x0]
1511 ; CHECK-NEXT: ldr q1, [x1]
1512 ; CHECK-NEXT: rshrn2 v0.16b, v1.8h, #1
1514 %out = load <8 x i8>, ptr %ret
1515 %tmp1 = load <8 x i16>, ptr %A
1516 %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
1517 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; rshrn8h: loads a <4 x i16> value from %ret, narrows a <4 x i32> from %A with
; @llvm.aarch64.neon.rshrn.v4i16 (shift 1), and concatenates the two with a shufflevector
; (mask 0..7). Expected to become a single "rshrn2 v0.8h, v1.4s, #1".
1521 define <8 x i16> @rshrn8h(ptr %ret, ptr %A) nounwind {
1522 ; CHECK-LABEL: rshrn8h:
1524 ; CHECK-NEXT: ldr d0, [x0]
1525 ; CHECK-NEXT: ldr q1, [x1]
1526 ; CHECK-NEXT: rshrn2 v0.8h, v1.4s, #1
1528 %out = load <4 x i16>, ptr %ret
1529 %tmp1 = load <4 x i32>, ptr %A
1530 %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
1531 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; rshrn4s: loads a <2 x i32> value from %ret, narrows a <2 x i64> from %A with
; @llvm.aarch64.neon.rshrn.v2i32 (shift 1), and concatenates the two with a shufflevector
; (mask 0..3). Expected to become a single "rshrn2 v0.4s, v1.2d, #1".
1535 define <4 x i32> @rshrn4s(ptr %ret, ptr %A) nounwind {
1536 ; CHECK-LABEL: rshrn4s:
1538 ; CHECK-NEXT: ldr d0, [x0]
1539 ; CHECK-NEXT: ldr q1, [x1]
1540 ; CHECK-NEXT: rshrn2 v0.4s, v1.2d, #1
1542 %out = load <2 x i32>, ptr %ret
1543 %tmp1 = load <2 x i64>, ptr %A
1544 %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
1545 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Declarations of the rshrn intrinsic overloads called by the rshrn* tests:
; each takes a wide source vector and an i32 shift amount and returns the half-width vector.
1549 declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone
1550 declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone
1551 declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone
; shrn8b: no intrinsic is used; a generic lshr of <8 x i16> by splat 1 followed by a trunc
; to <8 x i8> is expected to be matched to "shrn v0.8b, v0.8h, #1" by both llc runs.
1553 define <8 x i8> @shrn8b(ptr %A) nounwind {
1554 ; CHECK-LABEL: shrn8b:
1556 ; CHECK-NEXT: ldr q0, [x0]
1557 ; CHECK-NEXT: shrn v0.8b, v0.8h, #1
1559 %tmp1 = load <8 x i16>, ptr %A
1560 %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1561 %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
; shrn4h: no intrinsic is used; a generic lshr of <4 x i32> by splat 1 followed by a trunc
; to <4 x i16> is expected to be matched to "shrn v0.4h, v0.4s, #1" by both llc runs.
1565 define <4 x i16> @shrn4h(ptr %A) nounwind {
1566 ; CHECK-LABEL: shrn4h:
1568 ; CHECK-NEXT: ldr q0, [x0]
1569 ; CHECK-NEXT: shrn v0.4h, v0.4s, #1
1571 %tmp1 = load <4 x i32>, ptr %A
1572 %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
1573 %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
; shrn2s: no intrinsic is used; a generic lshr of <2 x i64> by splat 1 followed by a trunc
; to <2 x i32> is expected to be matched to "shrn v0.2s, v0.2d, #1" by both llc runs.
1577 define <2 x i32> @shrn2s(ptr %A) nounwind {
1578 ; CHECK-LABEL: shrn2s:
1580 ; CHECK-NEXT: ldr q0, [x0]
1581 ; CHECK-NEXT: shrn v0.2s, v0.2d, #1
1583 %tmp1 = load <2 x i64>, ptr %A
1584 %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
1585 %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
; shrn16b: lshr by splat 1 + trunc of an <8 x i16> from %A, concatenated after an <8 x i8>
; loaded from %ret via shufflevector (mask 0..15). The whole sequence is expected to
; become a single "shrn2 v0.16b, v1.8h, #1".
1589 define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind {
1590 ; CHECK-LABEL: shrn16b:
1592 ; CHECK-NEXT: ldr d0, [x0]
1593 ; CHECK-NEXT: ldr q1, [x1]
1594 ; CHECK-NEXT: shrn2 v0.16b, v1.8h, #1
1596 %out = load <8 x i8>, ptr %ret
1597 %tmp1 = load <8 x i16>, ptr %A
1598 %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1599 %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
1600 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; shrn8h: lshr by splat 1 + trunc of a <4 x i32> from %A, concatenated after a <4 x i16>
; loaded from %ret via shufflevector (mask 0..7). The whole sequence is expected to
; become a single "shrn2 v0.8h, v1.4s, #1".
1604 define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind {
1605 ; CHECK-LABEL: shrn8h:
1607 ; CHECK-NEXT: ldr d0, [x0]
1608 ; CHECK-NEXT: ldr q1, [x1]
1609 ; CHECK-NEXT: shrn2 v0.8h, v1.4s, #1
1611 %out = load <4 x i16>, ptr %ret
1612 %tmp1 = load <4 x i32>, ptr %A
1613 %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
1614 %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
1615 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; shrn4s: lshr by splat 1 + trunc of a <2 x i64> from %A, concatenated after a <2 x i32>
; loaded from %ret via shufflevector (mask 0..3). The whole sequence is expected to
; become a single "shrn2 v0.4s, v1.2d, #1".
1619 define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind {
1620 ; CHECK-LABEL: shrn4s:
1622 ; CHECK-NEXT: ldr d0, [x0]
1623 ; CHECK-NEXT: ldr q1, [x1]
1624 ; CHECK-NEXT: shrn2 v0.4s, v1.2d, #1
1626 %out = load <2 x i32>, ptr %ret
1627 %tmp1 = load <2 x i64>, ptr %A
1628 %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
1629 %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
1630 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; NOTE(review): the shrn* tests directly above are written with lshr + trunc and do not
; call these @llvm.aarch64.neon.shrn.* declarations; they look unused in this part of
; the file. Confirm against the rest of the file before removing them.
1634 declare <8 x i8> @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone
1635 declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone
1636 declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone
; sqshrn1s: scalar @llvm.aarch64.neon.sqshrn.i32 narrowing an i64 argument with shift 1.
; Expected code moves x0 into d0, emits "sqshrn s0, d0, #1", and moves s0 back to w0.
1638 define i32 @sqshrn1s(i64 %A) nounwind {
1639 ; CHECK-LABEL: sqshrn1s:
1641 ; CHECK-NEXT: fmov d0, x0
1642 ; CHECK-NEXT: sqshrn s0, d0, #1
1643 ; CHECK-NEXT: fmov w0, s0
1645 %tmp = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %A, i32 1)
; sqshrn8b: narrows a loaded <8 x i16> to <8 x i8> via @llvm.aarch64.neon.sqshrn.v8i8 with shift 1.
; Both llc runs are expected to emit "sqshrn v0.8b, v0.8h, #1".
1649 define <8 x i8> @sqshrn8b(ptr %A) nounwind {
1650 ; CHECK-LABEL: sqshrn8b:
1652 ; CHECK-NEXT: ldr q0, [x0]
1653 ; CHECK-NEXT: sqshrn v0.8b, v0.8h, #1
1655 %tmp1 = load <8 x i16>, ptr %A
1656 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
; sqshrn4h: narrows a loaded <4 x i32> to <4 x i16> via @llvm.aarch64.neon.sqshrn.v4i16 with shift 1.
; Both llc runs are expected to emit "sqshrn v0.4h, v0.4s, #1".
1660 define <4 x i16> @sqshrn4h(ptr %A) nounwind {
1661 ; CHECK-LABEL: sqshrn4h:
1663 ; CHECK-NEXT: ldr q0, [x0]
1664 ; CHECK-NEXT: sqshrn v0.4h, v0.4s, #1
1666 %tmp1 = load <4 x i32>, ptr %A
1667 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
; sqshrn2s: narrows a loaded <2 x i64> to <2 x i32> via @llvm.aarch64.neon.sqshrn.v2i32 with shift 1.
; Both llc runs are expected to emit "sqshrn v0.2s, v0.2d, #1".
1671 define <2 x i32> @sqshrn2s(ptr %A) nounwind {
1672 ; CHECK-LABEL: sqshrn2s:
1674 ; CHECK-NEXT: ldr q0, [x0]
1675 ; CHECK-NEXT: sqshrn v0.2s, v0.2d, #1
1677 %tmp1 = load <2 x i64>, ptr %A
1678 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
; sqshrn16b: @llvm.aarch64.neon.sqshrn.v8i8 (shift 1) on an <8 x i16> from %A, concatenated
; after an <8 x i8> loaded from %ret via shufflevector (mask 0..15).
; Expected to become a single "sqshrn2 v0.16b, v1.8h, #1".
1683 define <16 x i8> @sqshrn16b(ptr %ret, ptr %A) nounwind {
1684 ; CHECK-LABEL: sqshrn16b:
1686 ; CHECK-NEXT: ldr d0, [x0]
1687 ; CHECK-NEXT: ldr q1, [x1]
1688 ; CHECK-NEXT: sqshrn2 v0.16b, v1.8h, #1
1690 %out = load <8 x i8>, ptr %ret
1691 %tmp1 = load <8 x i16>, ptr %A
1692 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
1693 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; sqshrn8h: @llvm.aarch64.neon.sqshrn.v4i16 (shift 1) on a <4 x i32> from %A, concatenated
; after a <4 x i16> loaded from %ret via shufflevector (mask 0..7).
; Expected to become a single "sqshrn2 v0.8h, v1.4s, #1".
1697 define <8 x i16> @sqshrn8h(ptr %ret, ptr %A) nounwind {
1698 ; CHECK-LABEL: sqshrn8h:
1700 ; CHECK-NEXT: ldr d0, [x0]
1701 ; CHECK-NEXT: ldr q1, [x1]
1702 ; CHECK-NEXT: sqshrn2 v0.8h, v1.4s, #1
1704 %out = load <4 x i16>, ptr %ret
1705 %tmp1 = load <4 x i32>, ptr %A
1706 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
1707 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; sqshrn4s: @llvm.aarch64.neon.sqshrn.v2i32 (shift 1) on a <2 x i64> from %A, concatenated
; after a <2 x i32> loaded from %ret via shufflevector (mask 0..3).
; Expected to become a single "sqshrn2 v0.4s, v1.2d, #1".
1711 define <4 x i32> @sqshrn4s(ptr %ret, ptr %A) nounwind {
1712 ; CHECK-LABEL: sqshrn4s:
1714 ; CHECK-NEXT: ldr d0, [x0]
1715 ; CHECK-NEXT: ldr q1, [x1]
1716 ; CHECK-NEXT: sqshrn2 v0.4s, v1.2d, #1
1718 %out = load <2 x i32>, ptr %ret
1719 %tmp1 = load <2 x i64>, ptr %A
1720 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
1721 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Declarations of the sqshrn intrinsic overloads called by the sqshrn* tests:
; one scalar form (i64 to i32) and three vector forms, each with an i32 shift amount.
1725 declare i32 @llvm.aarch64.neon.sqshrn.i32(i64, i32) nounwind readnone
1726 declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone
1727 declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone
1728 declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone
; sqshrun1s: scalar @llvm.aarch64.neon.sqshrun.i32 narrowing an i64 argument with shift 1.
; Expected code moves x0 into d0, emits "sqshrun s0, d0, #1", and moves s0 back to w0.
1730 define i32 @sqshrun1s(i64 %A) nounwind {
1731 ; CHECK-LABEL: sqshrun1s:
1733 ; CHECK-NEXT: fmov d0, x0
1734 ; CHECK-NEXT: sqshrun s0, d0, #1
1735 ; CHECK-NEXT: fmov w0, s0
1737 %tmp = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %A, i32 1)
; sqshrun8b: narrows a loaded <8 x i16> to <8 x i8> via @llvm.aarch64.neon.sqshrun.v8i8 with shift 1.
; Both llc runs are expected to emit "sqshrun v0.8b, v0.8h, #1".
1741 define <8 x i8> @sqshrun8b(ptr %A) nounwind {
1742 ; CHECK-LABEL: sqshrun8b:
1744 ; CHECK-NEXT: ldr q0, [x0]
1745 ; CHECK-NEXT: sqshrun v0.8b, v0.8h, #1
1747 %tmp1 = load <8 x i16>, ptr %A
1748 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
; sqshrun4h: narrows a loaded <4 x i32> to <4 x i16> via @llvm.aarch64.neon.sqshrun.v4i16 with shift 1.
; Both llc runs are expected to emit "sqshrun v0.4h, v0.4s, #1".
1752 define <4 x i16> @sqshrun4h(ptr %A) nounwind {
1753 ; CHECK-LABEL: sqshrun4h:
1755 ; CHECK-NEXT: ldr q0, [x0]
1756 ; CHECK-NEXT: sqshrun v0.4h, v0.4s, #1
1758 %tmp1 = load <4 x i32>, ptr %A
1759 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
; sqshrun2s: narrows a loaded <2 x i64> to <2 x i32> via @llvm.aarch64.neon.sqshrun.v2i32 with shift 1.
; Both llc runs are expected to emit "sqshrun v0.2s, v0.2d, #1".
1763 define <2 x i32> @sqshrun2s(ptr %A) nounwind {
1764 ; CHECK-LABEL: sqshrun2s:
1766 ; CHECK-NEXT: ldr q0, [x0]
1767 ; CHECK-NEXT: sqshrun v0.2s, v0.2d, #1
1769 %tmp1 = load <2 x i64>, ptr %A
1770 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
; sqshrun16b: @llvm.aarch64.neon.sqshrun.v8i8 (shift 1) on an <8 x i16> from %A, concatenated
; after an <8 x i8> loaded from %ret via shufflevector (mask 0..15).
; Expected to become a single "sqshrun2 v0.16b, v1.8h, #1".
1774 define <16 x i8> @sqshrun16b(ptr %ret, ptr %A) nounwind {
1775 ; CHECK-LABEL: sqshrun16b:
1777 ; CHECK-NEXT: ldr d0, [x0]
1778 ; CHECK-NEXT: ldr q1, [x1]
1779 ; CHECK-NEXT: sqshrun2 v0.16b, v1.8h, #1
1781 %out = load <8 x i8>, ptr %ret
1782 %tmp1 = load <8 x i16>, ptr %A
1783 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
1784 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; sqshrun8h: @llvm.aarch64.neon.sqshrun.v4i16 (shift 1) on a <4 x i32> from %A, concatenated
; after a <4 x i16> loaded from %ret via shufflevector (mask 0..7).
; Expected to become a single "sqshrun2 v0.8h, v1.4s, #1".
1788 define <8 x i16> @sqshrun8h(ptr %ret, ptr %A) nounwind {
1789 ; CHECK-LABEL: sqshrun8h:
1791 ; CHECK-NEXT: ldr d0, [x0]
1792 ; CHECK-NEXT: ldr q1, [x1]
1793 ; CHECK-NEXT: sqshrun2 v0.8h, v1.4s, #1
1795 %out = load <4 x i16>, ptr %ret
1796 %tmp1 = load <4 x i32>, ptr %A
1797 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
1798 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; sqshrun4s: @llvm.aarch64.neon.sqshrun.v2i32 (shift 1) on a <2 x i64> from %A, concatenated
; after a <2 x i32> loaded from %ret via shufflevector (mask 0..3).
; Expected to become a single "sqshrun2 v0.4s, v1.2d, #1".
1802 define <4 x i32> @sqshrun4s(ptr %ret, ptr %A) nounwind {
1803 ; CHECK-LABEL: sqshrun4s:
1805 ; CHECK-NEXT: ldr d0, [x0]
1806 ; CHECK-NEXT: ldr q1, [x1]
1807 ; CHECK-NEXT: sqshrun2 v0.4s, v1.2d, #1
1809 %out = load <2 x i32>, ptr %ret
1810 %tmp1 = load <2 x i64>, ptr %A
1811 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
1812 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Declarations of the sqshrun intrinsic overloads called by the sqshrun* tests:
; one scalar form (i64 to i32) and three vector forms, each with an i32 shift amount.
1816 declare i32 @llvm.aarch64.neon.sqshrun.i32(i64, i32) nounwind readnone
1817 declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone
1818 declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone
1819 declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone
; sqrshrn1s: scalar @llvm.aarch64.neon.sqrshrn.i32 narrowing an i64 argument with shift 1.
; Expected code moves x0 into d0, emits "sqrshrn s0, d0, #1", and moves s0 back to w0.
1821 define i32 @sqrshrn1s(i64 %A) nounwind {
1822 ; CHECK-LABEL: sqrshrn1s:
1824 ; CHECK-NEXT: fmov d0, x0
1825 ; CHECK-NEXT: sqrshrn s0, d0, #1
1826 ; CHECK-NEXT: fmov w0, s0
1828 %tmp = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %A, i32 1)
; sqrshrn8b: narrows a loaded <8 x i16> to <8 x i8> via @llvm.aarch64.neon.sqrshrn.v8i8 with shift 1.
; Both llc runs are expected to emit "sqrshrn v0.8b, v0.8h, #1".
1832 define <8 x i8> @sqrshrn8b(ptr %A) nounwind {
1833 ; CHECK-LABEL: sqrshrn8b:
1835 ; CHECK-NEXT: ldr q0, [x0]
1836 ; CHECK-NEXT: sqrshrn v0.8b, v0.8h, #1
1838 %tmp1 = load <8 x i16>, ptr %A
1839 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
; sqrshrn4h: narrows a loaded <4 x i32> to <4 x i16> via @llvm.aarch64.neon.sqrshrn.v4i16 with shift 1.
; Both llc runs are expected to emit "sqrshrn v0.4h, v0.4s, #1".
1843 define <4 x i16> @sqrshrn4h(ptr %A) nounwind {
1844 ; CHECK-LABEL: sqrshrn4h:
1846 ; CHECK-NEXT: ldr q0, [x0]
1847 ; CHECK-NEXT: sqrshrn v0.4h, v0.4s, #1
1849 %tmp1 = load <4 x i32>, ptr %A
1850 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
; sqrshrn2s: narrows a loaded <2 x i64> to <2 x i32> via @llvm.aarch64.neon.sqrshrn.v2i32 with shift 1.
; Both llc runs are expected to emit "sqrshrn v0.2s, v0.2d, #1".
1854 define <2 x i32> @sqrshrn2s(ptr %A) nounwind {
1855 ; CHECK-LABEL: sqrshrn2s:
1857 ; CHECK-NEXT: ldr q0, [x0]
1858 ; CHECK-NEXT: sqrshrn v0.2s, v0.2d, #1
1860 %tmp1 = load <2 x i64>, ptr %A
1861 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
; sqrshrn16b: @llvm.aarch64.neon.sqrshrn.v8i8 (shift 1) on an <8 x i16> from %A, concatenated
; after an <8 x i8> loaded from %ret via shufflevector (mask 0..15).
; Expected to become a single "sqrshrn2 v0.16b, v1.8h, #1".
1865 define <16 x i8> @sqrshrn16b(ptr %ret, ptr %A) nounwind {
1866 ; CHECK-LABEL: sqrshrn16b:
1868 ; CHECK-NEXT: ldr d0, [x0]
1869 ; CHECK-NEXT: ldr q1, [x1]
1870 ; CHECK-NEXT: sqrshrn2 v0.16b, v1.8h, #1
1872 %out = load <8 x i8>, ptr %ret
1873 %tmp1 = load <8 x i16>, ptr %A
1874 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
1875 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; sqrshrn8h: @llvm.aarch64.neon.sqrshrn.v4i16 (shift 1) on a <4 x i32> from %A, concatenated
; after a <4 x i16> loaded from %ret via shufflevector (mask 0..7).
; Expected to become a single "sqrshrn2 v0.8h, v1.4s, #1".
1879 define <8 x i16> @sqrshrn8h(ptr %ret, ptr %A) nounwind {
1880 ; CHECK-LABEL: sqrshrn8h:
1882 ; CHECK-NEXT: ldr d0, [x0]
1883 ; CHECK-NEXT: ldr q1, [x1]
1884 ; CHECK-NEXT: sqrshrn2 v0.8h, v1.4s, #1
1886 %out = load <4 x i16>, ptr %ret
1887 %tmp1 = load <4 x i32>, ptr %A
1888 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
1889 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1893 define <4 x i32> @sqrshrn4s(ptr %ret, ptr %A) nounwind {
1894 ; CHECK-LABEL: sqrshrn4s:
1896 ; CHECK-NEXT: ldr d0, [x0]
1897 ; CHECK-NEXT: ldr q1, [x1]
1898 ; CHECK-NEXT: sqrshrn2 v0.4s, v1.2d, #1
1900 %out = load <2 x i32>, ptr %ret
1901 %tmp1 = load <2 x i64>, ptr %A
1902 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
1903 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1907 declare i32 @llvm.aarch64.neon.sqrshrn.i32(i64, i32) nounwind readnone
1908 declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
1909 declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
1910 declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
1912 define i32 @sqrshrun1s(i64 %A) nounwind {
1913 ; CHECK-LABEL: sqrshrun1s:
1915 ; CHECK-NEXT: fmov d0, x0
1916 ; CHECK-NEXT: sqrshrun s0, d0, #1
1917 ; CHECK-NEXT: fmov w0, s0
1919 %tmp = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %A, i32 1)
1923 define <8 x i8> @sqrshrun8b(ptr %A) nounwind {
1924 ; CHECK-LABEL: sqrshrun8b:
1926 ; CHECK-NEXT: ldr q0, [x0]
1927 ; CHECK-NEXT: sqrshrun v0.8b, v0.8h, #1
1929 %tmp1 = load <8 x i16>, ptr %A
1930 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
1934 define <4 x i16> @sqrshrun4h(ptr %A) nounwind {
1935 ; CHECK-LABEL: sqrshrun4h:
1937 ; CHECK-NEXT: ldr q0, [x0]
1938 ; CHECK-NEXT: sqrshrun v0.4h, v0.4s, #1
1940 %tmp1 = load <4 x i32>, ptr %A
1941 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
1945 define <2 x i32> @sqrshrun2s(ptr %A) nounwind {
1946 ; CHECK-LABEL: sqrshrun2s:
1948 ; CHECK-NEXT: ldr q0, [x0]
1949 ; CHECK-NEXT: sqrshrun v0.2s, v0.2d, #1
1951 %tmp1 = load <2 x i64>, ptr %A
1952 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
1956 define <16 x i8> @sqrshrun16b(ptr %ret, ptr %A) nounwind {
1957 ; CHECK-LABEL: sqrshrun16b:
1959 ; CHECK-NEXT: ldr d0, [x0]
1960 ; CHECK-NEXT: ldr q1, [x1]
1961 ; CHECK-NEXT: sqrshrun2 v0.16b, v1.8h, #1
1963 %out = load <8 x i8>, ptr %ret
1964 %tmp1 = load <8 x i16>, ptr %A
1965 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
1966 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1970 define <8 x i16> @sqrshrun8h(ptr %ret, ptr %A) nounwind {
1971 ; CHECK-LABEL: sqrshrun8h:
1973 ; CHECK-NEXT: ldr d0, [x0]
1974 ; CHECK-NEXT: ldr q1, [x1]
1975 ; CHECK-NEXT: sqrshrun2 v0.8h, v1.4s, #1
1977 %out = load <4 x i16>, ptr %ret
1978 %tmp1 = load <4 x i32>, ptr %A
1979 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
1980 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1984 define <4 x i32> @sqrshrun4s(ptr %ret, ptr %A) nounwind {
1985 ; CHECK-LABEL: sqrshrun4s:
1987 ; CHECK-NEXT: ldr d0, [x0]
1988 ; CHECK-NEXT: ldr q1, [x1]
1989 ; CHECK-NEXT: sqrshrun2 v0.4s, v1.2d, #1
1991 %out = load <2 x i32>, ptr %ret
1992 %tmp1 = load <2 x i64>, ptr %A
1993 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
1994 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1998 declare i32 @llvm.aarch64.neon.sqrshrun.i32(i64, i32) nounwind readnone
1999 declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone
2000 declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone
2001 declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone
2003 define i32 @uqrshrn1s(i64 %A) nounwind {
2004 ; CHECK-LABEL: uqrshrn1s:
2006 ; CHECK-NEXT: fmov d0, x0
2007 ; CHECK-NEXT: uqrshrn s0, d0, #1
2008 ; CHECK-NEXT: fmov w0, s0
2010 %tmp = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %A, i32 1)
2014 define <8 x i8> @uqrshrn8b(ptr %A) nounwind {
2015 ; CHECK-LABEL: uqrshrn8b:
2017 ; CHECK-NEXT: ldr q0, [x0]
2018 ; CHECK-NEXT: uqrshrn v0.8b, v0.8h, #1
2020 %tmp1 = load <8 x i16>, ptr %A
2021 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
2025 define <4 x i16> @uqrshrn4h(ptr %A) nounwind {
2026 ; CHECK-LABEL: uqrshrn4h:
2028 ; CHECK-NEXT: ldr q0, [x0]
2029 ; CHECK-NEXT: uqrshrn v0.4h, v0.4s, #1
2031 %tmp1 = load <4 x i32>, ptr %A
2032 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
2036 define <2 x i32> @uqrshrn2s(ptr %A) nounwind {
2037 ; CHECK-LABEL: uqrshrn2s:
2039 ; CHECK-NEXT: ldr q0, [x0]
2040 ; CHECK-NEXT: uqrshrn v0.2s, v0.2d, #1
2042 %tmp1 = load <2 x i64>, ptr %A
2043 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
2047 define <16 x i8> @uqrshrn16b(ptr %ret, ptr %A) nounwind {
2048 ; CHECK-LABEL: uqrshrn16b:
2050 ; CHECK-NEXT: ldr d0, [x0]
2051 ; CHECK-NEXT: ldr q1, [x1]
2052 ; CHECK-NEXT: uqrshrn2 v0.16b, v1.8h, #1
2054 %out = load <8 x i8>, ptr %ret
2055 %tmp1 = load <8 x i16>, ptr %A
2056 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
2057 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2061 define <8 x i16> @uqrshrn8h(ptr %ret, ptr %A) nounwind {
2062 ; CHECK-LABEL: uqrshrn8h:
2064 ; CHECK-NEXT: ldr d0, [x0]
2065 ; CHECK-NEXT: ldr q1, [x1]
2066 ; CHECK-NEXT: uqrshrn2 v0.8h, v1.4s, #1
2068 %out = load <4 x i16>, ptr %ret
2069 %tmp1 = load <4 x i32>, ptr %A
2070 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
2071 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2075 define <4 x i32> @uqrshrn4s(ptr %ret, ptr %A) nounwind {
2076 ; CHECK-LABEL: uqrshrn4s:
2078 ; CHECK-NEXT: ldr d0, [x0]
2079 ; CHECK-NEXT: ldr q1, [x1]
2080 ; CHECK-NEXT: uqrshrn2 v0.4s, v1.2d, #1
2082 %out = load <2 x i32>, ptr %ret
2083 %tmp1 = load <2 x i64>, ptr %A
2084 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
2085 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2089 declare i32 @llvm.aarch64.neon.uqrshrn.i32(i64, i32) nounwind readnone
2090 declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
2091 declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
2092 declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
2094 define i32 @uqshrn1s(i64 %A) nounwind {
2095 ; CHECK-LABEL: uqshrn1s:
2097 ; CHECK-NEXT: fmov d0, x0
2098 ; CHECK-NEXT: uqshrn s0, d0, #1
2099 ; CHECK-NEXT: fmov w0, s0
2101 %tmp = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %A, i32 1)
2105 define <8 x i8> @uqshrn8b(ptr %A) nounwind {
2106 ; CHECK-LABEL: uqshrn8b:
2108 ; CHECK-NEXT: ldr q0, [x0]
2109 ; CHECK-NEXT: uqshrn v0.8b, v0.8h, #1
2111 %tmp1 = load <8 x i16>, ptr %A
2112 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
2116 define <4 x i16> @uqshrn4h(ptr %A) nounwind {
2117 ; CHECK-LABEL: uqshrn4h:
2119 ; CHECK-NEXT: ldr q0, [x0]
2120 ; CHECK-NEXT: uqshrn v0.4h, v0.4s, #1
2122 %tmp1 = load <4 x i32>, ptr %A
2123 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
2127 define <2 x i32> @uqshrn2s(ptr %A) nounwind {
2128 ; CHECK-LABEL: uqshrn2s:
2130 ; CHECK-NEXT: ldr q0, [x0]
2131 ; CHECK-NEXT: uqshrn v0.2s, v0.2d, #1
2133 %tmp1 = load <2 x i64>, ptr %A
2134 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
2138 define <16 x i8> @uqshrn16b(ptr %ret, ptr %A) nounwind {
2139 ; CHECK-LABEL: uqshrn16b:
2141 ; CHECK-NEXT: ldr d0, [x0]
2142 ; CHECK-NEXT: ldr q1, [x1]
2143 ; CHECK-NEXT: uqshrn2 v0.16b, v1.8h, #1
2145 %out = load <8 x i8>, ptr %ret
2146 %tmp1 = load <8 x i16>, ptr %A
2147 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
2148 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2152 define <8 x i16> @uqshrn8h(ptr %ret, ptr %A) nounwind {
2153 ; CHECK-LABEL: uqshrn8h:
2155 ; CHECK-NEXT: ldr d0, [x0]
2156 ; CHECK-NEXT: ldr q1, [x1]
2157 ; CHECK-NEXT: uqshrn2 v0.8h, v1.4s, #1
2159 %out = load <4 x i16>, ptr %ret
2160 %tmp1 = load <4 x i32>, ptr %A
2161 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
2162 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2166 define <4 x i32> @uqshrn4s(ptr %ret, ptr %A) nounwind {
2167 ; CHECK-LABEL: uqshrn4s:
2169 ; CHECK-NEXT: ldr d0, [x0]
2170 ; CHECK-NEXT: ldr q1, [x1]
2171 ; CHECK-NEXT: uqshrn2 v0.4s, v1.2d, #1
2173 %out = load <2 x i32>, ptr %ret
2174 %tmp1 = load <2 x i64>, ptr %A
2175 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
2176 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2180 declare i32 @llvm.aarch64.neon.uqshrn.i32(i64, i32) nounwind readnone
2181 declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone
2182 declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone
2183 declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone
2185 define <8 x i16> @ushll8h(ptr %A) nounwind {
2186 ; CHECK-LABEL: ushll8h:
2188 ; CHECK-NEXT: ldr d0, [x0]
2189 ; CHECK-NEXT: ushll v0.8h, v0.8b, #1
2191 %tmp1 = load <8 x i8>, ptr %A
2192 %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
2193 %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
2197 define <4 x i32> @ushll4s(ptr %A) nounwind {
2198 ; CHECK-LABEL: ushll4s:
2200 ; CHECK-NEXT: ldr d0, [x0]
2201 ; CHECK-NEXT: ushll v0.4s, v0.4h, #1
2203 %tmp1 = load <4 x i16>, ptr %A
2204 %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
2205 %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
2209 define <2 x i64> @ushll2d(ptr %A) nounwind {
2210 ; CHECK-LABEL: ushll2d:
2212 ; CHECK-NEXT: ldr d0, [x0]
2213 ; CHECK-NEXT: ushll v0.2d, v0.2s, #1
2215 %tmp1 = load <2 x i32>, ptr %A
2216 %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
2217 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
2221 define <8 x i16> @ushll2_8h(ptr %A) nounwind {
2222 ; CHECK-SD-LABEL: ushll2_8h:
2223 ; CHECK-SD: // %bb.0:
2224 ; CHECK-SD-NEXT: ldr d0, [x0, #8]
2225 ; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #1
2226 ; CHECK-SD-NEXT: ret
2228 ; CHECK-GI-LABEL: ushll2_8h:
2229 ; CHECK-GI: // %bb.0:
2230 ; CHECK-GI-NEXT: ldr q0, [x0]
2231 ; CHECK-GI-NEXT: mov d0, v0.d[1]
2232 ; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #1
2233 ; CHECK-GI-NEXT: ret
2234 %load1 = load <16 x i8>, ptr %A
2235 %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2236 %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
2237 %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
2241 define <4 x i32> @ushll2_4s(ptr %A) nounwind {
2242 ; CHECK-SD-LABEL: ushll2_4s:
2243 ; CHECK-SD: // %bb.0:
2244 ; CHECK-SD-NEXT: ldr d0, [x0, #8]
2245 ; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #1
2246 ; CHECK-SD-NEXT: ret
2248 ; CHECK-GI-LABEL: ushll2_4s:
2249 ; CHECK-GI: // %bb.0:
2250 ; CHECK-GI-NEXT: ldr q0, [x0]
2251 ; CHECK-GI-NEXT: mov d0, v0.d[1]
2252 ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #1
2253 ; CHECK-GI-NEXT: ret
2254 %load1 = load <8 x i16>, ptr %A
2255 %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2256 %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
2257 %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
2261 define <2 x i64> @ushll2_2d(ptr %A) nounwind {
2262 ; CHECK-SD-LABEL: ushll2_2d:
2263 ; CHECK-SD: // %bb.0:
2264 ; CHECK-SD-NEXT: ldr d0, [x0, #8]
2265 ; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #1
2266 ; CHECK-SD-NEXT: ret
2268 ; CHECK-GI-LABEL: ushll2_2d:
2269 ; CHECK-GI: // %bb.0:
2270 ; CHECK-GI-NEXT: ldr q0, [x0]
2271 ; CHECK-GI-NEXT: mov d0, v0.d[1]
2272 ; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #1
2273 ; CHECK-GI-NEXT: ret
2274 %load1 = load <4 x i32>, ptr %A
2275 %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
2276 %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
2277 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
2281 declare <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8>, <16 x i8>)
2282 declare <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16>, <8 x i16>)
2283 declare <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32>, <4 x i32>)
2284 declare <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64>, <2 x i64>)
2285 declare <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64>, <1 x i64>)
2286 declare i64 @llvm.aarch64.neon.ushl.i64(i64, i64)
2288 define <8 x i16> @neon_ushll8h_constant_shift(ptr %A) nounwind {
2289 ; CHECK-SD-LABEL: neon_ushll8h_constant_shift:
2290 ; CHECK-SD: // %bb.0:
2291 ; CHECK-SD-NEXT: ldr d0, [x0]
2292 ; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #1
2293 ; CHECK-SD-NEXT: ret
2295 ; CHECK-GI-LABEL: neon_ushll8h_constant_shift:
2296 ; CHECK-GI: // %bb.0:
2297 ; CHECK-GI-NEXT: ldr d0, [x0]
2298 ; CHECK-GI-NEXT: movi v1.8h, #1
2299 ; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
2300 ; CHECK-GI-NEXT: ushl v0.8h, v0.8h, v1.8h
2301 ; CHECK-GI-NEXT: ret
2302 %tmp1 = load <8 x i8>, ptr %A
2303 %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
2304 %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; Negative test for the constant-shift folding: the shift-amount operand of the
; ushl intrinsic is the zero-extended input itself (%tmp2 is passed as both
; operands), not a constant. The expected code therefore widens with
; `ushll ..., #0` and keeps the register form of ushl.
2308 define <8 x i16> @neon_ushl8h_no_constant_shift(ptr %A) nounwind {
2309 ; CHECK-LABEL: neon_ushl8h_no_constant_shift:
2311 ; CHECK-NEXT: ldr d0, [x0]
2312 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
2313 ; CHECK-NEXT: ushl v0.8h, v0.8h, v0.8h
2315 %tmp1 = load <8 x i8>, ptr %A
2316 %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
2317 %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp2)
; The zext here goes from <4 x i8> to <4 x i32>, i.e. the element width grows
; by 4x rather than 2x, so a single widening shift cannot cover it. With
; SelectionDAG the expected code is two ushll instructions, the constant shift
; of 1 being folded into the second one; with GlobalISel the expected code
; extends the bytes individually and uses a register-form ushl.
; Note that, despite "8h" in the name, the function operates on <4 x i32>.
2321 define <4 x i32> @neon_ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind {
2322 ; CHECK-SD-LABEL: neon_ushl8h_constant_shift_extend_not_2x:
2323 ; CHECK-SD: // %bb.0:
2324 ; CHECK-SD-NEXT: ldr s0, [x0]
2325 ; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
2326 ; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #1
2327 ; CHECK-SD-NEXT: ret
2329 ; CHECK-GI-LABEL: neon_ushl8h_constant_shift_extend_not_2x:
2330 ; CHECK-GI: // %bb.0:
2331 ; CHECK-GI-NEXT: ldr w8, [x0]
2332 ; CHECK-GI-NEXT: movi v0.4s, #1
2333 ; CHECK-GI-NEXT: fmov s1, w8
2334 ; CHECK-GI-NEXT: uxtb w8, w8
2335 ; CHECK-GI-NEXT: mov b2, v1.b[2]
2336 ; CHECK-GI-NEXT: mov b3, v1.b[1]
2337 ; CHECK-GI-NEXT: mov b4, v1.b[3]
2338 ; CHECK-GI-NEXT: fmov s1, w8
2339 ; CHECK-GI-NEXT: fmov w9, s2
2340 ; CHECK-GI-NEXT: fmov w10, s3
2341 ; CHECK-GI-NEXT: fmov w11, s4
2342 ; CHECK-GI-NEXT: uxtb w9, w9
2343 ; CHECK-GI-NEXT: uxtb w10, w10
2344 ; CHECK-GI-NEXT: uxtb w11, w11
2345 ; CHECK-GI-NEXT: fmov s2, w9
2346 ; CHECK-GI-NEXT: mov v1.h[1], w10
2347 ; CHECK-GI-NEXT: mov v2.h[1], w11
2348 ; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
2349 ; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0
2350 ; CHECK-GI-NEXT: mov v1.d[1], v2.d[0]
2351 ; CHECK-GI-NEXT: ushl v0.4s, v1.4s, v0.4s
2352 ; CHECK-GI-NEXT: ret
2353 %tmp1 = load <4 x i8>, ptr %A
2354 %tmp2 = zext <4 x i8> %tmp1 to <4 x i32>
2355 %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; ushl intrinsic with a splat-of-1 shift amount applied directly to a loaded
; <8 x i16> value, with no preceding extension. With SelectionDAG the expected
; code is a vector add of the value to itself; with GlobalISel the expected
; code materializes the splat with movi and uses the register form of ushl.
2359 define <8 x i16> @neon_ushl8_noext_constant_shift(ptr %A) nounwind {
2360 ; CHECK-SD-LABEL: neon_ushl8_noext_constant_shift:
2361 ; CHECK-SD: // %bb.0:
2362 ; CHECK-SD-NEXT: ldr q0, [x0]
2363 ; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h
2364 ; CHECK-SD-NEXT: ret
2366 ; CHECK-GI-LABEL: neon_ushl8_noext_constant_shift:
2367 ; CHECK-GI: // %bb.0:
2368 ; CHECK-GI-NEXT: movi v0.8h, #1
2369 ; CHECK-GI-NEXT: ldr q1, [x0]
2370 ; CHECK-GI-NEXT: ushl v0.8h, v1.8h, v0.8h
2371 ; CHECK-GI-NEXT: ret
2372 %tmp1 = load <8 x i16>, ptr %A
2373 %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
2377 define <4 x i32> @neon_ushll4s_constant_shift(ptr %A) nounwind {
2378 ; CHECK-SD-LABEL: neon_ushll4s_constant_shift:
2379 ; CHECK-SD: // %bb.0:
2380 ; CHECK-SD-NEXT: ldr d0, [x0]
2381 ; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #1
2382 ; CHECK-SD-NEXT: ret
2384 ; CHECK-GI-LABEL: neon_ushll4s_constant_shift:
2385 ; CHECK-GI: // %bb.0:
2386 ; CHECK-GI-NEXT: ldr d0, [x0]
2387 ; CHECK-GI-NEXT: movi v1.4s, #1
2388 ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
2389 ; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s
2390 ; CHECK-GI-NEXT: ret
2391 %tmp1 = load <4 x i16>, ptr %A
2392 %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
2393 %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
2397 ; FIXME: SelectionDAG emits a separate widening `ushll v0.4s, v0.4h, #0` before the `ushr`; is that extra instruction unnecessary?
2398 define <4 x i32> @neon_ushll4s_neg_constant_shift(ptr %A) nounwind {
2399 ; CHECK-SD-LABEL: neon_ushll4s_neg_constant_shift:
2400 ; CHECK-SD: // %bb.0:
2401 ; CHECK-SD-NEXT: ldr d0, [x0]
2402 ; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
2403 ; CHECK-SD-NEXT: ushr v0.4s, v0.4s, #1
2404 ; CHECK-SD-NEXT: ret
2406 ; CHECK-GI-LABEL: neon_ushll4s_neg_constant_shift:
2407 ; CHECK-GI: // %bb.0:
2408 ; CHECK-GI-NEXT: ldr d0, [x0]
2409 ; CHECK-GI-NEXT: movi v1.2d, #0xffffffffffffffff
2410 ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
2411 ; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s
2412 ; CHECK-GI-NEXT: ret
2413 %tmp1 = load <4 x i16>, ptr %A
2414 %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
2415 %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
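2419 ; FIXME: both operands of the ushl intrinsic call are constants, so the call should be constant folded instead of being computed at run time.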
2420 define <4 x i32> @neon_ushll4s_constant_fold() nounwind {
2421 ; CHECK-SD-LABEL: neon_ushll4s_constant_fold:
2422 ; CHECK-SD: // %bb.0:
2423 ; CHECK-SD-NEXT: adrp x8, .LCPI160_0
2424 ; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI160_0]
2425 ; CHECK-SD-NEXT: add v0.4s, v0.4s, v0.4s
2426 ; CHECK-SD-NEXT: ret
2428 ; CHECK-GI-LABEL: neon_ushll4s_constant_fold:
2429 ; CHECK-GI: // %bb.0:
2430 ; CHECK-GI-NEXT: movi v0.4s, #1
2431 ; CHECK-GI-NEXT: adrp x8, .LCPI160_0
2432 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI160_0]
2433 ; CHECK-GI-NEXT: ushl v0.4s, v1.4s, v0.4s
2434 ; CHECK-GI-NEXT: ret
2435 %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
2439 define <2 x i64> @neon_ushll2d_constant_shift(ptr %A) nounwind {
2440 ; CHECK-SD-LABEL: neon_ushll2d_constant_shift:
2441 ; CHECK-SD: // %bb.0:
2442 ; CHECK-SD-NEXT: ldr d0, [x0]
2443 ; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #1
2444 ; CHECK-SD-NEXT: ret
2446 ; CHECK-GI-LABEL: neon_ushll2d_constant_shift:
2447 ; CHECK-GI: // %bb.0:
2448 ; CHECK-GI-NEXT: ldr d0, [x0]
2449 ; CHECK-GI-NEXT: adrp x8, .LCPI161_0
2450 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI161_0]
2451 ; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
2452 ; CHECK-GI-NEXT: ushl v0.2d, v0.2d, v1.2d
2453 ; CHECK-GI-NEXT: ret
2454 %tmp1 = load <2 x i32>, ptr %A
2455 %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
2456 %tmp3 = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 1, i64 1>)
2460 define <1 x i64> @neon_ushl_vscalar_constant_shift(ptr %A) nounwind {
2461 ; CHECK-LABEL: neon_ushl_vscalar_constant_shift:
2463 ; CHECK-NEXT: movi v0.2d, #0000000000000000
2464 ; CHECK-NEXT: ldr s1, [x0]
2465 ; CHECK-NEXT: zip1 v0.2s, v1.2s, v0.2s
2466 ; CHECK-NEXT: shl d0, d0, #1
2468 %tmp1 = load <1 x i32>, ptr %A
2469 %tmp2 = zext <1 x i32> %tmp1 to <1 x i64>
2470 %tmp3 = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %tmp2, <1 x i64> <i64 1>)
2474 define i64 @neon_ushl_scalar_constant_shift(ptr %A) nounwind {
2475 ; CHECK-LABEL: neon_ushl_scalar_constant_shift:
2477 ; CHECK-NEXT: ldr w8, [x0]
2478 ; CHECK-NEXT: fmov d0, x8
2479 ; CHECK-NEXT: shl d0, d0, #1
2480 ; CHECK-NEXT: fmov x0, d0
2482 %tmp1 = load i32, ptr %A
2483 %tmp2 = zext i32 %tmp1 to i64
2484 %tmp3 = call i64 @llvm.aarch64.neon.ushl.i64(i64 %tmp2, i64 1)
2488 define <8 x i16> @sshll8h(ptr %A) nounwind {
2489 ; CHECK-LABEL: sshll8h:
2491 ; CHECK-NEXT: ldr d0, [x0]
2492 ; CHECK-NEXT: sshll v0.8h, v0.8b, #1
2494 %tmp1 = load <8 x i8>, ptr %A
2495 %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
2496 %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
2500 define <2 x i64> @sshll2d(ptr %A) nounwind {
2501 ; CHECK-LABEL: sshll2d:
2503 ; CHECK-NEXT: ldr d0, [x0]
2504 ; CHECK-NEXT: sshll v0.2d, v0.2s, #1
2506 %tmp1 = load <2 x i32>, ptr %A
2507 %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
2508 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
2512 declare <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8>, <16 x i8>)
2513 declare <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16>, <8 x i16>)
2514 declare <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32>, <4 x i32>)
2515 declare <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64>, <2 x i64>)
2516 declare <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64>, <1 x i64>)
2517 declare i64 @llvm.aarch64.neon.sshl.i64(i64, i64)
2519 define <16 x i8> @neon_sshl16b_constant_shift(ptr %A) nounwind {
2520 ; CHECK-SD-LABEL: neon_sshl16b_constant_shift:
2521 ; CHECK-SD: // %bb.0:
2522 ; CHECK-SD-NEXT: ldr q0, [x0]
2523 ; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b
2524 ; CHECK-SD-NEXT: ret
2526 ; CHECK-GI-LABEL: neon_sshl16b_constant_shift:
2527 ; CHECK-GI: // %bb.0:
2528 ; CHECK-GI-NEXT: movi v0.16b, #1
2529 ; CHECK-GI-NEXT: ldr q1, [x0]
2530 ; CHECK-GI-NEXT: sshl v0.16b, v1.16b, v0.16b
2531 ; CHECK-GI-NEXT: ret
2532 %tmp1 = load <16 x i8>, ptr %A
2533 %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; The constant shift vector is not a splat: lane 0 is 6 and every other lane
; is 1. Both instruction selectors are expected to load the shift vector from
; the constant pool and use the register form of sshl rather than an
; immediate shift.
2537 define <16 x i8> @neon_sshl16b_non_splat_constant_shift(ptr %A) nounwind {
2538 ; CHECK-SD-LABEL: neon_sshl16b_non_splat_constant_shift:
2539 ; CHECK-SD: // %bb.0:
2540 ; CHECK-SD-NEXT: adrp x8, .LCPI167_0
2541 ; CHECK-SD-NEXT: ldr q0, [x0]
2542 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI167_0]
2543 ; CHECK-SD-NEXT: sshl v0.16b, v0.16b, v1.16b
2544 ; CHECK-SD-NEXT: ret
2546 ; CHECK-GI-LABEL: neon_sshl16b_non_splat_constant_shift:
2547 ; CHECK-GI: // %bb.0:
2548 ; CHECK-GI-NEXT: adrp x8, .LCPI167_0
2549 ; CHECK-GI-NEXT: ldr q1, [x0]
2550 ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI167_0]
2551 ; CHECK-GI-NEXT: sshl v0.16b, v1.16b, v0.16b
2552 ; CHECK-GI-NEXT: ret
2553 %tmp1 = load <16 x i8>, ptr %A
2554 %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 6, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; sshl intrinsic with a splat of -2 as the shift amount. With SelectionDAG the
; expected code is an immediate arithmetic right shift by 2 (sshr); with
; GlobalISel the expected code materializes the splat with movi (#254 is the
; byte encoding of -2) and uses the register form of sshl.
2558 define <16 x i8> @neon_sshl16b_neg_constant_shift(ptr %A) nounwind {
2559 ; CHECK-SD-LABEL: neon_sshl16b_neg_constant_shift:
2560 ; CHECK-SD: // %bb.0:
2561 ; CHECK-SD-NEXT: ldr q0, [x0]
2562 ; CHECK-SD-NEXT: sshr v0.16b, v0.16b, #2
2563 ; CHECK-SD-NEXT: ret
2565 ; CHECK-GI-LABEL: neon_sshl16b_neg_constant_shift:
2566 ; CHECK-GI: // %bb.0:
2567 ; CHECK-GI-NEXT: movi v0.16b, #254
2568 ; CHECK-GI-NEXT: ldr q1, [x0]
2569 ; CHECK-GI-NEXT: sshl v0.16b, v1.16b, v0.16b
2570 ; CHECK-GI-NEXT: ret
2571 %tmp1 = load <16 x i8>, ptr %A
2572 %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>)
2576 define <8 x i16> @neon_sshll8h_constant_shift(ptr %A) nounwind {
2577 ; CHECK-SD-LABEL: neon_sshll8h_constant_shift:
2578 ; CHECK-SD: // %bb.0:
2579 ; CHECK-SD-NEXT: ldr d0, [x0]
2580 ; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #1
2581 ; CHECK-SD-NEXT: ret
2583 ; CHECK-GI-LABEL: neon_sshll8h_constant_shift:
2584 ; CHECK-GI: // %bb.0:
2585 ; CHECK-GI-NEXT: ldr d0, [x0]
2586 ; CHECK-GI-NEXT: movi v1.8h, #1
2587 ; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
2588 ; CHECK-GI-NEXT: sshl v0.8h, v0.8h, v1.8h
2589 ; CHECK-GI-NEXT: ret
2590 %tmp1 = load <8 x i8>, ptr %A
2591 %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
2592 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %tmp2, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; The sext here goes from <4 x i8> to <4 x i32>, i.e. the element width grows
; by 4x rather than 2x. With SelectionDAG the expected code is two sshll
; instructions, the constant shift of 1 being folded into the second one; with
; GlobalISel the expected code sign-extends the bytes individually and uses a
; register-form sshl.
2596 define <4 x i32> @neon_sshl4s_wrong_ext_constant_shift(ptr %A) nounwind {
2597 ; CHECK-SD-LABEL: neon_sshl4s_wrong_ext_constant_shift:
2598 ; CHECK-SD: // %bb.0:
2599 ; CHECK-SD-NEXT: ldr s0, [x0]
2600 ; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
2601 ; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #1
2602 ; CHECK-SD-NEXT: ret
2604 ; CHECK-GI-LABEL: neon_sshl4s_wrong_ext_constant_shift:
2605 ; CHECK-GI: // %bb.0:
2606 ; CHECK-GI-NEXT: ldr w8, [x0]
2607 ; CHECK-GI-NEXT: movi v0.4s, #1
2608 ; CHECK-GI-NEXT: fmov s1, w8
2609 ; CHECK-GI-NEXT: sxtb w8, w8
2610 ; CHECK-GI-NEXT: mov b2, v1.b[2]
2611 ; CHECK-GI-NEXT: mov b3, v1.b[1]
2612 ; CHECK-GI-NEXT: mov b4, v1.b[3]
2613 ; CHECK-GI-NEXT: fmov s1, w8
2614 ; CHECK-GI-NEXT: fmov w9, s2
2615 ; CHECK-GI-NEXT: fmov w10, s3
2616 ; CHECK-GI-NEXT: fmov w11, s4
2617 ; CHECK-GI-NEXT: sxtb w9, w9
2618 ; CHECK-GI-NEXT: sxtb w10, w10
2619 ; CHECK-GI-NEXT: sxtb w11, w11
2620 ; CHECK-GI-NEXT: fmov s2, w9
2621 ; CHECK-GI-NEXT: mov v1.h[1], w10
2622 ; CHECK-GI-NEXT: mov v2.h[1], w11
2623 ; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0
2624 ; CHECK-GI-NEXT: sshll v2.4s, v2.4h, #0
2625 ; CHECK-GI-NEXT: mov v1.d[1], v2.d[0]
2626 ; CHECK-GI-NEXT: sshl v0.4s, v1.4s, v0.4s
2627 ; CHECK-GI-NEXT: ret
2628 %tmp1 = load <4 x i8>, ptr %A
2629 %tmp2 = sext <4 x i8> %tmp1 to <4 x i32>
2630 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
2634 define <4 x i32> @neon_sshll4s_constant_shift(ptr %A) nounwind {
2635 ; CHECK-SD-LABEL: neon_sshll4s_constant_shift:
2636 ; CHECK-SD: // %bb.0:
2637 ; CHECK-SD-NEXT: ldr d0, [x0]
2638 ; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #1
2639 ; CHECK-SD-NEXT: ret
2641 ; CHECK-GI-LABEL: neon_sshll4s_constant_shift:
2642 ; CHECK-GI: // %bb.0:
2643 ; CHECK-GI-NEXT: ldr d0, [x0]
2644 ; CHECK-GI-NEXT: movi v1.4s, #1
2645 ; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
2646 ; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s
2647 ; CHECK-GI-NEXT: ret
2648 %tmp1 = load <4 x i16>, ptr %A
2649 %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
2650 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
2654 define <4 x i32> @neon_sshll4s_neg_constant_shift(ptr %A) nounwind {
2655 ; CHECK-SD-LABEL: neon_sshll4s_neg_constant_shift:
2656 ; CHECK-SD: // %bb.0:
2657 ; CHECK-SD-NEXT: ldr d0, [x0]
2658 ; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
2659 ; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #1
2660 ; CHECK-SD-NEXT: ret
2662 ; CHECK-GI-LABEL: neon_sshll4s_neg_constant_shift:
2663 ; CHECK-GI: // %bb.0:
2664 ; CHECK-GI-NEXT: ldr d0, [x0]
2665 ; CHECK-GI-NEXT: movi v1.2d, #0xffffffffffffffff
2666 ; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
2667 ; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s
2668 ; CHECK-GI-NEXT: ret
2669 %tmp1 = load <4 x i16>, ptr %A
2670 %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
2671 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
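2675 ; FIXME: both operands of the sshl intrinsic call are constants, so the call should be constant folded instead of being computed at run time.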
2676 define <4 x i32> @neon_sshl4s_constant_fold() nounwind {
2677 ; CHECK-SD-LABEL: neon_sshl4s_constant_fold:
2678 ; CHECK-SD: // %bb.0:
2679 ; CHECK-SD-NEXT: adrp x8, .LCPI173_0
2680 ; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI173_0]
2681 ; CHECK-SD-NEXT: shl v0.4s, v0.4s, #2
2682 ; CHECK-SD-NEXT: ret
2684 ; CHECK-GI-LABEL: neon_sshl4s_constant_fold:
2685 ; CHECK-GI: // %bb.0:
2686 ; CHECK-GI-NEXT: movi v0.4s, #2
2687 ; CHECK-GI-NEXT: adrp x8, .LCPI173_0
2688 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI173_0]
2689 ; CHECK-GI-NEXT: sshl v0.4s, v1.4s, v0.4s
2690 ; CHECK-GI-NEXT: ret
2691 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
2695 define <4 x i32> @neon_sshl4s_no_fold(ptr %A) nounwind {
2696 ; CHECK-SD-LABEL: neon_sshl4s_no_fold:
2697 ; CHECK-SD: // %bb.0:
2698 ; CHECK-SD-NEXT: ldr q0, [x0]
2699 ; CHECK-SD-NEXT: add v0.4s, v0.4s, v0.4s
2700 ; CHECK-SD-NEXT: ret
2702 ; CHECK-GI-LABEL: neon_sshl4s_no_fold:
2703 ; CHECK-GI: // %bb.0:
2704 ; CHECK-GI-NEXT: movi v0.4s, #1
2705 ; CHECK-GI-NEXT: ldr q1, [x0]
2706 ; CHECK-GI-NEXT: sshl v0.4s, v1.4s, v0.4s
2707 ; CHECK-GI-NEXT: ret
2708 %tmp1 = load <4 x i32>, ptr %A
2709 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
2713 define <2 x i64> @neon_sshll2d_constant_shift(ptr %A) nounwind {
2714 ; CHECK-SD-LABEL: neon_sshll2d_constant_shift:
2715 ; CHECK-SD: // %bb.0:
2716 ; CHECK-SD-NEXT: ldr d0, [x0]
2717 ; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #1
2718 ; CHECK-SD-NEXT: ret
2720 ; CHECK-GI-LABEL: neon_sshll2d_constant_shift:
2721 ; CHECK-GI: // %bb.0:
2722 ; CHECK-GI-NEXT: ldr d0, [x0]
2723 ; CHECK-GI-NEXT: adrp x8, .LCPI175_0
2724 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI175_0]
2725 ; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
2726 ; CHECK-GI-NEXT: sshl v0.2d, v0.2d, v1.2d
2727 ; CHECK-GI-NEXT: ret
2728 %tmp1 = load <2 x i32>, ptr %A
2729 %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
2730 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 1, i64 1>)
; sshl intrinsic on a <1 x i64> value with a constant shift amount of 1.
; The <1 x i32> input is widened with zext (not sext) before the call, and the
; expected code accordingly zero-fills the upper half (zip1 with a zeroed
; register) and then uses an immediate `shl`.
; NOTE(review): the test name says "sshll" but the IR zero-extends; confirm
; whether sext was intended before changing anything, since the assertions
; would have to be regenerated.
2734 define <1 x i64> @neon_sshll_vscalar_constant_shift(ptr %A) nounwind {
2735 ; CHECK-LABEL: neon_sshll_vscalar_constant_shift:
2737 ; CHECK-NEXT: movi v0.2d, #0000000000000000
2738 ; CHECK-NEXT: ldr s1, [x0]
2739 ; CHECK-NEXT: zip1 v0.2s, v1.2s, v0.2s
2740 ; CHECK-NEXT: shl d0, d0, #1
2742 %tmp1 = load <1 x i32>, ptr %A
2743 %tmp2 = zext <1 x i32> %tmp1 to <1 x i64>
2744 %tmp3 = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %tmp2, <1 x i64> <i64 1>)
; Scalar i64 sshl intrinsic with a constant shift amount of 1. The i32 input is
; widened with zext (not sext) before the call; the expected code moves the
; loaded value into a SIMD register, applies an immediate `shl`, and moves the
; result back to a general-purpose register.
; NOTE(review): the test name says "sshll" but the IR zero-extends; confirm
; whether sext was intended.
2748 define i64 @neon_sshll_scalar_constant_shift(ptr %A) nounwind {
2749 ; CHECK-LABEL: neon_sshll_scalar_constant_shift:
2751 ; CHECK-NEXT: ldr w8, [x0]
2752 ; CHECK-NEXT: fmov d0, x8
2753 ; CHECK-NEXT: shl d0, d0, #1
2754 ; CHECK-NEXT: fmov x0, d0
2756 %tmp1 = load i32, ptr %A
2757 %tmp2 = zext i32 %tmp1 to i64
2758 %tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 1)
; Scalar i64 sshl intrinsic with a constant shift amount of -1 ("m1" = minus
; one). The expected code is an immediate arithmetic right shift by 1 (sshr).
; As in the other scalar sshl tests here, the i32 input is widened with zext.
2762 define i64 @neon_sshll_scalar_constant_shift_m1(ptr %A) nounwind {
2763 ; CHECK-LABEL: neon_sshll_scalar_constant_shift_m1:
2765 ; CHECK-NEXT: ldr w8, [x0]
2766 ; CHECK-NEXT: fmov d0, x8
2767 ; CHECK-NEXT: sshr d0, d0, #1
2768 ; CHECK-NEXT: fmov x0, d0
2770 %tmp1 = load i32, ptr %A
2771 %tmp2 = zext i32 %tmp1 to i64
2772 %tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 -1)
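2776 ; FIXME: both operands of the sshl intrinsic call are constants, so the call should be constant folded instead of being computed at run time.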
2777 define <2 x i64> @neon_sshl2d_constant_fold() nounwind {
2778 ; CHECK-SD-LABEL: neon_sshl2d_constant_fold:
2779 ; CHECK-SD: // %bb.0:
2780 ; CHECK-SD-NEXT: adrp x8, .LCPI179_0
2781 ; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI179_0]
2782 ; CHECK-SD-NEXT: add v0.2d, v0.2d, v0.2d
2783 ; CHECK-SD-NEXT: ret
2785 ; CHECK-GI-LABEL: neon_sshl2d_constant_fold:
2786 ; CHECK-GI: // %bb.0:
2787 ; CHECK-GI-NEXT: adrp x8, .LCPI179_1
2788 ; CHECK-GI-NEXT: adrp x9, .LCPI179_0
2789 ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI179_1]
2790 ; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI179_0]
2791 ; CHECK-GI-NEXT: sshl v0.2d, v0.2d, v1.2d
2792 ; CHECK-GI-NEXT: ret
2793 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> <i64 99, i64 1000>, <2 x i64> <i64 1, i64 1>)
; sshl intrinsic on a loaded <2 x i64> with a splat-2 shift vector; the value
; operand is not constant, so nothing can be folded away. With -global-isel=0
; the immediate form (shl ..., #2) is expected; with -global-isel=1 the shift
; vector is loaded from the constant pool and the register-form sshl is used.
2797 define <2 x i64> @neon_sshl2d_no_fold(ptr %A) nounwind {
2798 ; CHECK-SD-LABEL: neon_sshl2d_no_fold:
2799 ; CHECK-SD: // %bb.0:
2800 ; CHECK-SD-NEXT: ldr q0, [x0]
2801 ; CHECK-SD-NEXT: shl v0.2d, v0.2d, #2
2802 ; CHECK-SD-NEXT: ret
2804 ; CHECK-GI-LABEL: neon_sshl2d_no_fold:
2805 ; CHECK-GI: // %bb.0:
2806 ; CHECK-GI-NEXT: adrp x8, .LCPI180_0
2807 ; CHECK-GI-NEXT: ldr q1, [x0]
2808 ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI180_0]
2809 ; CHECK-GI-NEXT: sshl v0.2d, v1.2d, v0.2d
2810 ; CHECK-GI-NEXT: ret
2811 %tmp2 = load <2 x i64>, ptr %A
2812 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 2, i64 2>)
; High half of a loaded <16 x i8> (lanes 8..15), sign-extended to <8 x i16> and
; shifted left by 1: both runs expect a single sshll #1. With -global-isel=0
; the high half is loaded directly from offset 8; with -global-isel=1 the full
; vector is loaded and the high half is moved down first.
; The unused second shuffle operand is poison rather than the deprecated undef;
; the mask only selects lanes of the first operand.
2816 define <8 x i16> @sshll2_8h(ptr %A) nounwind {
2817 ; CHECK-SD-LABEL: sshll2_8h:
2818 ; CHECK-SD: // %bb.0:
2819 ; CHECK-SD-NEXT: ldr d0, [x0, #8]
2820 ; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #1
2821 ; CHECK-SD-NEXT: ret
2823 ; CHECK-GI-LABEL: sshll2_8h:
2824 ; CHECK-GI: // %bb.0:
2825 ; CHECK-GI-NEXT: ldr q0, [x0]
2826 ; CHECK-GI-NEXT: mov d0, v0.d[1]
2827 ; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #1
2828 ; CHECK-GI-NEXT: ret
2829 %load1 = load <16 x i8>, ptr %A
2830 %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2831 %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
2832 %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; High half of a loaded <8 x i16> (lanes 4..7), sign-extended to <4 x i32> and
; shifted left by 1: both runs expect a single sshll #1. With -global-isel=0
; the high half is loaded directly from offset 8; with -global-isel=1 the full
; vector is loaded and the high half is moved down first.
; The unused second shuffle operand is poison rather than the deprecated undef;
; the mask only selects lanes of the first operand.
2836 define <4 x i32> @sshll2_4s(ptr %A) nounwind {
2837 ; CHECK-SD-LABEL: sshll2_4s:
2838 ; CHECK-SD: // %bb.0:
2839 ; CHECK-SD-NEXT: ldr d0, [x0, #8]
2840 ; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #1
2841 ; CHECK-SD-NEXT: ret
2843 ; CHECK-GI-LABEL: sshll2_4s:
2844 ; CHECK-GI: // %bb.0:
2845 ; CHECK-GI-NEXT: ldr q0, [x0]
2846 ; CHECK-GI-NEXT: mov d0, v0.d[1]
2847 ; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #1
2848 ; CHECK-GI-NEXT: ret
2849 %load1 = load <8 x i16>, ptr %A
2850 %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2851 %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
2852 %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
; High half of a loaded <4 x i32> (lanes 2..3), sign-extended to <2 x i64> and
; shifted left by 1: both runs expect a single sshll #1. With -global-isel=0
; the high half is loaded directly from offset 8; with -global-isel=1 the full
; vector is loaded and the high half is moved down first.
; The unused second shuffle operand is poison rather than the deprecated undef;
; the mask only selects lanes of the first operand.
2856 define <2 x i64> @sshll2_2d(ptr %A) nounwind {
2857 ; CHECK-SD-LABEL: sshll2_2d:
2858 ; CHECK-SD: // %bb.0:
2859 ; CHECK-SD-NEXT: ldr d0, [x0, #8]
2860 ; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #1
2861 ; CHECK-SD-NEXT: ret
2863 ; CHECK-GI-LABEL: sshll2_2d:
2864 ; CHECK-GI: // %bb.0:
2865 ; CHECK-GI-NEXT: ldr q0, [x0]
2866 ; CHECK-GI-NEXT: mov d0, v0.d[1]
2867 ; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #1
2868 ; CHECK-GI-NEXT: ret
2869 %load1 = load <4 x i32>, ptr %A
2870 %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
2871 %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
2872 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
; sqshl intrinsic on <8 x i8> with a splat-1 shift vector. With -global-isel=0
; the immediate form (sqshl ..., #1) is expected; with -global-isel=1 the splat
; is materialized with movi and the register form is used.
2876 define <8 x i8> @sqshli8b(ptr %A) nounwind {
2877 ; CHECK-SD-LABEL: sqshli8b:
2878 ; CHECK-SD: // %bb.0:
2879 ; CHECK-SD-NEXT: ldr d0, [x0]
2880 ; CHECK-SD-NEXT: sqshl v0.8b, v0.8b, #1
2881 ; CHECK-SD-NEXT: ret
2883 ; CHECK-GI-LABEL: sqshli8b:
2884 ; CHECK-GI: // %bb.0:
2885 ; CHECK-GI-NEXT: movi v0.8b, #1
2886 ; CHECK-GI-NEXT: ldr d1, [x0]
2887 ; CHECK-GI-NEXT: sqshl v0.8b, v1.8b, v0.8b
2888 ; CHECK-GI-NEXT: ret
2889 %tmp1 = load <8 x i8>, ptr %A
2890 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; sqshl intrinsic on <4 x i16> with a splat-1 shift vector. With -global-isel=0
; the immediate form (sqshl ..., #1) is expected; with -global-isel=1 the splat
; is materialized with movi and the register form is used.
2894 define <4 x i16> @sqshli4h(ptr %A) nounwind {
2895 ; CHECK-SD-LABEL: sqshli4h:
2896 ; CHECK-SD: // %bb.0:
2897 ; CHECK-SD-NEXT: ldr d0, [x0]
2898 ; CHECK-SD-NEXT: sqshl v0.4h, v0.4h, #1
2899 ; CHECK-SD-NEXT: ret
2901 ; CHECK-GI-LABEL: sqshli4h:
2902 ; CHECK-GI: // %bb.0:
2903 ; CHECK-GI-NEXT: movi v0.4h, #1
2904 ; CHECK-GI-NEXT: ldr d1, [x0]
2905 ; CHECK-GI-NEXT: sqshl v0.4h, v1.4h, v0.4h
2906 ; CHECK-GI-NEXT: ret
2907 %tmp1 = load <4 x i16>, ptr %A
2908 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
; sqshl intrinsic on <2 x i32> with a splat-1 shift vector. With -global-isel=0
; the immediate form (sqshl ..., #1) is expected; with -global-isel=1 the splat
; is materialized with movi and the register form is used.
2912 define <2 x i32> @sqshli2s(ptr %A) nounwind {
2913 ; CHECK-SD-LABEL: sqshli2s:
2914 ; CHECK-SD: // %bb.0:
2915 ; CHECK-SD-NEXT: ldr d0, [x0]
2916 ; CHECK-SD-NEXT: sqshl v0.2s, v0.2s, #1
2917 ; CHECK-SD-NEXT: ret
2919 ; CHECK-GI-LABEL: sqshli2s:
2920 ; CHECK-GI: // %bb.0:
2921 ; CHECK-GI-NEXT: movi v0.2s, #1
2922 ; CHECK-GI-NEXT: ldr d1, [x0]
2923 ; CHECK-GI-NEXT: sqshl v0.2s, v1.2s, v0.2s
2924 ; CHECK-GI-NEXT: ret
2925 %tmp1 = load <2 x i32>, ptr %A
2926 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
; sqshl intrinsic on <16 x i8> with a splat-1 shift vector. With -global-isel=0
; the immediate form (sqshl ..., #1) is expected; with -global-isel=1 the splat
; is materialized with movi and the register form is used.
2930 define <16 x i8> @sqshli16b(ptr %A) nounwind {
2931 ; CHECK-SD-LABEL: sqshli16b:
2932 ; CHECK-SD: // %bb.0:
2933 ; CHECK-SD-NEXT: ldr q0, [x0]
2934 ; CHECK-SD-NEXT: sqshl v0.16b, v0.16b, #1
2935 ; CHECK-SD-NEXT: ret
2937 ; CHECK-GI-LABEL: sqshli16b:
2938 ; CHECK-GI: // %bb.0:
2939 ; CHECK-GI-NEXT: movi v0.16b, #1
2940 ; CHECK-GI-NEXT: ldr q1, [x0]
2941 ; CHECK-GI-NEXT: sqshl v0.16b, v1.16b, v0.16b
2942 ; CHECK-GI-NEXT: ret
2943 %tmp1 = load <16 x i8>, ptr %A
2944 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; sqshl intrinsic on <8 x i16> with a splat-1 shift vector. With -global-isel=0
; the immediate form (sqshl ..., #1) is expected; with -global-isel=1 the splat
; is materialized with movi and the register form is used.
2948 define <8 x i16> @sqshli8h(ptr %A) nounwind {
2949 ; CHECK-SD-LABEL: sqshli8h:
2950 ; CHECK-SD: // %bb.0:
2951 ; CHECK-SD-NEXT: ldr q0, [x0]
2952 ; CHECK-SD-NEXT: sqshl v0.8h, v0.8h, #1
2953 ; CHECK-SD-NEXT: ret
2955 ; CHECK-GI-LABEL: sqshli8h:
2956 ; CHECK-GI: // %bb.0:
2957 ; CHECK-GI-NEXT: movi v0.8h, #1
2958 ; CHECK-GI-NEXT: ldr q1, [x0]
2959 ; CHECK-GI-NEXT: sqshl v0.8h, v1.8h, v0.8h
2960 ; CHECK-GI-NEXT: ret
2961 %tmp1 = load <8 x i16>, ptr %A
2962 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; sqshl intrinsic on <4 x i32> with a splat-1 shift vector. With -global-isel=0
; the immediate form (sqshl ..., #1) is expected; with -global-isel=1 the splat
; is materialized with movi and the register form is used.
2966 define <4 x i32> @sqshli4s(ptr %A) nounwind {
2967 ; CHECK-SD-LABEL: sqshli4s:
2968 ; CHECK-SD: // %bb.0:
2969 ; CHECK-SD-NEXT: ldr q0, [x0]
2970 ; CHECK-SD-NEXT: sqshl v0.4s, v0.4s, #1
2971 ; CHECK-SD-NEXT: ret
2973 ; CHECK-GI-LABEL: sqshli4s:
2974 ; CHECK-GI: // %bb.0:
2975 ; CHECK-GI-NEXT: movi v0.4s, #1
2976 ; CHECK-GI-NEXT: ldr q1, [x0]
2977 ; CHECK-GI-NEXT: sqshl v0.4s, v1.4s, v0.4s
2978 ; CHECK-GI-NEXT: ret
2979 %tmp1 = load <4 x i32>, ptr %A
2980 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; sqshl intrinsic on <2 x i64> with a splat-1 shift vector. With -global-isel=0
; the immediate form (sqshl ..., #1) is expected; with -global-isel=1 the shift
; vector is loaded from the constant pool and the register form is used.
2984 define <2 x i64> @sqshli2d(ptr %A) nounwind {
2985 ; CHECK-SD-LABEL: sqshli2d:
2986 ; CHECK-SD: // %bb.0:
2987 ; CHECK-SD-NEXT: ldr q0, [x0]
2988 ; CHECK-SD-NEXT: sqshl v0.2d, v0.2d, #1
2989 ; CHECK-SD-NEXT: ret
2991 ; CHECK-GI-LABEL: sqshli2d:
2992 ; CHECK-GI: // %bb.0:
2993 ; CHECK-GI-NEXT: adrp x8, .LCPI190_0
2994 ; CHECK-GI-NEXT: ldr q1, [x0]
2995 ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI190_0]
2996 ; CHECK-GI-NEXT: sqshl v0.2d, v1.2d, v0.2d
2997 ; CHECK-GI-NEXT: ret
2998 %tmp1 = load <2 x i64>, ptr %A
2999 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
; uqshl intrinsic on <8 x i8> with a splat-1 shift vector. With -global-isel=0
; the immediate form (uqshl ..., #1) is expected; with -global-isel=1 the splat
; is materialized with movi and the register form is used.
3003 define <8 x i8> @uqshli8b(ptr %A) nounwind {
3004 ; CHECK-SD-LABEL: uqshli8b:
3005 ; CHECK-SD: // %bb.0:
3006 ; CHECK-SD-NEXT: ldr d0, [x0]
3007 ; CHECK-SD-NEXT: uqshl v0.8b, v0.8b, #1
3008 ; CHECK-SD-NEXT: ret
3010 ; CHECK-GI-LABEL: uqshli8b:
3011 ; CHECK-GI: // %bb.0:
3012 ; CHECK-GI-NEXT: movi v0.8b, #1
3013 ; CHECK-GI-NEXT: ldr d1, [x0]
3014 ; CHECK-GI-NEXT: uqshl v0.8b, v1.8b, v0.8b
3015 ; CHECK-GI-NEXT: ret
3016 %tmp1 = load <8 x i8>, ptr %A
3017 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; uqshl intrinsic on <8 x i8> with a splat shift of 8, i.e. equal to the lane
; width. Both runs expect the register form with a movi-materialized splat
; rather than an immediate shift.
; NOTE(review): presumably 8 is outside the immediate range for byte lanes -
; confirm against the instruction encoding.
3021 define <8 x i8> @uqshli8b_1(ptr %A) nounwind {
3022 ; CHECK-LABEL: uqshli8b_1:
3024 ; CHECK-NEXT: movi v0.8b, #8
3025 ; CHECK-NEXT: ldr d1, [x0]
3026 ; CHECK-NEXT: uqshl v0.8b, v1.8b, v0.8b
3028 %tmp1 = load <8 x i8>, ptr %A
3029 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
; uqshl intrinsic on <4 x i16> with a splat-1 shift vector. With -global-isel=0
; the immediate form (uqshl ..., #1) is expected; with -global-isel=1 the splat
; is materialized with movi and the register form is used.
3033 define <4 x i16> @uqshli4h(ptr %A) nounwind {
3034 ; CHECK-SD-LABEL: uqshli4h:
3035 ; CHECK-SD: // %bb.0:
3036 ; CHECK-SD-NEXT: ldr d0, [x0]
3037 ; CHECK-SD-NEXT: uqshl v0.4h, v0.4h, #1
3038 ; CHECK-SD-NEXT: ret
3040 ; CHECK-GI-LABEL: uqshli4h:
3041 ; CHECK-GI: // %bb.0:
3042 ; CHECK-GI-NEXT: movi v0.4h, #1
3043 ; CHECK-GI-NEXT: ldr d1, [x0]
3044 ; CHECK-GI-NEXT: uqshl v0.4h, v1.4h, v0.4h
3045 ; CHECK-GI-NEXT: ret
3046 %tmp1 = load <4 x i16>, ptr %A
3047 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
; uqshl intrinsic on <2 x i32> with a splat-1 shift vector. With -global-isel=0
; the immediate form (uqshl ..., #1) is expected; with -global-isel=1 the splat
; is materialized with movi and the register form is used.
3051 define <2 x i32> @uqshli2s(ptr %A) nounwind {
3052 ; CHECK-SD-LABEL: uqshli2s:
3053 ; CHECK-SD: // %bb.0:
3054 ; CHECK-SD-NEXT: ldr d0, [x0]
3055 ; CHECK-SD-NEXT: uqshl v0.2s, v0.2s, #1
3056 ; CHECK-SD-NEXT: ret
3058 ; CHECK-GI-LABEL: uqshli2s:
3059 ; CHECK-GI: // %bb.0:
3060 ; CHECK-GI-NEXT: movi v0.2s, #1
3061 ; CHECK-GI-NEXT: ldr d1, [x0]
3062 ; CHECK-GI-NEXT: uqshl v0.2s, v1.2s, v0.2s
3063 ; CHECK-GI-NEXT: ret
3064 %tmp1 = load <2 x i32>, ptr %A
3065 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
; uqshl intrinsic on <16 x i8> with a splat-1 shift vector. With -global-isel=0
; the immediate form (uqshl ..., #1) is expected; with -global-isel=1 the splat
; is materialized with movi and the register form is used.
3069 define <16 x i8> @uqshli16b(ptr %A) nounwind {
3070 ; CHECK-SD-LABEL: uqshli16b:
3071 ; CHECK-SD: // %bb.0:
3072 ; CHECK-SD-NEXT: ldr q0, [x0]
3073 ; CHECK-SD-NEXT: uqshl v0.16b, v0.16b, #1
3074 ; CHECK-SD-NEXT: ret
3076 ; CHECK-GI-LABEL: uqshli16b:
3077 ; CHECK-GI: // %bb.0:
3078 ; CHECK-GI-NEXT: movi v0.16b, #1
3079 ; CHECK-GI-NEXT: ldr q1, [x0]
3080 ; CHECK-GI-NEXT: uqshl v0.16b, v1.16b, v0.16b
3081 ; CHECK-GI-NEXT: ret
3082 %tmp1 = load <16 x i8>, ptr %A
3083 %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; uqshl intrinsic on <8 x i16> with a splat-1 shift vector. With -global-isel=0
; the immediate form (uqshl ..., #1) is expected; with -global-isel=1 the splat
; is materialized with movi and the register form is used.
3087 define <8 x i16> @uqshli8h(ptr %A) nounwind {
3088 ; CHECK-SD-LABEL: uqshli8h:
3089 ; CHECK-SD: // %bb.0:
3090 ; CHECK-SD-NEXT: ldr q0, [x0]
3091 ; CHECK-SD-NEXT: uqshl v0.8h, v0.8h, #1
3092 ; CHECK-SD-NEXT: ret
3094 ; CHECK-GI-LABEL: uqshli8h:
3095 ; CHECK-GI: // %bb.0:
3096 ; CHECK-GI-NEXT: movi v0.8h, #1
3097 ; CHECK-GI-NEXT: ldr q1, [x0]
3098 ; CHECK-GI-NEXT: uqshl v0.8h, v1.8h, v0.8h
3099 ; CHECK-GI-NEXT: ret
3100 %tmp1 = load <8 x i16>, ptr %A
3101 %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; uqshl intrinsic on <4 x i32> with a splat-1 shift vector. With -global-isel=0
; the immediate form (uqshl ..., #1) is expected; with -global-isel=1 the splat
; is materialized with movi and the register form is used.
3105 define <4 x i32> @uqshli4s(ptr %A) nounwind {
3106 ; CHECK-SD-LABEL: uqshli4s:
3107 ; CHECK-SD: // %bb.0:
3108 ; CHECK-SD-NEXT: ldr q0, [x0]
3109 ; CHECK-SD-NEXT: uqshl v0.4s, v0.4s, #1
3110 ; CHECK-SD-NEXT: ret
3112 ; CHECK-GI-LABEL: uqshli4s:
3113 ; CHECK-GI: // %bb.0:
3114 ; CHECK-GI-NEXT: movi v0.4s, #1
3115 ; CHECK-GI-NEXT: ldr q1, [x0]
3116 ; CHECK-GI-NEXT: uqshl v0.4s, v1.4s, v0.4s
3117 ; CHECK-GI-NEXT: ret
3118 %tmp1 = load <4 x i32>, ptr %A
3119 %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; uqshl intrinsic on <2 x i64> with a splat-1 shift vector. With -global-isel=0
; the immediate form (uqshl ..., #1) is expected; with -global-isel=1 the shift
; vector is loaded from the constant pool and the register form is used.
3123 define <2 x i64> @uqshli2d(ptr %A) nounwind {
3124 ; CHECK-SD-LABEL: uqshli2d:
3125 ; CHECK-SD: // %bb.0:
3126 ; CHECK-SD-NEXT: ldr q0, [x0]
3127 ; CHECK-SD-NEXT: uqshl v0.2d, v0.2d, #1
3128 ; CHECK-SD-NEXT: ret
3130 ; CHECK-GI-LABEL: uqshli2d:
3131 ; CHECK-GI: // %bb.0:
3132 ; CHECK-GI-NEXT: adrp x8, .LCPI198_0
3133 ; CHECK-GI-NEXT: ldr q1, [x0]
3134 ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI198_0]
3135 ; CHECK-GI-NEXT: uqshl v0.2d, v1.2d, v0.2d
3136 ; CHECK-GI-NEXT: ret
3137 %tmp1 = load <2 x i64>, ptr %A
3138 %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
; urshl intrinsic on <8 x i8> by a splat of -1, followed by an add with a
; second loaded vector. With -global-isel=0 the pair is expected to combine
; into ursra ..., #1; with -global-isel=1 an all-ones shift vector is
; materialized and a register-form urshl plus a separate add are emitted.
3142 define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind {
3143 ; CHECK-SD-LABEL: ursra8b:
3144 ; CHECK-SD: // %bb.0:
3145 ; CHECK-SD-NEXT: ldr d1, [x0]
3146 ; CHECK-SD-NEXT: ldr d0, [x1]
3147 ; CHECK-SD-NEXT: ursra v0.8b, v1.8b, #1
3148 ; CHECK-SD-NEXT: ret
3150 ; CHECK-GI-LABEL: ursra8b:
3151 ; CHECK-GI: // %bb.0:
3152 ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
3153 ; CHECK-GI-NEXT: ldr d1, [x0]
3154 ; CHECK-GI-NEXT: urshl v0.8b, v1.8b, v0.8b
3155 ; CHECK-GI-NEXT: ldr d1, [x1]
3156 ; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b
3157 ; CHECK-GI-NEXT: ret
3158 %tmp1 = load <8 x i8>, ptr %A
3159 %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
3160 %tmp4 = load <8 x i8>, ptr %B
3161 %tmp5 = add <8 x i8> %tmp3, %tmp4
; urshl intrinsic on <4 x i16> by a splat of -1, followed by an add with a
; second loaded vector. With -global-isel=0 the pair is expected to combine
; into ursra ..., #1; with -global-isel=1 an all-ones shift vector is
; materialized and a register-form urshl plus a separate add are emitted.
3165 define <4 x i16> @ursra4h(ptr %A, ptr %B) nounwind {
3166 ; CHECK-SD-LABEL: ursra4h:
3167 ; CHECK-SD: // %bb.0:
3168 ; CHECK-SD-NEXT: ldr d1, [x0]
3169 ; CHECK-SD-NEXT: ldr d0, [x1]
3170 ; CHECK-SD-NEXT: ursra v0.4h, v1.4h, #1
3171 ; CHECK-SD-NEXT: ret
3173 ; CHECK-GI-LABEL: ursra4h:
3174 ; CHECK-GI: // %bb.0:
3175 ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
3176 ; CHECK-GI-NEXT: ldr d1, [x0]
3177 ; CHECK-GI-NEXT: urshl v0.4h, v1.4h, v0.4h
3178 ; CHECK-GI-NEXT: ldr d1, [x1]
3179 ; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
3180 ; CHECK-GI-NEXT: ret
3181 %tmp1 = load <4 x i16>, ptr %A
3182 %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
3183 %tmp4 = load <4 x i16>, ptr %B
3184 %tmp5 = add <4 x i16> %tmp3, %tmp4
; urshl intrinsic on <2 x i32> by a splat of -1, followed by an add with a
; second loaded vector. With -global-isel=0 the pair is expected to combine
; into ursra ..., #1; with -global-isel=1 an all-ones shift vector is
; materialized and a register-form urshl plus a separate add are emitted.
3188 define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind {
3189 ; CHECK-SD-LABEL: ursra2s:
3190 ; CHECK-SD: // %bb.0:
3191 ; CHECK-SD-NEXT: ldr d1, [x0]
3192 ; CHECK-SD-NEXT: ldr d0, [x1]
3193 ; CHECK-SD-NEXT: ursra v0.2s, v1.2s, #1
3194 ; CHECK-SD-NEXT: ret
3196 ; CHECK-GI-LABEL: ursra2s:
3197 ; CHECK-GI: // %bb.0:
3198 ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
3199 ; CHECK-GI-NEXT: ldr d1, [x0]
3200 ; CHECK-GI-NEXT: urshl v0.2s, v1.2s, v0.2s
3201 ; CHECK-GI-NEXT: ldr d1, [x1]
3202 ; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
3203 ; CHECK-GI-NEXT: ret
3204 %tmp1 = load <2 x i32>, ptr %A
3205 %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
3206 %tmp4 = load <2 x i32>, ptr %B
3207 %tmp5 = add <2 x i32> %tmp3, %tmp4
; urshl intrinsic on <16 x i8> by a splat of -1, followed by an add with a
; second loaded vector. With -global-isel=0 the pair is expected to combine
; into ursra ..., #1; with -global-isel=1 an all-ones shift vector is
; materialized and a register-form urshl plus a separate add are emitted.
3211 define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind {
3212 ; CHECK-SD-LABEL: ursra16b:
3213 ; CHECK-SD: // %bb.0:
3214 ; CHECK-SD-NEXT: ldr q1, [x0]
3215 ; CHECK-SD-NEXT: ldr q0, [x1]
3216 ; CHECK-SD-NEXT: ursra v0.16b, v1.16b, #1
3217 ; CHECK-SD-NEXT: ret
3219 ; CHECK-GI-LABEL: ursra16b:
3220 ; CHECK-GI: // %bb.0:
3221 ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
3222 ; CHECK-GI-NEXT: ldr q1, [x0]
3223 ; CHECK-GI-NEXT: urshl v0.16b, v1.16b, v0.16b
3224 ; CHECK-GI-NEXT: ldr q1, [x1]
3225 ; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b
3226 ; CHECK-GI-NEXT: ret
3227 %tmp1 = load <16 x i8>, ptr %A
3228 %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
3229 %tmp4 = load <16 x i8>, ptr %B
3230 %tmp5 = add <16 x i8> %tmp3, %tmp4
; urshl intrinsic on <8 x i16> by a splat of -1, followed by an add with a
; second loaded vector. With -global-isel=0 the pair is expected to combine
; into ursra ..., #1; with -global-isel=1 an all-ones shift vector is
; materialized and a register-form urshl plus a separate add are emitted.
3234 define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind {
3235 ; CHECK-SD-LABEL: ursra8h:
3236 ; CHECK-SD: // %bb.0:
3237 ; CHECK-SD-NEXT: ldr q1, [x0]
3238 ; CHECK-SD-NEXT: ldr q0, [x1]
3239 ; CHECK-SD-NEXT: ursra v0.8h, v1.8h, #1
3240 ; CHECK-SD-NEXT: ret
3242 ; CHECK-GI-LABEL: ursra8h:
3243 ; CHECK-GI: // %bb.0:
3244 ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
3245 ; CHECK-GI-NEXT: ldr q1, [x0]
3246 ; CHECK-GI-NEXT: urshl v0.8h, v1.8h, v0.8h
3247 ; CHECK-GI-NEXT: ldr q1, [x1]
3248 ; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
3249 ; CHECK-GI-NEXT: ret
3250 %tmp1 = load <8 x i16>, ptr %A
3251 %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
3252 %tmp4 = load <8 x i16>, ptr %B
3253 %tmp5 = add <8 x i16> %tmp3, %tmp4
; urshl intrinsic on <4 x i32> by a splat of -1, followed by an add with a
; second loaded vector. With -global-isel=0 the pair is expected to combine
; into ursra ..., #1; with -global-isel=1 an all-ones shift vector is
; materialized and a register-form urshl plus a separate add are emitted.
3257 define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind {
3258 ; CHECK-SD-LABEL: ursra4s:
3259 ; CHECK-SD: // %bb.0:
3260 ; CHECK-SD-NEXT: ldr q1, [x0]
3261 ; CHECK-SD-NEXT: ldr q0, [x1]
3262 ; CHECK-SD-NEXT: ursra v0.4s, v1.4s, #1
3263 ; CHECK-SD-NEXT: ret
3265 ; CHECK-GI-LABEL: ursra4s:
3266 ; CHECK-GI: // %bb.0:
3267 ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
3268 ; CHECK-GI-NEXT: ldr q1, [x0]
3269 ; CHECK-GI-NEXT: urshl v0.4s, v1.4s, v0.4s
3270 ; CHECK-GI-NEXT: ldr q1, [x1]
3271 ; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
3272 ; CHECK-GI-NEXT: ret
3273 %tmp1 = load <4 x i32>, ptr %A
3274 %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
3275 %tmp4 = load <4 x i32>, ptr %B
3276 %tmp5 = add <4 x i32> %tmp3, %tmp4
; urshl intrinsic on <2 x i64> by a splat of -1, followed by an add with a
; second loaded vector. With -global-isel=0 the pair is expected to combine
; into ursra ..., #1; with -global-isel=1 an all-ones shift vector is
; materialized and a register-form urshl plus a separate add are emitted.
3280 define <2 x i64> @ursra2d(ptr %A, ptr %B) nounwind {
3281 ; CHECK-SD-LABEL: ursra2d:
3282 ; CHECK-SD: // %bb.0:
3283 ; CHECK-SD-NEXT: ldr q1, [x0]
3284 ; CHECK-SD-NEXT: ldr q0, [x1]
3285 ; CHECK-SD-NEXT: ursra v0.2d, v1.2d, #1
3286 ; CHECK-SD-NEXT: ret
3288 ; CHECK-GI-LABEL: ursra2d:
3289 ; CHECK-GI: // %bb.0:
3290 ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
3291 ; CHECK-GI-NEXT: ldr q1, [x0]
3292 ; CHECK-GI-NEXT: urshl v0.2d, v1.2d, v0.2d
3293 ; CHECK-GI-NEXT: ldr q1, [x1]
3294 ; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
3295 ; CHECK-GI-NEXT: ret
3296 %tmp1 = load <2 x i64>, ptr %A
3297 %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
3298 %tmp4 = load <2 x i64>, ptr %B
3299 %tmp5 = add <2 x i64> %tmp3, %tmp4
; urshl intrinsic on <1 x i64> by -1, followed by an add with a second loaded
; value. Both runs expect the pair to combine into the scalar ursra d0, d1, #1.
3303 define <1 x i64> @ursra1d(ptr %A, ptr %B) nounwind {
3304 ; CHECK-LABEL: ursra1d:
3306 ; CHECK-NEXT: ldr d1, [x0]
3307 ; CHECK-NEXT: ldr d0, [x1]
3308 ; CHECK-NEXT: ursra d0, d1, #1
3310 %tmp1 = load <1 x i64>, ptr %A
3311 %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
3312 %tmp4 = load <1 x i64>, ptr %B
3313 %tmp5 = add <1 x i64> %tmp3, %tmp4
; Scalar i64 urshl intrinsic by -1, followed by an add with a second loaded
; value. Both runs expect the operands to be loaded into d registers, combined
; with ursra d1, d0, #1, and the result moved back to x0.
3317 define i64 @ursra_scalar(ptr %A, ptr %B) nounwind {
3318 ; CHECK-LABEL: ursra_scalar:
3320 ; CHECK-NEXT: ldr d0, [x0]
3321 ; CHECK-NEXT: ldr d1, [x1]
3322 ; CHECK-NEXT: ursra d1, d0, #1
3323 ; CHECK-NEXT: fmov x0, d1
3325 %tmp1 = load i64, ptr %A
3326 %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1)
3327 %tmp4 = load i64, ptr %B
3328 %tmp5 = add i64 %tmp3, %tmp4
; srshl intrinsic on <8 x i8> by a splat of -1, followed by an add with a
; second loaded vector. With -global-isel=0 the pair is expected to combine
; into srsra ..., #1; with -global-isel=1 an all-ones shift vector is
; materialized and a register-form srshl plus a separate add are emitted.
3332 define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind {
3333 ; CHECK-SD-LABEL: srsra8b:
3334 ; CHECK-SD: // %bb.0:
3335 ; CHECK-SD-NEXT: ldr d1, [x0]
3336 ; CHECK-SD-NEXT: ldr d0, [x1]
3337 ; CHECK-SD-NEXT: srsra v0.8b, v1.8b, #1
3338 ; CHECK-SD-NEXT: ret
3340 ; CHECK-GI-LABEL: srsra8b:
3341 ; CHECK-GI: // %bb.0:
3342 ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
3343 ; CHECK-GI-NEXT: ldr d1, [x0]
3344 ; CHECK-GI-NEXT: srshl v0.8b, v1.8b, v0.8b
3345 ; CHECK-GI-NEXT: ldr d1, [x1]
3346 ; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b
3347 ; CHECK-GI-NEXT: ret
3348 %tmp1 = load <8 x i8>, ptr %A
3349 %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
3350 %tmp4 = load <8 x i8>, ptr %B
3351 %tmp5 = add <8 x i8> %tmp3, %tmp4
; srshl intrinsic on <4 x i16> by a splat of -1, followed by an add with a
; second loaded vector. With -global-isel=0 the pair is expected to combine
; into srsra ..., #1; with -global-isel=1 an all-ones shift vector is
; materialized and a register-form srshl plus a separate add are emitted.
3355 define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind {
3356 ; CHECK-SD-LABEL: srsra4h:
3357 ; CHECK-SD: // %bb.0:
3358 ; CHECK-SD-NEXT: ldr d1, [x0]
3359 ; CHECK-SD-NEXT: ldr d0, [x1]
3360 ; CHECK-SD-NEXT: srsra v0.4h, v1.4h, #1
3361 ; CHECK-SD-NEXT: ret
3363 ; CHECK-GI-LABEL: srsra4h:
3364 ; CHECK-GI: // %bb.0:
3365 ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
3366 ; CHECK-GI-NEXT: ldr d1, [x0]
3367 ; CHECK-GI-NEXT: srshl v0.4h, v1.4h, v0.4h
3368 ; CHECK-GI-NEXT: ldr d1, [x1]
3369 ; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
3370 ; CHECK-GI-NEXT: ret
3371 %tmp1 = load <4 x i16>, ptr %A
3372 %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
3373 %tmp4 = load <4 x i16>, ptr %B
3374 %tmp5 = add <4 x i16> %tmp3, %tmp4
; srshl intrinsic on <2 x i32> by a splat of -1, followed by an add with a
; second loaded vector. With -global-isel=0 the pair is expected to combine
; into srsra ..., #1; with -global-isel=1 an all-ones shift vector is
; materialized and a register-form srshl plus a separate add are emitted.
3378 define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind {
3379 ; CHECK-SD-LABEL: srsra2s:
3380 ; CHECK-SD: // %bb.0:
3381 ; CHECK-SD-NEXT: ldr d1, [x0]
3382 ; CHECK-SD-NEXT: ldr d0, [x1]
3383 ; CHECK-SD-NEXT: srsra v0.2s, v1.2s, #1
3384 ; CHECK-SD-NEXT: ret
3386 ; CHECK-GI-LABEL: srsra2s:
3387 ; CHECK-GI: // %bb.0:
3388 ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
3389 ; CHECK-GI-NEXT: ldr d1, [x0]
3390 ; CHECK-GI-NEXT: srshl v0.2s, v1.2s, v0.2s
3391 ; CHECK-GI-NEXT: ldr d1, [x1]
3392 ; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
3393 ; CHECK-GI-NEXT: ret
3394 %tmp1 = load <2 x i32>, ptr %A
3395 %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
3396 %tmp4 = load <2 x i32>, ptr %B
3397 %tmp5 = add <2 x i32> %tmp3, %tmp4
; srshl intrinsic on <16 x i8> by a splat of -1, followed by an add with a
; second loaded vector. With -global-isel=0 the pair is expected to combine
; into srsra ..., #1; with -global-isel=1 an all-ones shift vector is
; materialized and a register-form srshl plus a separate add are emitted.
3401 define <16 x i8> @srsra16b(ptr %A, ptr %B) nounwind {
3402 ; CHECK-SD-LABEL: srsra16b:
3403 ; CHECK-SD: // %bb.0:
3404 ; CHECK-SD-NEXT: ldr q1, [x0]
3405 ; CHECK-SD-NEXT: ldr q0, [x1]
3406 ; CHECK-SD-NEXT: srsra v0.16b, v1.16b, #1
3407 ; CHECK-SD-NEXT: ret
3409 ; CHECK-GI-LABEL: srsra16b:
3410 ; CHECK-GI: // %bb.0:
3411 ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
3412 ; CHECK-GI-NEXT: ldr q1, [x0]
3413 ; CHECK-GI-NEXT: srshl v0.16b, v1.16b, v0.16b
3414 ; CHECK-GI-NEXT: ldr q1, [x1]
3415 ; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b
3416 ; CHECK-GI-NEXT: ret
3417 %tmp1 = load <16 x i8>, ptr %A
3418 %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
3419 %tmp4 = load <16 x i8>, ptr %B
3420 %tmp5 = add <16 x i8> %tmp3, %tmp4
; srshl intrinsic on <8 x i16> by a splat of -1, followed by an add with a
; second loaded vector. With -global-isel=0 the pair is expected to combine
; into srsra ..., #1; with -global-isel=1 an all-ones shift vector is
; materialized and a register-form srshl plus a separate add are emitted.
3424 define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind {
3425 ; CHECK-SD-LABEL: srsra8h:
3426 ; CHECK-SD: // %bb.0:
3427 ; CHECK-SD-NEXT: ldr q1, [x0]
3428 ; CHECK-SD-NEXT: ldr q0, [x1]
3429 ; CHECK-SD-NEXT: srsra v0.8h, v1.8h, #1
3430 ; CHECK-SD-NEXT: ret
3432 ; CHECK-GI-LABEL: srsra8h:
3433 ; CHECK-GI: // %bb.0:
3434 ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
3435 ; CHECK-GI-NEXT: ldr q1, [x0]
3436 ; CHECK-GI-NEXT: srshl v0.8h, v1.8h, v0.8h
3437 ; CHECK-GI-NEXT: ldr q1, [x1]
3438 ; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
3439 ; CHECK-GI-NEXT: ret
3440 %tmp1 = load <8 x i16>, ptr %A
3441 %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
3442 %tmp4 = load <8 x i16>, ptr %B
3443 %tmp5 = add <8 x i16> %tmp3, %tmp4
; srshl intrinsic on <4 x i32> by a splat of -1, followed by an add with a
; second loaded vector. With -global-isel=0 the pair is expected to combine
; into srsra ..., #1; with -global-isel=1 an all-ones shift vector is
; materialized and a register-form srshl plus a separate add are emitted.
3447 define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind {
3448 ; CHECK-SD-LABEL: srsra4s:
3449 ; CHECK-SD: // %bb.0:
3450 ; CHECK-SD-NEXT: ldr q1, [x0]
3451 ; CHECK-SD-NEXT: ldr q0, [x1]
3452 ; CHECK-SD-NEXT: srsra v0.4s, v1.4s, #1
3453 ; CHECK-SD-NEXT: ret
3455 ; CHECK-GI-LABEL: srsra4s:
3456 ; CHECK-GI: // %bb.0:
3457 ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
3458 ; CHECK-GI-NEXT: ldr q1, [x0]
3459 ; CHECK-GI-NEXT: srshl v0.4s, v1.4s, v0.4s
3460 ; CHECK-GI-NEXT: ldr q1, [x1]
3461 ; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
3462 ; CHECK-GI-NEXT: ret
3463 %tmp1 = load <4 x i32>, ptr %A
3464 %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
3465 %tmp4 = load <4 x i32>, ptr %B
3466 %tmp5 = add <4 x i32> %tmp3, %tmp4
; srshl intrinsic on <2 x i64> by a splat of -1, followed by an add with a
; second loaded vector. With -global-isel=0 the pair is expected to combine
; into srsra ..., #1; with -global-isel=1 an all-ones shift vector is
; materialized and a register-form srshl plus a separate add are emitted.
3470 define <2 x i64> @srsra2d(ptr %A, ptr %B) nounwind {
3471 ; CHECK-SD-LABEL: srsra2d:
3472 ; CHECK-SD: // %bb.0:
3473 ; CHECK-SD-NEXT: ldr q1, [x0]
3474 ; CHECK-SD-NEXT: ldr q0, [x1]
3475 ; CHECK-SD-NEXT: srsra v0.2d, v1.2d, #1
3476 ; CHECK-SD-NEXT: ret
3478 ; CHECK-GI-LABEL: srsra2d:
3479 ; CHECK-GI: // %bb.0:
3480 ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
3481 ; CHECK-GI-NEXT: ldr q1, [x0]
3482 ; CHECK-GI-NEXT: srshl v0.2d, v1.2d, v0.2d
3483 ; CHECK-GI-NEXT: ldr q1, [x1]
3484 ; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
3485 ; CHECK-GI-NEXT: ret
3486 %tmp1 = load <2 x i64>, ptr %A
3487 %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
3488 %tmp4 = load <2 x i64>, ptr %B
3489 %tmp5 = add <2 x i64> %tmp3, %tmp4
; srshl intrinsic on <1 x i64> by -1, followed by an add with a second loaded
; value. Both runs expect the pair to combine into the scalar srsra d0, d1, #1.
3493 define <1 x i64> @srsra1d(ptr %A, ptr %B) nounwind {
3494 ; CHECK-LABEL: srsra1d:
3496 ; CHECK-NEXT: ldr d1, [x0]
3497 ; CHECK-NEXT: ldr d0, [x1]
3498 ; CHECK-NEXT: srsra d0, d1, #1
3500 %tmp1 = load <1 x i64>, ptr %A
3501 %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
3502 %tmp4 = load <1 x i64>, ptr %B
3503 %tmp5 = add <1 x i64> %tmp3, %tmp4
; Scalar i64 srshl intrinsic by -1, followed by an add with a second loaded
; value. Both runs expect the operands to be loaded into d registers, combined
; with srsra d1, d0, #1, and the result moved back to x0.
3507 define i64 @srsra_scalar(ptr %A, ptr %B) nounwind {
3508 ; CHECK-LABEL: srsra_scalar:
3510 ; CHECK-NEXT: ldr d0, [x0]
3511 ; CHECK-NEXT: ldr d1, [x1]
3512 ; CHECK-NEXT: srsra d1, d0, #1
3513 ; CHECK-NEXT: fmov x0, d1
3515 %tmp1 = load i64, ptr %A
3516 %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1)
3517 %tmp4 = load i64, ptr %B
3518 %tmp5 = add i64 %tmp3, %tmp4
; Plain IR lshr by 1 on <8 x i8> followed by an add with a second loaded
; vector. Both runs expect the pair to combine into usra ..., #1.
3522 define <8 x i8> @usra8b(ptr %A, ptr %B) nounwind {
3523 ; CHECK-LABEL: usra8b:
3525 ; CHECK-NEXT: ldr d1, [x0]
3526 ; CHECK-NEXT: ldr d0, [x1]
3527 ; CHECK-NEXT: usra v0.8b, v1.8b, #1
3529 %tmp1 = load <8 x i8>, ptr %A
3530 %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
3531 %tmp4 = load <8 x i8>, ptr %B
3532 %tmp5 = add <8 x i8> %tmp3, %tmp4
; Plain IR lshr by 1 on <4 x i16> followed by an add with a second loaded
; vector. Both runs expect the pair to combine into usra ..., #1.
3536 define <4 x i16> @usra4h(ptr %A, ptr %B) nounwind {
3537 ; CHECK-LABEL: usra4h:
3539 ; CHECK-NEXT: ldr d1, [x0]
3540 ; CHECK-NEXT: ldr d0, [x1]
3541 ; CHECK-NEXT: usra v0.4h, v1.4h, #1
3543 %tmp1 = load <4 x i16>, ptr %A
3544 %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
3545 %tmp4 = load <4 x i16>, ptr %B
3546 %tmp5 = add <4 x i16> %tmp3, %tmp4
; Plain IR lshr by 1 on <2 x i32> followed by an add with a second loaded
; vector. Both runs expect the pair to combine into usra ..., #1.
3550 define <2 x i32> @usra2s(ptr %A, ptr %B) nounwind {
3551 ; CHECK-LABEL: usra2s:
3553 ; CHECK-NEXT: ldr d1, [x0]
3554 ; CHECK-NEXT: ldr d0, [x1]
3555 ; CHECK-NEXT: usra v0.2s, v1.2s, #1
3557 %tmp1 = load <2 x i32>, ptr %A
3558 %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
3559 %tmp4 = load <2 x i32>, ptr %B
3560 %tmp5 = add <2 x i32> %tmp3, %tmp4
; Plain IR lshr by 1 on <16 x i8> followed by an add with a second loaded
; vector. Both runs expect the pair to combine into usra ..., #1.
3564 define <16 x i8> @usra16b(ptr %A, ptr %B) nounwind {
3565 ; CHECK-LABEL: usra16b:
3567 ; CHECK-NEXT: ldr q1, [x0]
3568 ; CHECK-NEXT: ldr q0, [x1]
3569 ; CHECK-NEXT: usra v0.16b, v1.16b, #1
3571 %tmp1 = load <16 x i8>, ptr %A
3572 %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
3573 %tmp4 = load <16 x i8>, ptr %B
3574 %tmp5 = add <16 x i8> %tmp3, %tmp4
; Plain IR lshr by 1 on <8 x i16> followed by an add with a second loaded
; vector. Both runs expect the pair to combine into usra ..., #1.
3578 define <8 x i16> @usra8h(ptr %A, ptr %B) nounwind {
3579 ; CHECK-LABEL: usra8h:
3581 ; CHECK-NEXT: ldr q1, [x0]
3582 ; CHECK-NEXT: ldr q0, [x1]
3583 ; CHECK-NEXT: usra v0.8h, v1.8h, #1
3585 %tmp1 = load <8 x i16>, ptr %A
3586 %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
3587 %tmp4 = load <8 x i16>, ptr %B
3588 %tmp5 = add <8 x i16> %tmp3, %tmp4
; Plain IR lshr by 1 on <4 x i32> followed by an add with a second loaded
; vector. Both runs expect the pair to combine into usra ..., #1.
3592 define <4 x i32> @usra4s(ptr %A, ptr %B) nounwind {
3593 ; CHECK-LABEL: usra4s:
3595 ; CHECK-NEXT: ldr q1, [x0]
3596 ; CHECK-NEXT: ldr q0, [x1]
3597 ; CHECK-NEXT: usra v0.4s, v1.4s, #1
3599 %tmp1 = load <4 x i32>, ptr %A
3600 %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
3601 %tmp4 = load <4 x i32>, ptr %B
3602 %tmp5 = add <4 x i32> %tmp3, %tmp4
; Plain IR lshr by 1 on <2 x i64> followed by an add with a second loaded
; vector. Both runs expect the pair to combine into usra ..., #1.
3606 define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind {
3607 ; CHECK-LABEL: usra2d:
3609 ; CHECK-NEXT: ldr q1, [x0]
3610 ; CHECK-NEXT: ldr q0, [x1]
3611 ; CHECK-NEXT: usra v0.2d, v1.2d, #1
3613 %tmp1 = load <2 x i64>, ptr %A
3614 %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
3615 %tmp4 = load <2 x i64>, ptr %B
3616 %tmp5 = add <2 x i64> %tmp3, %tmp4
; Plain IR lshr by 1 on <1 x i64> followed by an add with a second loaded
; value. With -global-isel=0 the pair is expected to combine into the scalar
; usra d0, d1, #1; with -global-isel=1 the work is done in general-purpose
; registers (add with an lsr #1 shifted operand) and then moved into d0.
3620 define <1 x i64> @usra1d(ptr %A, ptr %B) nounwind {
3621 ; CHECK-SD-LABEL: usra1d:
3622 ; CHECK-SD: // %bb.0:
3623 ; CHECK-SD-NEXT: ldr d1, [x0]
3624 ; CHECK-SD-NEXT: ldr d0, [x1]
3625 ; CHECK-SD-NEXT: usra d0, d1, #1
3626 ; CHECK-SD-NEXT: ret
3628 ; CHECK-GI-LABEL: usra1d:
3629 ; CHECK-GI: // %bb.0:
3630 ; CHECK-GI-NEXT: ldr x8, [x0]
3631 ; CHECK-GI-NEXT: ldr x9, [x1]
3632 ; CHECK-GI-NEXT: add x8, x9, x8, lsr #1
3633 ; CHECK-GI-NEXT: fmov d0, x8
3634 ; CHECK-GI-NEXT: ret
3635 %tmp1 = load <1 x i64>, ptr %A
3636 %tmp3 = lshr <1 x i64> %tmp1, <i64 1>
3637 %tmp4 = load <1 x i64>, ptr %B
3638 %tmp5 = add <1 x i64> %tmp3, %tmp4
; Plain IR ashr by 1 on <8 x i8> followed by an add with a second loaded
; vector. Both runs expect the pair to combine into ssra ..., #1.
3642 define <8 x i8> @ssra8b(ptr %A, ptr %B) nounwind {
3643 ; CHECK-LABEL: ssra8b:
3645 ; CHECK-NEXT: ldr d1, [x0]
3646 ; CHECK-NEXT: ldr d0, [x1]
3647 ; CHECK-NEXT: ssra v0.8b, v1.8b, #1
3649 %tmp1 = load <8 x i8>, ptr %A
3650 %tmp3 = ashr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
3651 %tmp4 = load <8 x i8>, ptr %B
3652 %tmp5 = add <8 x i8> %tmp3, %tmp4
; Plain IR ashr by 1 on <4 x i16> followed by an add with a second loaded
; vector. Both runs expect the pair to combine into ssra ..., #1.
3656 define <4 x i16> @ssra4h(ptr %A, ptr %B) nounwind {
3657 ; CHECK-LABEL: ssra4h:
3659 ; CHECK-NEXT: ldr d1, [x0]
3660 ; CHECK-NEXT: ldr d0, [x1]
3661 ; CHECK-NEXT: ssra v0.4h, v1.4h, #1
3663 %tmp1 = load <4 x i16>, ptr %A
3664 %tmp3 = ashr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
3665 %tmp4 = load <4 x i16>, ptr %B
3666 %tmp5 = add <4 x i16> %tmp3, %tmp4
; Plain IR ashr by 1 on <2 x i32> followed by an add with a second loaded
; vector. Both runs expect the pair to combine into ssra ..., #1.
3670 define <2 x i32> @ssra2s(ptr %A, ptr %B) nounwind {
3671 ; CHECK-LABEL: ssra2s:
3673 ; CHECK-NEXT: ldr d1, [x0]
3674 ; CHECK-NEXT: ldr d0, [x1]
3675 ; CHECK-NEXT: ssra v0.2s, v1.2s, #1
3677 %tmp1 = load <2 x i32>, ptr %A
3678 %tmp3 = ashr <2 x i32> %tmp1, <i32 1, i32 1>
3679 %tmp4 = load <2 x i32>, ptr %B
3680 %tmp5 = add <2 x i32> %tmp3, %tmp4
; Plain IR ashr by 1 on <16 x i8> followed by an add with a second loaded
; vector. Both runs expect the pair to combine into ssra ..., #1.
3684 define <16 x i8> @ssra16b(ptr %A, ptr %B) nounwind {
3685 ; CHECK-LABEL: ssra16b:
3687 ; CHECK-NEXT: ldr q1, [x0]
3688 ; CHECK-NEXT: ldr q0, [x1]
3689 ; CHECK-NEXT: ssra v0.16b, v1.16b, #1
3691 %tmp1 = load <16 x i8>, ptr %A
3692 %tmp3 = ashr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
3693 %tmp4 = load <16 x i8>, ptr %B
3694 %tmp5 = add <16 x i8> %tmp3, %tmp4
; Plain IR ashr by 1 on <8 x i16> followed by an add with a second loaded
; vector. Both runs expect the pair to combine into ssra ..., #1.
3698 define <8 x i16> @ssra8h(ptr %A, ptr %B) nounwind {
3699 ; CHECK-LABEL: ssra8h:
3701 ; CHECK-NEXT: ldr q1, [x0]
3702 ; CHECK-NEXT: ldr q0, [x1]
3703 ; CHECK-NEXT: ssra v0.8h, v1.8h, #1
3705 %tmp1 = load <8 x i16>, ptr %A
3706 %tmp3 = ashr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
3707 %tmp4 = load <8 x i16>, ptr %B
3708 %tmp5 = add <8 x i16> %tmp3, %tmp4
; Plain IR ashr by 1 on <4 x i32> followed by an add with a second loaded
; vector. Both runs expect the pair to combine into ssra ..., #1.
3712 define <4 x i32> @ssra4s(ptr %A, ptr %B) nounwind {
3713 ; CHECK-LABEL: ssra4s:
3715 ; CHECK-NEXT: ldr q1, [x0]
3716 ; CHECK-NEXT: ldr q0, [x1]
3717 ; CHECK-NEXT: ssra v0.4s, v1.4s, #1
3719 %tmp1 = load <4 x i32>, ptr %A
3720 %tmp3 = ashr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
3721 %tmp4 = load <4 x i32>, ptr %B
3722 %tmp5 = add <4 x i32> %tmp3, %tmp4
; Plain IR ashr by 1 on <2 x i64> followed by an add with a second loaded
; vector. Both runs expect the pair to combine into ssra ..., #1.
3726 define <2 x i64> @ssra2d(ptr %A, ptr %B) nounwind {
3727 ; CHECK-LABEL: ssra2d:
3729 ; CHECK-NEXT: ldr q1, [x0]
3730 ; CHECK-NEXT: ldr q0, [x1]
3731 ; CHECK-NEXT: ssra v0.2d, v1.2d, #1
3733 %tmp1 = load <2 x i64>, ptr %A
3734 %tmp3 = ashr <2 x i64> %tmp1, <i64 1, i64 1>
3735 %tmp4 = load <2 x i64>, ptr %B
3736 %tmp5 = add <2 x i64> %tmp3, %tmp4
; lshr by 1 on <8 x i8> combined with a second loaded vector using or instead
; of add. Both runs expect a separate ushr #1 followed by orr, i.e. no
; shift-and-accumulate instruction is formed.
3740 define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind {
3741 ; CHECK-LABEL: shr_orr8b:
3743 ; CHECK-NEXT: ldr d0, [x0]
3744 ; CHECK-NEXT: ldr d1, [x1]
3745 ; CHECK-NEXT: ushr v0.8b, v0.8b, #1
3746 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
3748 %tmp1 = load <8 x i8>, ptr %A
3749 %tmp4 = load <8 x i8>, ptr %B
3750 %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
3751 %tmp5 = or <8 x i8> %tmp3, %tmp4
; lshr by 1 on <4 x i16> combined with a second loaded vector using or instead
; of add. Both runs expect a separate ushr #1 followed by orr, i.e. no
; shift-and-accumulate instruction is formed.
3755 define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind {
3756 ; CHECK-LABEL: shr_orr4h:
3758 ; CHECK-NEXT: ldr d0, [x0]
3759 ; CHECK-NEXT: ldr d1, [x1]
3760 ; CHECK-NEXT: ushr v0.4h, v0.4h, #1
3761 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
3763 %tmp1 = load <4 x i16>, ptr %A
3764 %tmp4 = load <4 x i16>, ptr %B
3765 %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
3766 %tmp5 = or <4 x i16> %tmp3, %tmp4
; lshr by 1 on <2 x i32> combined with a second loaded vector using or instead
; of add. Both runs expect a separate ushr #1 followed by orr, i.e. no
; shift-and-accumulate instruction is formed.
3770 define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind {
3771 ; CHECK-LABEL: shr_orr2s:
3773 ; CHECK-NEXT: ldr d0, [x0]
3774 ; CHECK-NEXT: ldr d1, [x1]
3775 ; CHECK-NEXT: ushr v0.2s, v0.2s, #1
3776 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
3778 %tmp1 = load <2 x i32>, ptr %A
3779 %tmp4 = load <2 x i32>, ptr %B
3780 %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
3781 %tmp5 = or <2 x i32> %tmp3, %tmp4
; lshr by 1 on <16 x i8> combined with a second loaded vector using or instead
; of add. Both runs expect a separate ushr #1 followed by orr, i.e. no
; shift-and-accumulate instruction is formed.
3785 define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind {
3786 ; CHECK-LABEL: shr_orr16b:
3788 ; CHECK-NEXT: ldr q0, [x0]
3789 ; CHECK-NEXT: ldr q1, [x1]
3790 ; CHECK-NEXT: ushr v0.16b, v0.16b, #1
3791 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
3793 %tmp1 = load <16 x i8>, ptr %A
3794 %tmp4 = load <16 x i8>, ptr %B
3795 %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
3796 %tmp5 = or <16 x i8> %tmp3, %tmp4
; lshr by 1 on <8 x i16> combined with a second loaded vector using or instead
; of add. Both runs expect a separate ushr #1 followed by orr, i.e. no
; shift-and-accumulate instruction is formed.
3800 define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind {
3801 ; CHECK-LABEL: shr_orr8h:
3803 ; CHECK-NEXT: ldr q0, [x0]
3804 ; CHECK-NEXT: ldr q1, [x1]
3805 ; CHECK-NEXT: ushr v0.8h, v0.8h, #1
3806 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
3808 %tmp1 = load <8 x i16>, ptr %A
3809 %tmp4 = load <8 x i16>, ptr %B
3810 %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
3811 %tmp5 = or <8 x i16> %tmp3, %tmp4
; Logical right shift by 1 of the <4 x i32> loaded from %A, OR'd with the
; <4 x i32> loaded from %B. Both llc runs share the expectations: a single
; immediate ushr on .4s lanes followed by a 128-bit orr.
3815 define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind {
3816 ; CHECK-LABEL: shr_orr4s:
3818 ; CHECK-NEXT: ldr q0, [x0]
3819 ; CHECK-NEXT: ldr q1, [x1]
3820 ; CHECK-NEXT: ushr v0.4s, v0.4s, #1
3821 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
3823 %tmp1 = load <4 x i32>, ptr %A
3824 %tmp4 = load <4 x i32>, ptr %B
3825 %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
3826 %tmp5 = or <4 x i32> %tmp3, %tmp4
; Logical right shift by 1 of the <2 x i64> loaded from %A, OR'd with the
; <2 x i64> loaded from %B. Both llc runs share the expectations: a single
; immediate ushr on .2d lanes followed by a 128-bit orr.
3830 define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind {
3831 ; CHECK-LABEL: shr_orr2d:
3833 ; CHECK-NEXT: ldr q0, [x0]
3834 ; CHECK-NEXT: ldr q1, [x1]
3835 ; CHECK-NEXT: ushr v0.2d, v0.2d, #1
3836 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
3838 %tmp1 = load <2 x i64>, ptr %A
3839 %tmp4 = load <2 x i64>, ptr %B
3840 %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
3841 %tmp5 = or <2 x i64> %tmp3, %tmp4
; Left shift by 1 of the <8 x i8> loaded from %A, OR'd with the <8 x i8>
; loaded from %B. The two llc runs are expected to differ: with
; -global-isel=0 the shift is emitted as "add v0, v0, v0", while with
; -global-isel=1 it stays an immediate "shl #1".
3845 define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind {
3846 ; CHECK-SD-LABEL: shl_orr8b:
3847 ; CHECK-SD: // %bb.0:
3848 ; CHECK-SD-NEXT: ldr d0, [x0]
3849 ; CHECK-SD-NEXT: ldr d1, [x1]
3850 ; CHECK-SD-NEXT: add v0.8b, v0.8b, v0.8b
3851 ; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
3852 ; CHECK-SD-NEXT: ret
3854 ; CHECK-GI-LABEL: shl_orr8b:
3855 ; CHECK-GI: // %bb.0:
3856 ; CHECK-GI-NEXT: ldr d0, [x0]
3857 ; CHECK-GI-NEXT: ldr d1, [x1]
3858 ; CHECK-GI-NEXT: shl v0.8b, v0.8b, #1
3859 ; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
3860 ; CHECK-GI-NEXT: ret
3861 %tmp1 = load <8 x i8>, ptr %A
3862 %tmp4 = load <8 x i8>, ptr %B
3863 %tmp3 = shl <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
3864 %tmp5 = or <8 x i8> %tmp3, %tmp4
; Left shift by 1 of the <4 x i16> loaded from %A, OR'd with the <4 x i16>
; loaded from %B. The two llc runs are expected to differ: with
; -global-isel=0 the shift is emitted as "add v0, v0, v0", while with
; -global-isel=1 it stays an immediate "shl #1".
3868 define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind {
3869 ; CHECK-SD-LABEL: shl_orr4h:
3870 ; CHECK-SD: // %bb.0:
3871 ; CHECK-SD-NEXT: ldr d0, [x0]
3872 ; CHECK-SD-NEXT: ldr d1, [x1]
3873 ; CHECK-SD-NEXT: add v0.4h, v0.4h, v0.4h
3874 ; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
3875 ; CHECK-SD-NEXT: ret
3877 ; CHECK-GI-LABEL: shl_orr4h:
3878 ; CHECK-GI: // %bb.0:
3879 ; CHECK-GI-NEXT: ldr d0, [x0]
3880 ; CHECK-GI-NEXT: ldr d1, [x1]
3881 ; CHECK-GI-NEXT: shl v0.4h, v0.4h, #1
3882 ; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
3883 ; CHECK-GI-NEXT: ret
3884 %tmp1 = load <4 x i16>, ptr %A
3885 %tmp4 = load <4 x i16>, ptr %B
3886 %tmp3 = shl <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
3887 %tmp5 = or <4 x i16> %tmp3, %tmp4
; Left shift by 1 of the <2 x i32> loaded from %A, OR'd with the <2 x i32>
; loaded from %B. The two llc runs are expected to differ: with
; -global-isel=0 the shift is emitted as "add v0, v0, v0", while with
; -global-isel=1 it stays an immediate "shl #1".
3891 define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind {
3892 ; CHECK-SD-LABEL: shl_orr2s:
3893 ; CHECK-SD: // %bb.0:
3894 ; CHECK-SD-NEXT: ldr d0, [x0]
3895 ; CHECK-SD-NEXT: ldr d1, [x1]
3896 ; CHECK-SD-NEXT: add v0.2s, v0.2s, v0.2s
3897 ; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
3898 ; CHECK-SD-NEXT: ret
3900 ; CHECK-GI-LABEL: shl_orr2s:
3901 ; CHECK-GI: // %bb.0:
3902 ; CHECK-GI-NEXT: ldr d0, [x0]
3903 ; CHECK-GI-NEXT: ldr d1, [x1]
3904 ; CHECK-GI-NEXT: shl v0.2s, v0.2s, #1
3905 ; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
3906 ; CHECK-GI-NEXT: ret
3907 %tmp1 = load <2 x i32>, ptr %A
3908 %tmp4 = load <2 x i32>, ptr %B
3909 %tmp3 = shl <2 x i32> %tmp1, <i32 1, i32 1>
3910 %tmp5 = or <2 x i32> %tmp3, %tmp4
; Left shift by 1 of the <16 x i8> loaded from %A, OR'd with the <16 x i8>
; loaded from %B. The two llc runs are expected to differ: with
; -global-isel=0 the shift is emitted as "add v0, v0, v0", while with
; -global-isel=1 it stays an immediate "shl #1".
3914 define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind {
3915 ; CHECK-SD-LABEL: shl_orr16b:
3916 ; CHECK-SD: // %bb.0:
3917 ; CHECK-SD-NEXT: ldr q0, [x0]
3918 ; CHECK-SD-NEXT: ldr q1, [x1]
3919 ; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b
3920 ; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
3921 ; CHECK-SD-NEXT: ret
3923 ; CHECK-GI-LABEL: shl_orr16b:
3924 ; CHECK-GI: // %bb.0:
3925 ; CHECK-GI-NEXT: ldr q0, [x0]
3926 ; CHECK-GI-NEXT: ldr q1, [x1]
3927 ; CHECK-GI-NEXT: shl v0.16b, v0.16b, #1
3928 ; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
3929 ; CHECK-GI-NEXT: ret
3930 %tmp1 = load <16 x i8>, ptr %A
3931 %tmp4 = load <16 x i8>, ptr %B
3932 %tmp3 = shl <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
3933 %tmp5 = or <16 x i8> %tmp3, %tmp4
; Left shift by 1 of the <8 x i16> loaded from %A, OR'd with the <8 x i16>
; loaded from %B. The two llc runs are expected to differ: with
; -global-isel=0 the shift is emitted as "add v0, v0, v0", while with
; -global-isel=1 it stays an immediate "shl #1".
3937 define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind {
3938 ; CHECK-SD-LABEL: shl_orr8h:
3939 ; CHECK-SD: // %bb.0:
3940 ; CHECK-SD-NEXT: ldr q0, [x0]
3941 ; CHECK-SD-NEXT: ldr q1, [x1]
3942 ; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h
3943 ; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
3944 ; CHECK-SD-NEXT: ret
3946 ; CHECK-GI-LABEL: shl_orr8h:
3947 ; CHECK-GI: // %bb.0:
3948 ; CHECK-GI-NEXT: ldr q0, [x0]
3949 ; CHECK-GI-NEXT: ldr q1, [x1]
3950 ; CHECK-GI-NEXT: shl v0.8h, v0.8h, #1
3951 ; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
3952 ; CHECK-GI-NEXT: ret
3953 %tmp1 = load <8 x i16>, ptr %A
3954 %tmp4 = load <8 x i16>, ptr %B
3955 %tmp3 = shl <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
3956 %tmp5 = or <8 x i16> %tmp3, %tmp4
; Left shift by 1 of the <4 x i32> loaded from %A, OR'd with the <4 x i32>
; loaded from %B. The two llc runs are expected to differ: with
; -global-isel=0 the shift is emitted as "add v0, v0, v0", while with
; -global-isel=1 it stays an immediate "shl #1".
3960 define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind {
3961 ; CHECK-SD-LABEL: shl_orr4s:
3962 ; CHECK-SD: // %bb.0:
3963 ; CHECK-SD-NEXT: ldr q0, [x0]
3964 ; CHECK-SD-NEXT: ldr q1, [x1]
3965 ; CHECK-SD-NEXT: add v0.4s, v0.4s, v0.4s
3966 ; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
3967 ; CHECK-SD-NEXT: ret
3969 ; CHECK-GI-LABEL: shl_orr4s:
3970 ; CHECK-GI: // %bb.0:
3971 ; CHECK-GI-NEXT: ldr q0, [x0]
3972 ; CHECK-GI-NEXT: ldr q1, [x1]
3973 ; CHECK-GI-NEXT: shl v0.4s, v0.4s, #1
3974 ; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
3975 ; CHECK-GI-NEXT: ret
3976 %tmp1 = load <4 x i32>, ptr %A
3977 %tmp4 = load <4 x i32>, ptr %B
3978 %tmp3 = shl <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
3979 %tmp5 = or <4 x i32> %tmp3, %tmp4
; Left shift by 1 of the <2 x i64> loaded from %A, OR'd with the <2 x i64>
; loaded from %B. The two llc runs are expected to differ: with
; -global-isel=0 the shift is emitted as "add v0, v0, v0", while with
; -global-isel=1 it stays an immediate "shl #1".
3983 define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind {
3984 ; CHECK-SD-LABEL: shl_orr2d:
3985 ; CHECK-SD: // %bb.0:
3986 ; CHECK-SD-NEXT: ldr q0, [x0]
3987 ; CHECK-SD-NEXT: ldr q1, [x1]
3988 ; CHECK-SD-NEXT: add v0.2d, v0.2d, v0.2d
3989 ; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
3990 ; CHECK-SD-NEXT: ret
3992 ; CHECK-GI-LABEL: shl_orr2d:
3993 ; CHECK-GI: // %bb.0:
3994 ; CHECK-GI-NEXT: ldr q0, [x0]
3995 ; CHECK-GI-NEXT: ldr q1, [x1]
3996 ; CHECK-GI-NEXT: shl v0.2d, v0.2d, #1
3997 ; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
3998 ; CHECK-GI-NEXT: ret
3999 %tmp1 = load <2 x i64>, ptr %A
4000 %tmp4 = load <2 x i64>, ptr %B
4001 %tmp3 = shl <2 x i64> %tmp1, <i64 1, i64 1>
4002 %tmp5 = or <2 x i64> %tmp3, %tmp4
; Zero-extends <8 x i8> to <8 x i16> and shifts each lane left by 8 (the
; source element width). With -global-isel=0 this is expected to become one
; "shll #8"; with -global-isel=1 the expectation is a separate "ushll #0"
; followed by "shl #8".
4006 define <8 x i16> @shll(<8 x i8> %in) {
4007 ; CHECK-SD-LABEL: shll:
4008 ; CHECK-SD: // %bb.0:
4009 ; CHECK-SD-NEXT: shll v0.8h, v0.8b, #8
4010 ; CHECK-SD-NEXT: ret
4012 ; CHECK-GI-LABEL: shll:
4013 ; CHECK-GI: // %bb.0:
4014 ; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
4015 ; CHECK-GI-NEXT: shl v0.8h, v0.8h, #8
4016 ; CHECK-GI-NEXT: ret
4017 %ext = zext <8 x i8> %in to <8 x i16>
4018 %res = shl <8 x i16> %ext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; Takes lanes 4..7 of the <8 x i16> input via shufflevector, zero-extends
; them to <4 x i32>, and shifts each lane left by 16. With -global-isel=0
; this is expected to become one "shll2 #16"; with -global-isel=1 the
; expectation is "ushll2 #0" followed by "shl #16".
4022 define <4 x i32> @shll_high(<8 x i16> %in) {
4023 ; CHECK-SD-LABEL: shll_high:
4024 ; CHECK-SD: // %bb.0:
4025 ; CHECK-SD-NEXT: shll2 v0.4s, v0.8h, #16
4026 ; CHECK-SD-NEXT: ret
4028 ; CHECK-GI-LABEL: shll_high:
4029 ; CHECK-GI: // %bb.0:
4030 ; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
4031 ; CHECK-GI-NEXT: shl v0.4s, v0.4s, #16
4032 ; CHECK-GI-NEXT: ret
4033 %extract = shufflevector <8 x i16> %in, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
4034 %ext = zext <4 x i16> %extract to <4 x i32>
4035 %res = shl <4 x i32> %ext, <i32 16, i32 16, i32 16, i32 16>
; Calls the llvm.aarch64.neon.vsli intrinsic on the <8 x i8> vectors loaded
; from %A and %B with an immediate of 1. Both llc runs share the
; expectation of a single "sli ... #1" on .8b lanes.
4039 define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind {
4040 ; CHECK-LABEL: sli8b:
4042 ; CHECK-NEXT: ldr d0, [x0]
4043 ; CHECK-NEXT: ldr d1, [x1]
4044 ; CHECK-NEXT: sli v0.8b, v1.8b, #1
4046 %tmp1 = load <8 x i8>, ptr %A
4047 %tmp2 = load <8 x i8>, ptr %B
4048 %tmp3 = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1)
; Calls the llvm.aarch64.neon.vsli intrinsic on the <4 x i16> vectors loaded
; from %A and %B with an immediate of 1. Both llc runs share the
; expectation of a single "sli ... #1" on .4h lanes.
4052 define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind {
4053 ; CHECK-LABEL: sli4h:
4055 ; CHECK-NEXT: ldr d0, [x0]
4056 ; CHECK-NEXT: ldr d1, [x1]
4057 ; CHECK-NEXT: sli v0.4h, v1.4h, #1
4059 %tmp1 = load <4 x i16>, ptr %A
4060 %tmp2 = load <4 x i16>, ptr %B
4061 %tmp3 = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1)
; Calls the llvm.aarch64.neon.vsli intrinsic on the <2 x i32> vectors loaded
; from %A and %B with an immediate of 1. Both llc runs share the
; expectation of a single "sli ... #1" on .2s lanes.
4065 define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind {
4066 ; CHECK-LABEL: sli2s:
4068 ; CHECK-NEXT: ldr d0, [x0]
4069 ; CHECK-NEXT: ldr d1, [x1]
4070 ; CHECK-NEXT: sli v0.2s, v1.2s, #1
4072 %tmp1 = load <2 x i32>, ptr %A
4073 %tmp2 = load <2 x i32>, ptr %B
4074 %tmp3 = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1)
; Calls the llvm.aarch64.neon.vsli intrinsic on the <1 x i64> vectors loaded
; from %A and %B with an immediate of 1. Both llc runs share the
; expectation of the scalar form "sli d0, d1, #1".
4078 define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind {
4079 ; CHECK-LABEL: sli1d:
4081 ; CHECK-NEXT: ldr d0, [x0]
4082 ; CHECK-NEXT: ldr d1, [x1]
4083 ; CHECK-NEXT: sli d0, d1, #1
4085 %tmp1 = load <1 x i64>, ptr %A
4086 %tmp2 = load <1 x i64>, ptr %B
4087 %tmp3 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1)
; Calls the llvm.aarch64.neon.vsli intrinsic on the <16 x i8> vectors loaded
; from %A and %B with an immediate of 1. Both llc runs share the
; expectation of a single "sli ... #1" on .16b lanes.
4091 define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind {
4092 ; CHECK-LABEL: sli16b:
4094 ; CHECK-NEXT: ldr q0, [x0]
4095 ; CHECK-NEXT: ldr q1, [x1]
4096 ; CHECK-NEXT: sli v0.16b, v1.16b, #1
4098 %tmp1 = load <16 x i8>, ptr %A
4099 %tmp2 = load <16 x i8>, ptr %B
4100 %tmp3 = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1)
; Calls the llvm.aarch64.neon.vsli intrinsic on the <8 x i16> vectors loaded
; from %A and %B with an immediate of 1. Both llc runs share the
; expectation of a single "sli ... #1" on .8h lanes.
4104 define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind {
4105 ; CHECK-LABEL: sli8h:
4107 ; CHECK-NEXT: ldr q0, [x0]
4108 ; CHECK-NEXT: ldr q1, [x1]
4109 ; CHECK-NEXT: sli v0.8h, v1.8h, #1
4111 %tmp1 = load <8 x i16>, ptr %A
4112 %tmp2 = load <8 x i16>, ptr %B
4113 %tmp3 = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1)
; Calls the llvm.aarch64.neon.vsli intrinsic on the <4 x i32> vectors loaded
; from %A and %B with an immediate of 1. Both llc runs share the
; expectation of a single "sli ... #1" on .4s lanes.
4117 define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind {
4118 ; CHECK-LABEL: sli4s:
4120 ; CHECK-NEXT: ldr q0, [x0]
4121 ; CHECK-NEXT: ldr q1, [x1]
4122 ; CHECK-NEXT: sli v0.4s, v1.4s, #1
4124 %tmp1 = load <4 x i32>, ptr %A
4125 %tmp2 = load <4 x i32>, ptr %B
4126 %tmp3 = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1)
; Calls the llvm.aarch64.neon.vsli intrinsic on the <2 x i64> vectors loaded
; from %A and %B with an immediate of 1. Both llc runs share the
; expectation of a single "sli ... #1" on .2d lanes.
4130 define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind {
4131 ; CHECK-LABEL: sli2d:
4133 ; CHECK-NEXT: ldr q0, [x0]
4134 ; CHECK-NEXT: ldr q1, [x1]
4135 ; CHECK-NEXT: sli v0.2d, v1.2d, #1
4137 %tmp1 = load <2 x i64>, ptr %A
4138 %tmp2 = load <2 x i64>, ptr %B
4139 %tmp3 = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1)
; Declarations of the llvm.aarch64.neon.vsli intrinsic overloads called by
; the sli* tests: two vector operands of the overloaded type plus an i32
; operand. The first four are the 64-bit vector types, the last four the
; 128-bit vector types.
4143 declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone
4144 declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone
4145 declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone
4146 declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone
4148 declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone
4149 declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone
4150 declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone
4151 declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone
; Arithmetic right shift of a <1 x i64> by a variable <1 x i64> amount.
; With -global-isel=0 the expectation is a negate of the amount followed by
; "sshl" on d registers; with -global-isel=1 the operands are moved to
; general-purpose registers, shifted with "asr", and moved back.
4153 define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) {
4154 ; CHECK-SD-LABEL: ashr_v1i64:
4155 ; CHECK-SD: // %bb.0:
4156 ; CHECK-SD-NEXT: neg d1, d1
4157 ; CHECK-SD-NEXT: sshl d0, d0, d1
4158 ; CHECK-SD-NEXT: ret
4160 ; CHECK-GI-LABEL: ashr_v1i64:
4161 ; CHECK-GI: // %bb.0:
4162 ; CHECK-GI-NEXT: fmov x8, d0
4163 ; CHECK-GI-NEXT: fmov x9, d1
4164 ; CHECK-GI-NEXT: asr x8, x8, x9
4165 ; CHECK-GI-NEXT: fmov d0, x8
4166 ; CHECK-GI-NEXT: ret
4167 %c = ashr <1 x i64> %a, %b
; Feeds the result of the addp intrinsic into llvm.aarch64.neon.sqshl with
; an all-zero shift vector and stores the result to %dst. With
; -global-isel=0 no shift instruction is expected (only addp + str); with
; -global-isel=1 a zero vector is materialized and "sqshl" is still emitted.
4171 define void @sqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
4172 ; CHECK-SD-LABEL: sqshl_zero_shift_amount:
4173 ; CHECK-SD: // %bb.0: // %entry
4174 ; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d
4175 ; CHECK-SD-NEXT: str q0, [x0]
4176 ; CHECK-SD-NEXT: ret
4178 ; CHECK-GI-LABEL: sqshl_zero_shift_amount:
4179 ; CHECK-GI: // %bb.0: // %entry
4180 ; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
4181 ; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d
4182 ; CHECK-GI-NEXT: sqshl v0.2d, v0.2d, v2.2d
4183 ; CHECK-GI-NEXT: str q0, [x0]
4184 ; CHECK-GI-NEXT: ret
4186 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
4187 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
4188 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; Feeds the result of the addp intrinsic into llvm.aarch64.neon.uqshl with
; an all-zero shift vector and stores the result to %dst. With
; -global-isel=0 no shift instruction is expected (only addp + str); with
; -global-isel=1 a zero vector is materialized and "uqshl" is still emitted.
4192 define void @uqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
4193 ; CHECK-SD-LABEL: uqshl_zero_shift_amount:
4194 ; CHECK-SD: // %bb.0: // %entry
4195 ; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d
4196 ; CHECK-SD-NEXT: str q0, [x0]
4197 ; CHECK-SD-NEXT: ret
4199 ; CHECK-GI-LABEL: uqshl_zero_shift_amount:
4200 ; CHECK-GI: // %bb.0: // %entry
4201 ; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
4202 ; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d
4203 ; CHECK-GI-NEXT: uqshl v0.2d, v0.2d, v2.2d
4204 ; CHECK-GI-NEXT: str q0, [x0]
4205 ; CHECK-GI-NEXT: ret
4207 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
4208 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
4209 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; Feeds the result of the addp intrinsic into llvm.aarch64.neon.srshl with
; an all-zero shift vector and stores the result to %dst. With
; -global-isel=0 no shift instruction is expected (only addp + str); with
; -global-isel=1 a zero vector is materialized and "srshl" is still emitted.
4213 define void @srshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
4214 ; CHECK-SD-LABEL: srshl_zero_shift_amount:
4215 ; CHECK-SD: // %bb.0: // %entry
4216 ; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d
4217 ; CHECK-SD-NEXT: str q0, [x0]
4218 ; CHECK-SD-NEXT: ret
4220 ; CHECK-GI-LABEL: srshl_zero_shift_amount:
4221 ; CHECK-GI: // %bb.0: // %entry
4222 ; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
4223 ; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d
4224 ; CHECK-GI-NEXT: srshl v0.2d, v0.2d, v2.2d
4225 ; CHECK-GI-NEXT: str q0, [x0]
4226 ; CHECK-GI-NEXT: ret
4228 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
4229 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
4230 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; Feeds the result of the addp intrinsic into llvm.aarch64.neon.urshl with
; an all-zero shift vector and stores the result to %dst. With
; -global-isel=0 no shift instruction is expected (only addp + str); with
; -global-isel=1 a zero vector is materialized and "urshl" is still emitted.
4234 define void @urshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
4235 ; CHECK-SD-LABEL: urshl_zero_shift_amount:
4236 ; CHECK-SD: // %bb.0: // %entry
4237 ; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d
4238 ; CHECK-SD-NEXT: str q0, [x0]
4239 ; CHECK-SD-NEXT: ret
4241 ; CHECK-GI-LABEL: urshl_zero_shift_amount:
4242 ; CHECK-GI: // %bb.0: // %entry
4243 ; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
4244 ; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d
4245 ; CHECK-GI-NEXT: urshl v0.2d, v0.2d, v2.2d
4246 ; CHECK-GI-NEXT: str q0, [x0]
4247 ; CHECK-GI-NEXT: ret
4249 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
4250 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
4251 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; Feeds the result of the addp intrinsic into llvm.aarch64.neon.sqshlu with
; an all-zero second operand and stores the result to %dst. Unlike the
; sibling *_zero_shift_amount tests, both llc runs share the expectations
; here and the shift is kept as the immediate form "sqshlu ... #0".
4255 define void @sqshlu_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
4256 ; CHECK-LABEL: sqshlu_zero_shift_amount:
4257 ; CHECK: // %bb.0: // %entry
4258 ; CHECK-NEXT: addp v0.2d, v0.2d, v1.2d
4259 ; CHECK-NEXT: sqshlu v0.2d, v0.2d, #0
4260 ; CHECK-NEXT: str q0, [x0]
4263 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
4264 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
4265 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; Feeds the result of the addp intrinsic into llvm.aarch64.neon.sshl with
; an all-zero shift vector and stores the result to %dst. With
; -global-isel=0 no shift instruction is expected (only addp + str); with
; -global-isel=1 a zero vector is materialized and "sshl" is still emitted.
4269 define void @sshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
4270 ; CHECK-SD-LABEL: sshl_zero_shift_amount:
4271 ; CHECK-SD: // %bb.0: // %entry
4272 ; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d
4273 ; CHECK-SD-NEXT: str q0, [x0]
4274 ; CHECK-SD-NEXT: ret
4276 ; CHECK-GI-LABEL: sshl_zero_shift_amount:
4277 ; CHECK-GI: // %bb.0: // %entry
4278 ; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
4279 ; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d
4280 ; CHECK-GI-NEXT: sshl v0.2d, v0.2d, v2.2d
4281 ; CHECK-GI-NEXT: str q0, [x0]
4282 ; CHECK-GI-NEXT: ret
4284 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
4285 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
4286 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; Feeds the result of the addp intrinsic into llvm.aarch64.neon.ushl with
; an all-zero shift vector and stores the result to %dst. With
; -global-isel=0 no shift instruction is expected (only addp + str); with
; -global-isel=1 a zero vector is materialized and "ushl" is still emitted.
4290 define void @ushl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
4291 ; CHECK-SD-LABEL: ushl_zero_shift_amount:
4292 ; CHECK-SD: // %bb.0: // %entry
4293 ; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d
4294 ; CHECK-SD-NEXT: str q0, [x0]
4295 ; CHECK-SD-NEXT: ret
4297 ; CHECK-GI-LABEL: ushl_zero_shift_amount:
4298 ; CHECK-GI: // %bb.0: // %entry
4299 ; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
4300 ; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d
4301 ; CHECK-GI-NEXT: ushl v0.2d, v0.2d, v2.2d
4302 ; CHECK-GI-NEXT: str q0, [x0]
4303 ; CHECK-GI-NEXT: ret
4305 %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
4306 %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
4307 store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
; Narrows %a with the llvm.aarch64.neon.rshrn intrinsic (immediate 13) and
; sign-extends the <4 x i16> result back to <4 x i32>. Both llc runs share
; the expectations: "rshrn #13" followed by "sshll #0".
4311 define <4 x i32> @sext_rshrn(<4 x i32> noundef %a) {
4312 ; CHECK-LABEL: sext_rshrn:
4313 ; CHECK: // %bb.0: // %entry
4314 ; CHECK-NEXT: rshrn v0.4h, v0.4s, #13
4315 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
4318 %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13)
4319 %vmovl.i = sext <4 x i16> %vrshrn_n1 to <4 x i32>
4320 ret <4 x i32> %vmovl.i
; Narrows %a with the llvm.aarch64.neon.rshrn intrinsic (immediate 13) and
; zero-extends the <4 x i16> result back to <4 x i32>. Both llc runs share
; the expectations: "rshrn #13" followed by "ushll #0".
4323 define <4 x i32> @zext_rshrn(<4 x i32> noundef %a) {
4324 ; CHECK-LABEL: zext_rshrn:
4325 ; CHECK: // %bb.0: // %entry
4326 ; CHECK-NEXT: rshrn v0.4h, v0.4s, #13
4327 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
4330 %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13)
4331 %vmovl.i = zext <4 x i16> %vrshrn_n1 to <4 x i32>
4332 ret <4 x i32> %vmovl.i
; Adds the splat constant 3 to %a and narrows the sum with the
; llvm.aarch64.neon.rshrn intrinsic (immediate 13). Both llc runs share the
; expectations: the add is kept as a separate "movi #3" + "add" ahead of
; "rshrn #13".
; NOTE(review): the function is named mul_rshrn but the visible IR uses an
; add, not a mul - confirm whether the name is historical.
4335 define <4 x i16> @mul_rshrn(<4 x i32> noundef %a) {
4336 ; CHECK-LABEL: mul_rshrn:
4337 ; CHECK: // %bb.0: // %entry
4338 ; CHECK-NEXT: movi v1.4s, #3
4339 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
4340 ; CHECK-NEXT: rshrn v0.4h, v0.4s, #13
4343 %b = add <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
4344 %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 13)
4345 ret <4 x i16> %vrshrn_n1
; Chains two llvm.aarch64.neon.sshl calls with constant negative amounts
; (-8, then -9) and then applies an ashr by 7. With -global-isel=0 the
; expectation is two immediate "sshr" instructions (#8, #9) and no
; instruction for the trailing ashr; with -global-isel=1 the amounts are
; materialized with mvni, both "sshl" register shifts are kept, and the
; final "sshr #7" is emitted.
4348 define <8 x i16> @signbits_vashr(<8 x i16> %a) {
4349 ; CHECK-SD-LABEL: signbits_vashr:
4350 ; CHECK-SD: // %bb.0:
4351 ; CHECK-SD-NEXT: sshr v0.8h, v0.8h, #8
4352 ; CHECK-SD-NEXT: sshr v0.8h, v0.8h, #9
4353 ; CHECK-SD-NEXT: ret
4355 ; CHECK-GI-LABEL: signbits_vashr:
4356 ; CHECK-GI: // %bb.0:
4357 ; CHECK-GI-NEXT: mvni v1.8h, #7
4358 ; CHECK-GI-NEXT: mvni v2.8h, #8
4359 ; CHECK-GI-NEXT: sshl v0.8h, v0.8h, v1.8h
4360 ; CHECK-GI-NEXT: sshl v0.8h, v0.8h, v2.8h
4361 ; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #7
4362 ; CHECK-GI-NEXT: ret
4363 %b = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>)
4364 %c = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %b, <8 x i16> <i16 -9, i16 -9, i16 -9, i16 -9, i16 -9, i16 -9, i16 -9, i16 -9>)
4365 %d = ashr <8 x i16> %c, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
; Logical right shift of <2 x i64> by 16 followed by a truncation to
; <2 x i8>. Both llc runs share the expectation that the pair becomes a
; single narrowing shift, "shrn v0.2s, v0.2d, #16".
4369 define <2 x i8> @lshr_trunc_v2i64_v2i8(<2 x i64> %a) {
4370 ; CHECK-LABEL: lshr_trunc_v2i64_v2i8:
4372 ; CHECK-NEXT: shrn v0.2s, v0.2d, #16
4374 %b = lshr <2 x i64> %a, <i64 16, i64 16>
4375 %c = trunc <2 x i64> %b to <2 x i8>
; Arithmetic right shift of <2 x i64> by 16 followed by a truncation to
; <2 x i8>. Both llc runs share the expectation of the same narrowing
; shift as the lshr variant, "shrn v0.2s, v0.2d, #16".
4379 define <2 x i8> @ashr_trunc_v2i64_v2i8(<2 x i64> %a) {
4380 ; CHECK-LABEL: ashr_trunc_v2i64_v2i8:
4382 ; CHECK-NEXT: shrn v0.2s, v0.2d, #16
4384 %b = ashr <2 x i64> %a, <i64 16, i64 16>
4385 %c = trunc <2 x i64> %b to <2 x i8>
; Left shift of <2 x i64> by 16 followed by a truncation to <2 x i8>. The
; two llc runs are expected to order the operations differently: with
; -global-isel=0 the narrowing "xtn" comes first and the shift is done on
; .2s lanes; with -global-isel=1 the shift is done on .2d lanes before "xtn".
4389 define <2 x i8> @shl_trunc_v2i64_v2i8(<2 x i64> %a) {
4390 ; CHECK-SD-LABEL: shl_trunc_v2i64_v2i8:
4391 ; CHECK-SD: // %bb.0:
4392 ; CHECK-SD-NEXT: xtn v0.2s, v0.2d
4393 ; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16
4394 ; CHECK-SD-NEXT: ret
4396 ; CHECK-GI-LABEL: shl_trunc_v2i64_v2i8:
4397 ; CHECK-GI: // %bb.0:
4398 ; CHECK-GI-NEXT: shl v0.2d, v0.2d, #16
4399 ; CHECK-GI-NEXT: xtn v0.2s, v0.2d
4400 ; CHECK-GI-NEXT: ret
4401 %b = shl <2 x i64> %a, <i64 16, i64 16>
4402 %c = trunc <2 x i64> %b to <2 x i8>
; Declaration of the addp intrinsic called by the *_zero_shift_amount tests
; to produce the value that is then shifted.
4406 declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>)