; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -enable-misched=false | FileCheck %s
; Variable-shift saturating-shift tests (64-bit vectors): each function loads
; both operands, applies the sqshl/uqshl intrinsic, and checks the emitted
; instruction. Reconstructed: junk line-number prefixes removed; missing
; CHECK directives, ret instructions, and closing braces restored.
define <8 x i8> @sqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sqshl8b:
;CHECK: sqshl.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqshl4h:
;CHECK: sqshl.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqshl2s:
;CHECK: sqshl.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <8 x i8> @uqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uqshl8b:
;CHECK: uqshl.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @uqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uqshl4h:
;CHECK: uqshl.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @uqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uqshl2s:
;CHECK: uqshl.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}
; 128-bit-vector variants of the saturating variable-shift tests.
define <16 x i8> @sqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sqshl16b:
;CHECK: sqshl.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @sqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqshl8h:
;CHECK: sqshl.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @sqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqshl4s:
;CHECK: sqshl.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @sqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: sqshl2d:
;CHECK: sqshl.2d
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

define <16 x i8> @uqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uqshl16b:
;CHECK: uqshl.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @uqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uqshl8h:
;CHECK: uqshl.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @uqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uqshl4s:
;CHECK: uqshl.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @uqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: uqshl2d:
;CHECK: uqshl.2d
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}
; Intrinsic declarations for the sqshl/uqshl tests above.
declare <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; Rounding variable-shift tests (64-bit vectors), srshl/urshl.
define <8 x i8> @srshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: srshl8b:
;CHECK: srshl.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @srshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: srshl4h:
;CHECK: srshl.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @srshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: srshl2s:
;CHECK: srshl.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <8 x i8> @urshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: urshl8b:
;CHECK: urshl.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @urshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: urshl4h:
;CHECK: urshl.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @urshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: urshl2s:
;CHECK: urshl.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}
; 128-bit-vector variants of the rounding variable-shift tests.
define <16 x i8> @srshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: srshl16b:
;CHECK: srshl.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @srshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: srshl8h:
;CHECK: srshl.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @srshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: srshl4s:
;CHECK: srshl.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @srshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: srshl2d:
;CHECK: srshl.2d
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

define <16 x i8> @urshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: urshl16b:
;CHECK: urshl.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @urshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: urshl8h:
;CHECK: urshl.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @urshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: urshl4s:
;CHECK: urshl.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @urshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: urshl2d:
;CHECK: urshl.2d
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}
; Intrinsic declarations for the srshl/urshl tests above.
declare <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; Saturating rounding variable-shift tests (64-bit vectors), sqrshl/uqrshl.
define <8 x i8> @sqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sqrshl8b:
;CHECK: sqrshl.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqrshl4h:
;CHECK: sqrshl.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqrshl2s:
;CHECK: sqrshl.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <8 x i8> @uqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uqrshl8b:
;CHECK: uqrshl.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @uqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uqrshl4h:
;CHECK: uqrshl.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @uqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uqrshl2s:
;CHECK: uqrshl.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}
; 128-bit-vector variants of the saturating rounding variable-shift tests.
define <16 x i8> @sqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sqrshl16b:
;CHECK: sqrshl.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @sqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqrshl8h:
;CHECK: sqrshl.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @sqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqrshl4s:
;CHECK: sqrshl.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @sqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: sqrshl2d:
;CHECK: sqrshl.2d
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

define <16 x i8> @uqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uqrshl16b:
;CHECK: uqrshl.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @uqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uqrshl8h:
;CHECK: uqrshl.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @uqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uqrshl4s:
;CHECK: uqrshl.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @uqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: uqrshl2d:
;CHECK: uqrshl.2d
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}
; Intrinsic declarations for the sqrshl/uqrshl tests above.
declare <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; urshr tests: a urshl by an all-negative-one shift vector should select
; the immediate-form unsigned rounding shift right (urshr #1).
define <8 x i8> @urshr8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: urshr8b:
;CHECK: urshr.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  ret <8 x i8> %tmp3
}

define <4 x i16> @urshr4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: urshr4h:
;CHECK: urshr.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
  ret <4 x i16> %tmp3
}

define <2 x i32> @urshr2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: urshr2s:
;CHECK: urshr.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
  ret <2 x i32> %tmp3
}

define <16 x i8> @urshr16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: urshr16b:
;CHECK: urshr.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  ret <16 x i8> %tmp3
}

define <8 x i16> @urshr8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: urshr8h:
;CHECK: urshr.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
  ret <8 x i16> %tmp3
}

define <4 x i32> @urshr4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: urshr4s:
;CHECK: urshr.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
  ret <4 x i32> %tmp3
}

define <2 x i64> @urshr2d(<2 x i64>* %A) nounwind {
;CHECK-LABEL: urshr2d:
;CHECK: urshr.2d
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
  ret <2 x i64> %tmp3
}
; srshr tests: srshl by an all-negative-one shift vector should select
; the immediate-form signed rounding shift right (srshr #1).
define <8 x i8> @srshr8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: srshr8b:
;CHECK: srshr.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  ret <8 x i8> %tmp3
}

define <4 x i16> @srshr4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: srshr4h:
;CHECK: srshr.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
  ret <4 x i16> %tmp3
}

define <2 x i32> @srshr2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: srshr2s:
;CHECK: srshr.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
  ret <2 x i32> %tmp3
}

define <16 x i8> @srshr16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: srshr16b:
;CHECK: srshr.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  ret <16 x i8> %tmp3
}

define <8 x i16> @srshr8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: srshr8h:
;CHECK: srshr.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
  ret <8 x i16> %tmp3
}

define <4 x i32> @srshr4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: srshr4s:
;CHECK: srshr.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
  ret <4 x i32> %tmp3
}

define <2 x i64> @srshr2d(<2 x i64>* %A) nounwind {
;CHECK-LABEL: srshr2d:
;CHECK: srshr.2d
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
  ret <2 x i64> %tmp3
}
; sqshlu tests: signed saturating shift left (unsigned result) by a constant
; splat of 1 should select the immediate form (sqshlu #1).
define <8 x i8> @sqshlu8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: sqshlu8b:
;CHECK: sqshlu.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqshlu4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: sqshlu4h:
;CHECK: sqshlu.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqshlu2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: sqshlu2s:
;CHECK: sqshlu.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
  ret <2 x i32> %tmp3
}

define <16 x i8> @sqshlu16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: sqshlu16b:
;CHECK: sqshlu.16b v0, {{v[0-9]+}}, #1
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <16 x i8> %tmp3
}

define <8 x i16> @sqshlu8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqshlu8h:
;CHECK: sqshlu.8h v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %tmp3
}

define <4 x i32> @sqshlu4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqshlu4s:
;CHECK: sqshlu.4s v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %tmp3
}

define <2 x i64> @sqshlu2d(<2 x i64>* %A) nounwind {
;CHECK-LABEL: sqshlu2d:
;CHECK: sqshlu.2d v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
  ret <2 x i64> %tmp3
}
; Intrinsic declarations for the sqshlu tests above.
declare <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; rshrn tests: rounding shift right narrow; the *16b/*8h/*4s variants check
; the high-half form (rshrn2) via a shufflevector concatenation.
define <8 x i8> @rshrn8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: rshrn8b:
;CHECK: rshrn.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @rshrn4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: rshrn4h:
;CHECK: rshrn.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @rshrn2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: rshrn2s:
;CHECK: rshrn.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
  ret <2 x i32> %tmp3
}

define <16 x i8> @rshrn16b(<8 x i8> *%ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: rshrn16b:
;CHECK: rshrn2.16b v0, {{v[0-9]+}}, #1
  %out = load <8 x i8>, <8 x i8>* %ret
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @rshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: rshrn8h:
;CHECK: rshrn2.8h v0, {{v[0-9]+}}, #1
  %out = load <4 x i16>, <4 x i16>* %ret
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @rshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: rshrn4s:
;CHECK: rshrn2.4s v0, {{v[0-9]+}}, #1
  %out = load <2 x i32>, <2 x i32>* %ret
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare <8 x i8>  @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone
; shrn tests: an lshr-by-1 followed by trunc should select shrn / shrn2.
define <8 x i8> @shrn8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: shrn8b:
;CHECK: shrn.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
  ret <8 x i8> %tmp3
}

define <4 x i16> @shrn4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: shrn4h:
;CHECK: shrn.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
  %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
  ret <4 x i16> %tmp3
}

define <2 x i32> @shrn2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: shrn2s:
;CHECK: shrn.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
  %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
  ret <2 x i32> %tmp3
}

define <16 x i8> @shrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: shrn16b:
;CHECK: shrn2.16b v0, {{v[0-9]+}}, #1
  %out = load <8 x i8>, <8 x i8>* %ret
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @shrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: shrn8h:
;CHECK: shrn2.8h v0, {{v[0-9]+}}, #1
  %out = load <4 x i16>, <4 x i16>* %ret
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
  %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @shrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: shrn4s:
;CHECK: shrn2.4s v0, {{v[0-9]+}}, #1
  %out = load <2 x i32>, <2 x i32>* %ret
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
  %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare <8 x i8>  @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone
; sqshrn tests: signed saturating shift right narrow (scalar, vector, and
; high-half sqshrn2 forms).
define i32 @sqshrn1s(i64 %A) nounwind {
; CHECK-LABEL: sqshrn1s:
; CHECK: sqshrn {{s[0-9]+}}, d0, #1
  %tmp = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %A, i32 1)
  ret i32 %tmp
}

define <8 x i8> @sqshrn8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqshrn8b:
;CHECK: sqshrn.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqshrn4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqshrn4h:
;CHECK: sqshrn.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqshrn2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: sqshrn2s:
;CHECK: sqshrn.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
  ret <2 x i32> %tmp3
}

define <16 x i8> @sqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: sqshrn16b:
;CHECK: sqshrn2.16b v0, {{v[0-9]+}}, #1
  %out = load <8 x i8>, <8 x i8>* %ret
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @sqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: sqshrn8h:
;CHECK: sqshrn2.8h v0, {{v[0-9]+}}, #1
  %out = load <4 x i16>, <4 x i16>* %ret
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @sqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: sqshrn4s:
;CHECK: sqshrn2.4s v0, {{v[0-9]+}}, #1
  %out = load <2 x i32>, <2 x i32>* %ret
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare i32 @llvm.aarch64.neon.sqshrn.i32(i64, i32) nounwind readnone
declare <8 x i8>  @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone
; sqshrun tests: signed saturating shift right unsigned narrow (scalar,
; vector, and high-half sqshrun2 forms).
define i32 @sqshrun1s(i64 %A) nounwind {
; CHECK-LABEL: sqshrun1s:
; CHECK: sqshrun {{s[0-9]+}}, d0, #1
  %tmp = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %A, i32 1)
  ret i32 %tmp
}

define <8 x i8> @sqshrun8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqshrun8b:
;CHECK: sqshrun.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqshrun4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqshrun4h:
;CHECK: sqshrun.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqshrun2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: sqshrun2s:
;CHECK: sqshrun.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
  ret <2 x i32> %tmp3
}

define <16 x i8> @sqshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: sqshrun16b:
;CHECK: sqshrun2.16b v0, {{v[0-9]+}}, #1
  %out = load <8 x i8>, <8 x i8>* %ret
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @sqshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: sqshrun8h:
;CHECK: sqshrun2.8h v0, {{v[0-9]+}}, #1
  %out = load <4 x i16>, <4 x i16>* %ret
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @sqshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: sqshrun4s:
;CHECK: sqshrun2.4s v0, {{v[0-9]+}}, #1
  %out = load <2 x i32>, <2 x i32>* %ret
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare i32 @llvm.aarch64.neon.sqshrun.i32(i64, i32) nounwind readnone
declare <8 x i8>  @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone
; ---- sqrshrn: signed saturating rounding shift-right narrow, immediate ----
; sqrshrn1s exercises the scalar (i64 -> i32) form; the vector tests follow
; the same pattern as the sqshrun group above, with the 16b/8h/4s variants
; exercising the high-half (sqrshrn2) instruction via shufflevector.
874 define i32 @sqrshrn1s(i64 %A) nounwind {
875 ; CHECK-LABEL: sqrshrn1s:
876 ; CHECK: sqrshrn {{s[0-9]+}}, d0, #1
877 %tmp = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %A, i32 1)
881 define <8 x i8> @sqrshrn8b(<8 x i16>* %A) nounwind {
882 ;CHECK-LABEL: sqrshrn8b:
883 ;CHECK: sqrshrn.8b v0, {{v[0-9]+}}, #1
884 %tmp1 = load <8 x i16>, <8 x i16>* %A
885 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
889 define <4 x i16> @sqrshrn4h(<4 x i32>* %A) nounwind {
890 ;CHECK-LABEL: sqrshrn4h:
891 ;CHECK: sqrshrn.4h v0, {{v[0-9]+}}, #1
892 %tmp1 = load <4 x i32>, <4 x i32>* %A
893 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
897 define <2 x i32> @sqrshrn2s(<2 x i64>* %A) nounwind {
898 ;CHECK-LABEL: sqrshrn2s:
899 ;CHECK: sqrshrn.2s v0, {{v[0-9]+}}, #1
900 %tmp1 = load <2 x i64>, <2 x i64>* %A
901 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
; High-half variants: %ret supplies the existing low half of the result.
905 define <16 x i8> @sqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
906 ;CHECK-LABEL: sqrshrn16b:
907 ;CHECK: sqrshrn2.16b v0, {{v[0-9]+}}, #1
908 %out = load <8 x i8>, <8 x i8>* %ret
909 %tmp1 = load <8 x i16>, <8 x i16>* %A
910 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
911 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
915 define <8 x i16> @sqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
916 ;CHECK-LABEL: sqrshrn8h:
917 ;CHECK: sqrshrn2.8h v0, {{v[0-9]+}}, #1
918 %out = load <4 x i16>, <4 x i16>* %ret
919 %tmp1 = load <4 x i32>, <4 x i32>* %A
920 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
921 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
925 define <4 x i32> @sqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
926 ;CHECK-LABEL: sqrshrn4s:
927 ;CHECK: sqrshrn2.4s v0, {{v[0-9]+}}, #1
928 %out = load <2 x i32>, <2 x i32>* %ret
929 %tmp1 = load <2 x i64>, <2 x i64>* %A
930 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
931 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
935 declare i32 @llvm.aarch64.neon.sqrshrn.i32(i64, i32) nounwind readnone
936 declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
937 declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
938 declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
; ---- sqrshrun: signed saturating rounding shift-right unsigned narrow ----
; Scalar form first, then the D-register vector forms, then the high-half
; (sqrshrun2) forms built with shufflevector, mirroring the groups above.
940 define i32 @sqrshrun1s(i64 %A) nounwind {
941 ; CHECK-LABEL: sqrshrun1s:
942 ; CHECK: sqrshrun {{s[0-9]+}}, d0, #1
943 %tmp = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %A, i32 1)
947 define <8 x i8> @sqrshrun8b(<8 x i16>* %A) nounwind {
948 ;CHECK-LABEL: sqrshrun8b:
949 ;CHECK: sqrshrun.8b v0, {{v[0-9]+}}, #1
950 %tmp1 = load <8 x i16>, <8 x i16>* %A
951 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
955 define <4 x i16> @sqrshrun4h(<4 x i32>* %A) nounwind {
956 ;CHECK-LABEL: sqrshrun4h:
957 ;CHECK: sqrshrun.4h v0, {{v[0-9]+}}, #1
958 %tmp1 = load <4 x i32>, <4 x i32>* %A
959 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
963 define <2 x i32> @sqrshrun2s(<2 x i64>* %A) nounwind {
964 ;CHECK-LABEL: sqrshrun2s:
965 ;CHECK: sqrshrun.2s v0, {{v[0-9]+}}, #1
966 %tmp1 = load <2 x i64>, <2 x i64>* %A
967 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
; High-half variants: %ret supplies the existing low half of the result.
971 define <16 x i8> @sqrshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
972 ;CHECK-LABEL: sqrshrun16b:
973 ;CHECK: sqrshrun2.16b v0, {{v[0-9]+}}, #1
974 %out = load <8 x i8>, <8 x i8>* %ret
975 %tmp1 = load <8 x i16>, <8 x i16>* %A
976 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
977 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
981 define <8 x i16> @sqrshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
982 ;CHECK-LABEL: sqrshrun8h:
983 ;CHECK: sqrshrun2.8h v0, {{v[0-9]+}}, #1
984 %out = load <4 x i16>, <4 x i16>* %ret
985 %tmp1 = load <4 x i32>, <4 x i32>* %A
986 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
987 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
991 define <4 x i32> @sqrshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
992 ;CHECK-LABEL: sqrshrun4s:
993 ;CHECK: sqrshrun2.4s v0, {{v[0-9]+}}, #1
994 %out = load <2 x i32>, <2 x i32>* %ret
995 %tmp1 = load <2 x i64>, <2 x i64>* %A
996 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
997 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1001 declare i32 @llvm.aarch64.neon.sqrshrun.i32(i64, i32) nounwind readnone
1002 declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone
1003 declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone
1004 declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone
; ---- uqrshrn: unsigned saturating rounding shift-right narrow ----
; Same layout as the preceding narrowing-shift groups: scalar, D-register
; vector forms, then high-half (uqrshrn2) forms built via shufflevector.
1006 define i32 @uqrshrn1s(i64 %A) nounwind {
1007 ; CHECK-LABEL: uqrshrn1s:
1008 ; CHECK: uqrshrn {{s[0-9]+}}, d0, #1
1009 %tmp = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %A, i32 1)
1013 define <8 x i8> @uqrshrn8b(<8 x i16>* %A) nounwind {
1014 ;CHECK-LABEL: uqrshrn8b:
1015 ;CHECK: uqrshrn.8b v0, {{v[0-9]+}}, #1
1016 %tmp1 = load <8 x i16>, <8 x i16>* %A
1017 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
1021 define <4 x i16> @uqrshrn4h(<4 x i32>* %A) nounwind {
1022 ;CHECK-LABEL: uqrshrn4h:
1023 ;CHECK: uqrshrn.4h v0, {{v[0-9]+}}, #1
1024 %tmp1 = load <4 x i32>, <4 x i32>* %A
1025 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
1029 define <2 x i32> @uqrshrn2s(<2 x i64>* %A) nounwind {
1030 ;CHECK-LABEL: uqrshrn2s:
1031 ;CHECK: uqrshrn.2s v0, {{v[0-9]+}}, #1
1032 %tmp1 = load <2 x i64>, <2 x i64>* %A
1033 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
; High-half variants: %ret supplies the existing low half of the result.
1037 define <16 x i8> @uqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
1038 ;CHECK-LABEL: uqrshrn16b:
1039 ;CHECK: uqrshrn2.16b v0, {{v[0-9]+}}, #1
1040 %out = load <8 x i8>, <8 x i8>* %ret
1041 %tmp1 = load <8 x i16>, <8 x i16>* %A
1042 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
1043 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1047 define <8 x i16> @uqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
1048 ;CHECK-LABEL: uqrshrn8h:
1049 ;CHECK: uqrshrn2.8h v0, {{v[0-9]+}}, #1
1050 %out = load <4 x i16>, <4 x i16>* %ret
1051 %tmp1 = load <4 x i32>, <4 x i32>* %A
1052 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
1053 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1057 define <4 x i32> @uqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
1058 ;CHECK-LABEL: uqrshrn4s:
1059 ;CHECK: uqrshrn2.4s v0, {{v[0-9]+}}, #1
1060 %out = load <2 x i32>, <2 x i32>* %ret
1061 %tmp1 = load <2 x i64>, <2 x i64>* %A
1062 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
1063 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1067 declare i32 @llvm.aarch64.neon.uqrshrn.i32(i64, i32) nounwind readnone
1068 declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
1069 declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
1070 declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
; ---- uqshrn: unsigned saturating shift-right narrow (non-rounding) ----
; Same layout as the preceding narrowing-shift groups: scalar, D-register
; vector forms, then high-half (uqshrn2) forms built via shufflevector.
1072 define i32 @uqshrn1s(i64 %A) nounwind {
1073 ; CHECK-LABEL: uqshrn1s:
1074 ; CHECK: uqshrn {{s[0-9]+}}, d0, #1
1075 %tmp = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %A, i32 1)
1079 define <8 x i8> @uqshrn8b(<8 x i16>* %A) nounwind {
1080 ;CHECK-LABEL: uqshrn8b:
1081 ;CHECK: uqshrn.8b v0, {{v[0-9]+}}, #1
1082 %tmp1 = load <8 x i16>, <8 x i16>* %A
1083 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
1087 define <4 x i16> @uqshrn4h(<4 x i32>* %A) nounwind {
1088 ;CHECK-LABEL: uqshrn4h:
1089 ;CHECK: uqshrn.4h v0, {{v[0-9]+}}, #1
1090 %tmp1 = load <4 x i32>, <4 x i32>* %A
1091 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
1095 define <2 x i32> @uqshrn2s(<2 x i64>* %A) nounwind {
1096 ;CHECK-LABEL: uqshrn2s:
1097 ;CHECK: uqshrn.2s v0, {{v[0-9]+}}, #1
1098 %tmp1 = load <2 x i64>, <2 x i64>* %A
1099 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
; High-half variants: %ret supplies the existing low half of the result.
1103 define <16 x i8> @uqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
1104 ;CHECK-LABEL: uqshrn16b:
1105 ;CHECK: uqshrn2.16b v0, {{v[0-9]+}}, #1
1106 %out = load <8 x i8>, <8 x i8>* %ret
1107 %tmp1 = load <8 x i16>, <8 x i16>* %A
1108 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
1109 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1113 define <8 x i16> @uqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
1114 ;CHECK-LABEL: uqshrn8h:
1115 ;CHECK: uqshrn2.8h v0, {{v[0-9]+}}, #1
1116 %out = load <4 x i16>, <4 x i16>* %ret
1117 %tmp1 = load <4 x i32>, <4 x i32>* %A
1118 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
1119 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1123 define <4 x i32> @uqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
1124 ;CHECK-LABEL: uqshrn4s:
1125 ;CHECK: uqshrn2.4s v0, {{v[0-9]+}}, #1
1126 %out = load <2 x i32>, <2 x i32>* %ret
1127 %tmp1 = load <2 x i64>, <2 x i64>* %A
1128 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
1129 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1133 declare i32 @llvm.aarch64.neon.uqshrn.i32(i64, i32) nounwind readnone
1134 declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone
1135 declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone
1136 declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone
; ---- ushll: zext + shl-by-splat-constant should select ushll ----
; The ushll2_* tests take the high half of a 128-bit source with
; shufflevector before widening, so the same ushll pattern applies to the
; upper lanes.  The ushl intrinsic declarations used by later tests follow.
1138 define <8 x i16> @ushll8h(<8 x i8>* %A) nounwind {
1139 ;CHECK-LABEL: ushll8h:
1140 ;CHECK: ushll.8h v0, {{v[0-9]+}}, #1
1141 %tmp1 = load <8 x i8>, <8 x i8>* %A
1142 %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
1143 %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1147 define <4 x i32> @ushll4s(<4 x i16>* %A) nounwind {
1148 ;CHECK-LABEL: ushll4s:
1149 ;CHECK: ushll.4s v0, {{v[0-9]+}}, #1
1150 %tmp1 = load <4 x i16>, <4 x i16>* %A
1151 %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
1152 %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
1156 define <2 x i64> @ushll2d(<2 x i32>* %A) nounwind {
1157 ;CHECK-LABEL: ushll2d:
1158 ;CHECK: ushll.2d v0, {{v[0-9]+}}, #1
1159 %tmp1 = load <2 x i32>, <2 x i32>* %A
1160 %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
1161 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
1165 define <8 x i16> @ushll2_8h(<16 x i8>* %A) nounwind {
1166 ;CHECK-LABEL: ushll2_8h:
1167 ;CHECK: ushll.8h v0, {{v[0-9]+}}, #1
1168 %load1 = load <16 x i8>, <16 x i8>* %A
1169 %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1170 %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
1171 %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1175 define <4 x i32> @ushll2_4s(<8 x i16>* %A) nounwind {
1176 ;CHECK-LABEL: ushll2_4s:
1177 ;CHECK: ushll.4s v0, {{v[0-9]+}}, #1
1178 %load1 = load <8 x i16>, <8 x i16>* %A
1179 %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1180 %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
1181 %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
1185 define <2 x i64> @ushll2_2d(<4 x i32>* %A) nounwind {
1186 ;CHECK-LABEL: ushll2_2d:
1187 ;CHECK: ushll.2d v0, {{v[0-9]+}}, #1
1188 %load1 = load <4 x i32>, <4 x i32>* %A
1189 %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1190 %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
1191 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
1195 declare <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8>, <16 x i8>)
1196 declare <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16>, <8 x i16>)
1197 declare <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32>, <4 x i32>)
1198 declare <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64>, <2 x i64>)
; ---- ushl intrinsic with constant shift operands ----
; These exercise instruction selection for the ushl intrinsic: a splat
; positive constant on a zext'd source should become ushll; a non-2x
; extension or non-constant shift keeps the generic form; a splat -1 shift
; stays a register-operand ushl (negative = right shift).  Existing FIXME
; notes mark known missed folds.
1200 define <8 x i16> @neon.ushll8h_constant_shift(<8 x i8>* %A) nounwind {
1201 ;CHECK-LABEL: neon.ushll8h_constant_shift
1202 ;CHECK: ushll.8h v0, {{v[0-9]+}}, #1
1203 %tmp1 = load <8 x i8>, <8 x i8>* %A
1204 %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
1205 %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
1209 define <8 x i16> @neon.ushl8h_no_constant_shift(<8 x i8>* %A) nounwind {
1210 ;CHECK-LABEL: neon.ushl8h_no_constant_shift
1211 ;CHECK: ushl.8h v0, v0, v0
1212 %tmp1 = load <8 x i8>, <8 x i8>* %A
1213 %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
1214 %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp2)
1218 define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(<4 x i8>* %A) nounwind {
1219 ;CHECK-LABEL: @neon.ushl8h_constant_shift_extend_not_2x
1220 ;CHECK-NOT: ushll.8h v0,
1221 ;CHECK: ldrb w8, [x0]
1223 ;CHECK: ldrb w8, [x0, #1]
1224 ;CHECK: mov.s v0[1], w8
1225 ;CHECK: ldrb w8, [x0, #2]
1226 ;CHECK: mov.s v0[2], w8
1227 ;CHECK: ldrb w8, [x0, #3]
1228 ;CHECK: mov.s v0[3], w8
1229 ;CHECK: shl.4s v0, v0, #1
1230 %tmp1 = load <4 x i8>, <4 x i8>* %A
1231 %tmp2 = zext <4 x i8> %tmp1 to <4 x i32>
1232 %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
1236 define <8 x i16> @neon.ushl8_noext_constant_shift(<8 x i16>* %A) nounwind {
1237 ; CHECK-LABEL: neon.ushl8_noext_constant_shift
1238 ; CHECK: ldr q0, [x0]
1239 ; CHECK-NEXT: shl.8h v0, v0, #1
1241 %tmp1 = load <8 x i16>, <8 x i16>* %A
1242 %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
1246 define <4 x i32> @neon.ushll4s_constant_shift(<4 x i16>* %A) nounwind {
1247 ;CHECK-LABEL: neon.ushll4s_constant_shift
1248 ;CHECK: ushll.4s v0, {{v[0-9]+}}, #1
1249 %tmp1 = load <4 x i16>, <4 x i16>* %A
1250 %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
1251 %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
1255 ; FIXME: unnecessary ushll.4s v0, v0, #0?
1256 define <4 x i32> @neon.ushll4s_neg_constant_shift(<4 x i16>* %A) nounwind {
1257 ; CHECK-LABEL: neon.ushll4s_neg_constant_shift
1258 ; CHECK: movi.2d v1, #0xffffffffffffffff
1259 ; CHECK: ushll.4s v0, v0, #0
1260 ; CHECK: ushl.4s v0, v0, v1
1261 %tmp1 = load <4 x i16>, <4 x i16>* %A
1262 %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
1263 %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
1267 ; FIXME: should be constant folded.
1268 define <4 x i32> @neon.ushll4s_constant_fold() nounwind {
1269 ; CHECK-LABEL: neon.ushll4s_constant_fold
1270 ; CHECK: shl.4s v0, v0, #1
1272 %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
1276 define <2 x i64> @neon.ushll2d_constant_shift(<2 x i32>* %A) nounwind {
1277 ;CHECK-LABEL: neon.ushll2d_constant_shift
1278 ;CHECK: ushll.2d v0, {{v[0-9]+}}, #1
1279 %tmp1 = load <2 x i32>, <2 x i32>* %A
1280 %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
1281 %tmp3 = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 1, i64 1>)
; ---- sshll: sext + shl-by-splat-constant should select sshll ----
; Signed counterpart of the ushll group above; the sshl intrinsic
; declarations used by the following tests come after.
1285 define <8 x i16> @sshll8h(<8 x i8>* %A) nounwind {
1286 ;CHECK-LABEL: sshll8h:
1287 ;CHECK: sshll.8h v0, {{v[0-9]+}}, #1
1288 %tmp1 = load <8 x i8>, <8 x i8>* %A
1289 %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
1290 %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1294 define <2 x i64> @sshll2d(<2 x i32>* %A) nounwind {
1295 ;CHECK-LABEL: sshll2d:
1296 ;CHECK: sshll.2d v0, {{v[0-9]+}}, #1
1297 %tmp1 = load <2 x i32>, <2 x i32>* %A
1298 %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
1299 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
1303 declare <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8>, <16 x i8>)
1304 declare <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16>, <8 x i16>)
1305 declare <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32>, <4 x i32>)
1306 declare <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64>, <2 x i64>)
; ---- sshl intrinsic with constant shift operands ----
; Splat positive constants fold to shl/sshll immediates; non-splat and
; negative splat constants keep the register-operand sshl form.  Existing
; FIXME notes mark calls on constant inputs that could be constant folded.
1308 define <16 x i8> @neon.sshl16b_constant_shift(<16 x i8>* %A) nounwind {
1309 ;CHECK-LABEL: neon.sshl16b_constant_shift
1310 ;CHECK: shl.16b {{v[0-9]+}}, {{v[0-9]+}}, #1
1311 %tmp1 = load <16 x i8>, <16 x i8>* %A
1312 %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
1316 define <16 x i8> @neon.sshl16b_non_splat_constant_shift(<16 x i8>* %A) nounwind {
1317 ;CHECK-LABEL: neon.sshl16b_non_splat_constant_shift
1318 ;CHECK: sshl.16b {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
1319 %tmp1 = load <16 x i8>, <16 x i8>* %A
1320 %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 6, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
1324 define <16 x i8> @neon.sshl16b_neg_constant_shift(<16 x i8>* %A) nounwind {
1325 ;CHECK-LABEL: neon.sshl16b_neg_constant_shift
1326 ;CHECK: sshl.16b {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
1327 %tmp1 = load <16 x i8>, <16 x i8>* %A
1328 %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>)
1332 define <8 x i16> @neon.sshll8h_constant_shift(<8 x i8>* %A) nounwind {
1333 ;CHECK-LABEL: neon.sshll8h_constant_shift
1334 ;CHECK: sshll.8h v0, {{v[0-9]+}}, #1
1335 %tmp1 = load <8 x i8>, <8 x i8>* %A
1336 %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
1337 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %tmp2, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
1341 define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(<4 x i8>* %A) nounwind {
1342 ;CHECK-LABEL: neon.sshl4s_wrong_ext_constant_shift
1343 ;CHECK: ldrsb w8, [x0]
1344 ;CHECK-NEXT: fmov s0, w8
1345 ;CHECK-NEXT: ldrsb w8, [x0, #1]
1346 ;CHECK-NEXT: mov.s v0[1], w8
1347 ;CHECK-NEXT: ldrsb w8, [x0, #2]
1348 ;CHECK-NEXT: mov.s v0[2], w8
1349 ;CHECK-NEXT: ldrsb w8, [x0, #3]
1350 ;CHECK-NEXT: mov.s v0[3], w8
1351 ;CHECK-NEXT: shl.4s v0, v0, #1
1352 %tmp1 = load <4 x i8>, <4 x i8>* %A
1353 %tmp2 = sext <4 x i8> %tmp1 to <4 x i32>
1354 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
1358 define <4 x i32> @neon.sshll4s_constant_shift(<4 x i16>* %A) nounwind {
1359 ;CHECK-LABEL: neon.sshll4s_constant_shift
1360 ;CHECK: sshll.4s v0, {{v[0-9]+}}, #1
1361 %tmp1 = load <4 x i16>, <4 x i16>* %A
1362 %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
1363 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
1367 define <4 x i32> @neon.sshll4s_neg_constant_shift(<4 x i16>* %A) nounwind {
1368 ;CHECK-LABEL: neon.sshll4s_neg_constant_shift
1369 ;CHECK: movi.2d v1, #0xffffffffffffffff
1370 ;CHECK: sshll.4s v0, v0, #0
1371 ;CHECK: sshl.4s v0, v0, v1
1372 %tmp1 = load <4 x i16>, <4 x i16>* %A
1373 %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
1374 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
1378 ; FIXME: should be constant folded.
1379 define <4 x i32> @neon.sshl4s_constant_fold() nounwind {
1380 ;CHECK-LABEL: neon.sshl4s_constant_fold
1381 ;CHECK: shl.4s {{v[0-9]+}}, {{v[0-9]+}}, #2
1382 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
1386 define <4 x i32> @neon.sshl4s_no_fold(<4 x i32>* %A) nounwind {
1387 ;CHECK-LABEL: neon.sshl4s_no_fold
1388 ;CHECK: shl.4s {{v[0-9]+}}, {{v[0-9]+}}, #1
1389 %tmp1 = load <4 x i32>, <4 x i32>* %A
1390 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
1394 define <2 x i64> @neon.sshll2d_constant_shift(<2 x i32>* %A) nounwind {
1395 ;CHECK-LABEL: neon.sshll2d_constant_shift
1396 ;CHECK: sshll.2d v0, {{v[0-9]+}}, #1
1397 %tmp1 = load <2 x i32>, <2 x i32>* %A
1398 %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
1399 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 1, i64 1>)
1403 ; FIXME: should be constant folded.
1404 define <2 x i64> @neon.sshl2d_constant_fold() nounwind {
1405 ;CHECK-LABEL: neon.sshl2d_constant_fold
1406 ;CHECK: shl.2d {{v[0-9]+}}, {{v[0-9]+}}, #1
1407 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> <i64 99, i64 1000>, <2 x i64> <i64 1, i64 1>)
1411 define <2 x i64> @neon.sshl2d_no_fold(<2 x i64>* %A) nounwind {
1412 ;CHECK-LABEL: neon.sshl2d_no_fold
1413 ;CHECK: shl.2d {{v[0-9]+}}, {{v[0-9]+}}, #2
1414 %tmp2 = load <2 x i64>, <2 x i64>* %A
1415 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 2, i64 2>)
; ---- sshll2: high-half extract + sext + shl should still select sshll ----
; Each test pulls the upper half of a 128-bit source via shufflevector,
; sign-extends it, and shifts left by 1.
1419 define <8 x i16> @sshll2_8h(<16 x i8>* %A) nounwind {
1420 ;CHECK-LABEL: sshll2_8h:
1421 ;CHECK: sshll.8h v0, {{v[0-9]+}}, #1
1422 %load1 = load <16 x i8>, <16 x i8>* %A
1423 %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1424 %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
1425 %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1429 define <4 x i32> @sshll2_4s(<8 x i16>* %A) nounwind {
1430 ;CHECK-LABEL: sshll2_4s:
1431 ;CHECK: sshll.4s v0, {{v[0-9]+}}, #1
1432 %load1 = load <8 x i16>, <8 x i16>* %A
1433 %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1434 %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
1435 %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
1439 define <2 x i64> @sshll2_2d(<4 x i32>* %A) nounwind {
1440 ;CHECK-LABEL: sshll2_2d:
1441 ;CHECK: sshll.2d v0, {{v[0-9]+}}, #1
1442 %load1 = load <4 x i32>, <4 x i32>* %A
1443 %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1444 %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
1445 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
; ---- sqshl by splat constant: should select the immediate form ----
; Each test calls the sqshl intrinsic with an all-ones splat shift of 1 and
; expects the immediate-encoded sqshl for every vector width.
1449 define <8 x i8> @sqshli8b(<8 x i8>* %A) nounwind {
1450 ;CHECK-LABEL: sqshli8b:
1451 ;CHECK: sqshl.8b v0, {{v[0-9]+}}, #1
1452 %tmp1 = load <8 x i8>, <8 x i8>* %A
1453 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
1457 define <4 x i16> @sqshli4h(<4 x i16>* %A) nounwind {
1458 ;CHECK-LABEL: sqshli4h:
1459 ;CHECK: sqshl.4h v0, {{v[0-9]+}}, #1
1460 %tmp1 = load <4 x i16>, <4 x i16>* %A
1461 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
1465 define <2 x i32> @sqshli2s(<2 x i32>* %A) nounwind {
1466 ;CHECK-LABEL: sqshli2s:
1467 ;CHECK: sqshl.2s v0, {{v[0-9]+}}, #1
1468 %tmp1 = load <2 x i32>, <2 x i32>* %A
1469 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
1473 define <16 x i8> @sqshli16b(<16 x i8>* %A) nounwind {
1474 ;CHECK-LABEL: sqshli16b:
1475 ;CHECK: sqshl.16b v0, {{v[0-9]+}}, #1
1476 %tmp1 = load <16 x i8>, <16 x i8>* %A
1477 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
1481 define <8 x i16> @sqshli8h(<8 x i16>* %A) nounwind {
1482 ;CHECK-LABEL: sqshli8h:
1483 ;CHECK: sqshl.8h v0, {{v[0-9]+}}, #1
1484 %tmp1 = load <8 x i16>, <8 x i16>* %A
1485 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
1489 define <4 x i32> @sqshli4s(<4 x i32>* %A) nounwind {
1490 ;CHECK-LABEL: sqshli4s:
1491 ;CHECK: sqshl.4s v0, {{v[0-9]+}}, #1
1492 %tmp1 = load <4 x i32>, <4 x i32>* %A
1493 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
1497 define <2 x i64> @sqshli2d(<2 x i64>* %A) nounwind {
1498 ;CHECK-LABEL: sqshli2d:
1499 ;CHECK: sqshl.2d v0, {{v[0-9]+}}, #1
1500 %tmp1 = load <2 x i64>, <2 x i64>* %A
1501 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
; ---- uqshl by splat constant: should select the immediate form ----
; uqshli8b_1 uses a splat shift of 8: its directives show a movi #8 plus the
; register-operand uqshl instead of an immediate encoding.
; NOTE(review): uqshli16b below has no visible instruction-matching
; directive between its label and body in this view — confirm against the
; upstream test whether one is intentionally absent.
1505 define <8 x i8> @uqshli8b(<8 x i8>* %A) nounwind {
1506 ;CHECK-LABEL: uqshli8b:
1507 ;CHECK: uqshl.8b v0, {{v[0-9]+}}, #1
1508 %tmp1 = load <8 x i8>, <8 x i8>* %A
1509 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
1513 define <8 x i8> @uqshli8b_1(<8 x i8>* %A) nounwind {
1514 ;CHECK-LABEL: uqshli8b_1:
1515 ;CHECK: movi.8b [[REG:v[0-9]+]], #8
1516 ;CHECK: uqshl.8b v0, v0, [[REG]]
1517 %tmp1 = load <8 x i8>, <8 x i8>* %A
1518 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
1522 define <4 x i16> @uqshli4h(<4 x i16>* %A) nounwind {
1523 ;CHECK-LABEL: uqshli4h:
1524 ;CHECK: uqshl.4h v0, {{v[0-9]+}}, #1
1525 %tmp1 = load <4 x i16>, <4 x i16>* %A
1526 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
1530 define <2 x i32> @uqshli2s(<2 x i32>* %A) nounwind {
1531 ;CHECK-LABEL: uqshli2s:
1532 ;CHECK: uqshl.2s v0, {{v[0-9]+}}, #1
1533 %tmp1 = load <2 x i32>, <2 x i32>* %A
1534 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
1538 define <16 x i8> @uqshli16b(<16 x i8>* %A) nounwind {
1539 ;CHECK-LABEL: uqshli16b:
1541 %tmp1 = load <16 x i8>, <16 x i8>* %A
1542 %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
1546 define <8 x i16> @uqshli8h(<8 x i16>* %A) nounwind {
1547 ;CHECK-LABEL: uqshli8h:
1548 ;CHECK: uqshl.8h v0, {{v[0-9]+}}, #1
1549 %tmp1 = load <8 x i16>, <8 x i16>* %A
1550 %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
1554 define <4 x i32> @uqshli4s(<4 x i32>* %A) nounwind {
1555 ;CHECK-LABEL: uqshli4s:
1556 ;CHECK: uqshl.4s v0, {{v[0-9]+}}, #1
1557 %tmp1 = load <4 x i32>, <4 x i32>* %A
1558 %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
1562 define <2 x i64> @uqshli2d(<2 x i64>* %A) nounwind {
1563 ;CHECK-LABEL: uqshli2d:
1564 ;CHECK: uqshl.2d v0, {{v[0-9]+}}, #1
1565 %tmp1 = load <2 x i64>, <2 x i64>* %A
1566 %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
; ---- ursra: urshl by splat -1 (rounding right shift by 1) + add ----
; Each test rounds-right-shifts %A by 1 via the urshl intrinsic with an
; all-negative-one shift vector, adds %B, and expects the fused
; shift-right-and-accumulate instruction.
1570 define <8 x i8> @ursra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
1571 ;CHECK-LABEL: ursra8b:
1572 ;CHECK: ursra.8b v0, {{v[0-9]+}}, #1
1573 %tmp1 = load <8 x i8>, <8 x i8>* %A
1574 %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
1575 %tmp4 = load <8 x i8>, <8 x i8>* %B
1576 %tmp5 = add <8 x i8> %tmp3, %tmp4
1580 define <4 x i16> @ursra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
1581 ;CHECK-LABEL: ursra4h:
1582 ;CHECK: ursra.4h v0, {{v[0-9]+}}, #1
1583 %tmp1 = load <4 x i16>, <4 x i16>* %A
1584 %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
1585 %tmp4 = load <4 x i16>, <4 x i16>* %B
1586 %tmp5 = add <4 x i16> %tmp3, %tmp4
1590 define <2 x i32> @ursra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
1591 ;CHECK-LABEL: ursra2s:
1592 ;CHECK: ursra.2s v0, {{v[0-9]+}}, #1
1593 %tmp1 = load <2 x i32>, <2 x i32>* %A
1594 %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
1595 %tmp4 = load <2 x i32>, <2 x i32>* %B
1596 %tmp5 = add <2 x i32> %tmp3, %tmp4
1600 define <16 x i8> @ursra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
1601 ;CHECK-LABEL: ursra16b:
1602 ;CHECK: ursra.16b v0, {{v[0-9]+}}, #1
1603 %tmp1 = load <16 x i8>, <16 x i8>* %A
1604 %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
1605 %tmp4 = load <16 x i8>, <16 x i8>* %B
1606 %tmp5 = add <16 x i8> %tmp3, %tmp4
1610 define <8 x i16> @ursra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
1611 ;CHECK-LABEL: ursra8h:
1612 ;CHECK: ursra.8h v0, {{v[0-9]+}}, #1
1613 %tmp1 = load <8 x i16>, <8 x i16>* %A
1614 %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
1615 %tmp4 = load <8 x i16>, <8 x i16>* %B
1616 %tmp5 = add <8 x i16> %tmp3, %tmp4
1620 define <4 x i32> @ursra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
1621 ;CHECK-LABEL: ursra4s:
1622 ;CHECK: ursra.4s v0, {{v[0-9]+}}, #1
1623 %tmp1 = load <4 x i32>, <4 x i32>* %A
1624 %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
1625 %tmp4 = load <4 x i32>, <4 x i32>* %B
1626 %tmp5 = add <4 x i32> %tmp3, %tmp4
1630 define <2 x i64> @ursra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
1631 ;CHECK-LABEL: ursra2d:
1632 ;CHECK: ursra.2d v0, {{v[0-9]+}}, #1
1633 %tmp1 = load <2 x i64>, <2 x i64>* %A
1634 %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
1635 %tmp4 = load <2 x i64>, <2 x i64>* %B
1636 %tmp5 = add <2 x i64> %tmp3, %tmp4
; ---- srsra: srshl by splat -1 (signed rounding right shift by 1) + add ----
; Signed counterpart of the ursra group above; expects the fused signed
; rounding shift-right-and-accumulate instruction.
1640 define <8 x i8> @srsra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
1641 ;CHECK-LABEL: srsra8b:
1642 ;CHECK: srsra.8b v0, {{v[0-9]+}}, #1
1643 %tmp1 = load <8 x i8>, <8 x i8>* %A
1644 %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
1645 %tmp4 = load <8 x i8>, <8 x i8>* %B
1646 %tmp5 = add <8 x i8> %tmp3, %tmp4
1650 define <4 x i16> @srsra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
1651 ;CHECK-LABEL: srsra4h:
1652 ;CHECK: srsra.4h v0, {{v[0-9]+}}, #1
1653 %tmp1 = load <4 x i16>, <4 x i16>* %A
1654 %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
1655 %tmp4 = load <4 x i16>, <4 x i16>* %B
1656 %tmp5 = add <4 x i16> %tmp3, %tmp4
1660 define <2 x i32> @srsra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
1661 ;CHECK-LABEL: srsra2s:
1662 ;CHECK: srsra.2s v0, {{v[0-9]+}}, #1
1663 %tmp1 = load <2 x i32>, <2 x i32>* %A
1664 %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
1665 %tmp4 = load <2 x i32>, <2 x i32>* %B
1666 %tmp5 = add <2 x i32> %tmp3, %tmp4
1670 define <16 x i8> @srsra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
1671 ;CHECK-LABEL: srsra16b:
1672 ;CHECK: srsra.16b v0, {{v[0-9]+}}, #1
1673 %tmp1 = load <16 x i8>, <16 x i8>* %A
1674 %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
1675 %tmp4 = load <16 x i8>, <16 x i8>* %B
1676 %tmp5 = add <16 x i8> %tmp3, %tmp4
1680 define <8 x i16> @srsra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
1681 ;CHECK-LABEL: srsra8h:
1682 ;CHECK: srsra.8h v0, {{v[0-9]+}}, #1
1683 %tmp1 = load <8 x i16>, <8 x i16>* %A
1684 %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
1685 %tmp4 = load <8 x i16>, <8 x i16>* %B
1686 %tmp5 = add <8 x i16> %tmp3, %tmp4
1690 define <4 x i32> @srsra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
1691 ;CHECK-LABEL: srsra4s:
1692 ;CHECK: srsra.4s v0, {{v[0-9]+}}, #1
1693 %tmp1 = load <4 x i32>, <4 x i32>* %A
1694 %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
1695 %tmp4 = load <4 x i32>, <4 x i32>* %B
1696 %tmp5 = add <4 x i32> %tmp3, %tmp4
1700 define <2 x i64> @srsra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
1701 ;CHECK-LABEL: srsra2d:
1702 ;CHECK: srsra.2d v0, {{v[0-9]+}}, #1
1703 %tmp1 = load <2 x i64>, <2 x i64>* %A
1704 %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
1705 %tmp4 = load <2 x i64>, <2 x i64>* %B
1706 %tmp5 = add <2 x i64> %tmp3, %tmp4
1710 define <8 x i8> @usra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
1711 ;CHECK-LABEL: usra8b:
1712 ;CHECK: usra.8b v0, {{v[0-9]+}}, #1
1713 %tmp1 = load <8 x i8>, <8 x i8>* %A
1714 %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1715 %tmp4 = load <8 x i8>, <8 x i8>* %B
1716 %tmp5 = add <8 x i8> %tmp3, %tmp4
1720 define <4 x i16> @usra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
1721 ;CHECK-LABEL: usra4h:
1722 ;CHECK: usra.4h v0, {{v[0-9]+}}, #1
1723 %tmp1 = load <4 x i16>, <4 x i16>* %A
1724 %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
1725 %tmp4 = load <4 x i16>, <4 x i16>* %B
1726 %tmp5 = add <4 x i16> %tmp3, %tmp4
1730 define <2 x i32> @usra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
1731 ;CHECK-LABEL: usra2s:
1732 ;CHECK: usra.2s v0, {{v[0-9]+}}, #1
1733 %tmp1 = load <2 x i32>, <2 x i32>* %A
1734 %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
1735 %tmp4 = load <2 x i32>, <2 x i32>* %B
1736 %tmp5 = add <2 x i32> %tmp3, %tmp4
1740 define <16 x i8> @usra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
1741 ;CHECK-LABEL: usra16b:
1742 ;CHECK: usra.16b v0, {{v[0-9]+}}, #1
1743 %tmp1 = load <16 x i8>, <16 x i8>* %A
1744 %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1745 %tmp4 = load <16 x i8>, <16 x i8>* %B
1746 %tmp5 = add <16 x i8> %tmp3, %tmp4
1750 define <8 x i16> @usra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
1751 ;CHECK-LABEL: usra8h:
1752 ;CHECK: usra.8h v0, {{v[0-9]+}}, #1
1753 %tmp1 = load <8 x i16>, <8 x i16>* %A
1754 %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1755 %tmp4 = load <8 x i16>, <8 x i16>* %B
1756 %tmp5 = add <8 x i16> %tmp3, %tmp4
1760 define <4 x i32> @usra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
1761 ;CHECK-LABEL: usra4s:
1762 ;CHECK: usra.4s v0, {{v[0-9]+}}, #1
1763 %tmp1 = load <4 x i32>, <4 x i32>* %A
1764 %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
1765 %tmp4 = load <4 x i32>, <4 x i32>* %B
1766 %tmp5 = add <4 x i32> %tmp3, %tmp4
1770 define <2 x i64> @usra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
1771 ;CHECK-LABEL: usra2d:
1772 ;CHECK: usra.2d v0, {{v[0-9]+}}, #1
1773 %tmp1 = load <2 x i64>, <2 x i64>* %A
1774 %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
1775 %tmp4 = load <2 x i64>, <2 x i64>* %B
1776 %tmp5 = add <2 x i64> %tmp3, %tmp4
1780 define <8 x i8> @ssra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
1781 ;CHECK-LABEL: ssra8b:
1782 ;CHECK: ssra.8b v0, {{v[0-9]+}}, #1
1783 %tmp1 = load <8 x i8>, <8 x i8>* %A
1784 %tmp3 = ashr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1785 %tmp4 = load <8 x i8>, <8 x i8>* %B
1786 %tmp5 = add <8 x i8> %tmp3, %tmp4
1790 define <4 x i16> @ssra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
1791 ;CHECK-LABEL: ssra4h:
1792 ;CHECK: ssra.4h v0, {{v[0-9]+}}, #1
1793 %tmp1 = load <4 x i16>, <4 x i16>* %A
1794 %tmp3 = ashr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
1795 %tmp4 = load <4 x i16>, <4 x i16>* %B
1796 %tmp5 = add <4 x i16> %tmp3, %tmp4
1800 define <2 x i32> @ssra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
1801 ;CHECK-LABEL: ssra2s:
1802 ;CHECK: ssra.2s v0, {{v[0-9]+}}, #1
1803 %tmp1 = load <2 x i32>, <2 x i32>* %A
1804 %tmp3 = ashr <2 x i32> %tmp1, <i32 1, i32 1>
1805 %tmp4 = load <2 x i32>, <2 x i32>* %B
1806 %tmp5 = add <2 x i32> %tmp3, %tmp4
1810 define <16 x i8> @ssra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
1811 ;CHECK-LABEL: ssra16b:
1812 ;CHECK: ssra.16b v0, {{v[0-9]+}}, #1
1813 %tmp1 = load <16 x i8>, <16 x i8>* %A
1814 %tmp3 = ashr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1815 %tmp4 = load <16 x i8>, <16 x i8>* %B
1816 %tmp5 = add <16 x i8> %tmp3, %tmp4
1820 define <8 x i16> @ssra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
1821 ;CHECK-LABEL: ssra8h:
1822 ;CHECK: ssra.8h v0, {{v[0-9]+}}, #1
1823 %tmp1 = load <8 x i16>, <8 x i16>* %A
1824 %tmp3 = ashr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1825 %tmp4 = load <8 x i16>, <8 x i16>* %B
1826 %tmp5 = add <8 x i16> %tmp3, %tmp4
1830 define <4 x i32> @ssra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
1831 ;CHECK-LABEL: ssra4s:
1832 ;CHECK: ssra.4s v0, {{v[0-9]+}}, #1
1833 %tmp1 = load <4 x i32>, <4 x i32>* %A
1834 %tmp3 = ashr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
1835 %tmp4 = load <4 x i32>, <4 x i32>* %B
1836 %tmp5 = add <4 x i32> %tmp3, %tmp4
1840 define <2 x i64> @ssra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
1841 ;CHECK-LABEL: ssra2d:
1842 ;CHECK: ssra.2d v0, {{v[0-9]+}}, #1
1843 %tmp1 = load <2 x i64>, <2 x i64>* %A
1844 %tmp3 = ashr <2 x i64> %tmp1, <i64 1, i64 1>
1845 %tmp4 = load <2 x i64>, <2 x i64>* %B
1846 %tmp5 = add <2 x i64> %tmp3, %tmp4
1850 define <8 x i8> @shr_orr8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
1851 ;CHECK-LABEL: shr_orr8b:
1852 ;CHECK: shr.8b v0, {{v[0-9]+}}, #1
1855 %tmp1 = load <8 x i8>, <8 x i8>* %A
1856 %tmp4 = load <8 x i8>, <8 x i8>* %B
1857 %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1858 %tmp5 = or <8 x i8> %tmp3, %tmp4
1862 define <4 x i16> @shr_orr4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
1863 ;CHECK-LABEL: shr_orr4h:
1864 ;CHECK: shr.4h v0, {{v[0-9]+}}, #1
1867 %tmp1 = load <4 x i16>, <4 x i16>* %A
1868 %tmp4 = load <4 x i16>, <4 x i16>* %B
1869 %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
1870 %tmp5 = or <4 x i16> %tmp3, %tmp4
1874 define <2 x i32> @shr_orr2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
1875 ;CHECK-LABEL: shr_orr2s:
1876 ;CHECK: shr.2s v0, {{v[0-9]+}}, #1
1879 %tmp1 = load <2 x i32>, <2 x i32>* %A
1880 %tmp4 = load <2 x i32>, <2 x i32>* %B
1881 %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
1882 %tmp5 = or <2 x i32> %tmp3, %tmp4
1886 define <16 x i8> @shr_orr16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
1887 ;CHECK-LABEL: shr_orr16b:
1888 ;CHECK: shr.16b v0, {{v[0-9]+}}, #1
1889 ;CHECK-NEXT: orr.16b
1891 %tmp1 = load <16 x i8>, <16 x i8>* %A
1892 %tmp4 = load <16 x i8>, <16 x i8>* %B
1893 %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1894 %tmp5 = or <16 x i8> %tmp3, %tmp4
1898 define <8 x i16> @shr_orr8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
1899 ;CHECK-LABEL: shr_orr8h:
1900 ;CHECK: shr.8h v0, {{v[0-9]+}}, #1
1901 ;CHECK-NEXT: orr.16b
1903 %tmp1 = load <8 x i16>, <8 x i16>* %A
1904 %tmp4 = load <8 x i16>, <8 x i16>* %B
1905 %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1906 %tmp5 = or <8 x i16> %tmp3, %tmp4
1910 define <4 x i32> @shr_orr4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
1911 ;CHECK-LABEL: shr_orr4s:
1912 ;CHECK: shr.4s v0, {{v[0-9]+}}, #1
1913 ;CHECK-NEXT: orr.16b
1915 %tmp1 = load <4 x i32>, <4 x i32>* %A
1916 %tmp4 = load <4 x i32>, <4 x i32>* %B
1917 %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
1918 %tmp5 = or <4 x i32> %tmp3, %tmp4
1922 define <2 x i64> @shr_orr2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
1923 ;CHECK-LABEL: shr_orr2d:
1924 ;CHECK: shr.2d v0, {{v[0-9]+}}, #1
1925 ;CHECK-NEXT: orr.16b
1927 %tmp1 = load <2 x i64>, <2 x i64>* %A
1928 %tmp4 = load <2 x i64>, <2 x i64>* %B
1929 %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
1930 %tmp5 = or <2 x i64> %tmp3, %tmp4
1934 define <8 x i8> @shl_orr8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
1935 ;CHECK-LABEL: shl_orr8b:
1936 ;CHECK: shl.8b v0, {{v[0-9]+}}, #1
1939 %tmp1 = load <8 x i8>, <8 x i8>* %A
1940 %tmp4 = load <8 x i8>, <8 x i8>* %B
1941 %tmp3 = shl <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1942 %tmp5 = or <8 x i8> %tmp3, %tmp4
1946 define <4 x i16> @shl_orr4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
1947 ;CHECK-LABEL: shl_orr4h:
1948 ;CHECK: shl.4h v0, {{v[0-9]+}}, #1
1951 %tmp1 = load <4 x i16>, <4 x i16>* %A
1952 %tmp4 = load <4 x i16>, <4 x i16>* %B
1953 %tmp3 = shl <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
1954 %tmp5 = or <4 x i16> %tmp3, %tmp4
1958 define <2 x i32> @shl_orr2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
1959 ;CHECK-LABEL: shl_orr2s:
1960 ;CHECK: shl.2s v0, {{v[0-9]+}}, #1
1963 %tmp1 = load <2 x i32>, <2 x i32>* %A
1964 %tmp4 = load <2 x i32>, <2 x i32>* %B
1965 %tmp3 = shl <2 x i32> %tmp1, <i32 1, i32 1>
1966 %tmp5 = or <2 x i32> %tmp3, %tmp4
1970 define <16 x i8> @shl_orr16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
1971 ;CHECK-LABEL: shl_orr16b:
1972 ;CHECK: shl.16b v0, {{v[0-9]+}}, #1
1973 ;CHECK-NEXT: orr.16b
1975 %tmp1 = load <16 x i8>, <16 x i8>* %A
1976 %tmp4 = load <16 x i8>, <16 x i8>* %B
1977 %tmp3 = shl <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1978 %tmp5 = or <16 x i8> %tmp3, %tmp4
1982 define <8 x i16> @shl_orr8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
1983 ;CHECK-LABEL: shl_orr8h:
1984 ;CHECK: shl.8h v0, {{v[0-9]+}}, #1
1985 ;CHECK-NEXT: orr.16b
1987 %tmp1 = load <8 x i16>, <8 x i16>* %A
1988 %tmp4 = load <8 x i16>, <8 x i16>* %B
1989 %tmp3 = shl <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1990 %tmp5 = or <8 x i16> %tmp3, %tmp4
1994 define <4 x i32> @shl_orr4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
1995 ;CHECK-LABEL: shl_orr4s:
1996 ;CHECK: shl.4s v0, {{v[0-9]+}}, #1
1997 ;CHECK-NEXT: orr.16b
1999 %tmp1 = load <4 x i32>, <4 x i32>* %A
2000 %tmp4 = load <4 x i32>, <4 x i32>* %B
2001 %tmp3 = shl <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
2002 %tmp5 = or <4 x i32> %tmp3, %tmp4
2006 define <2 x i64> @shl_orr2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
2007 ;CHECK-LABEL: shl_orr2d:
2008 ;CHECK: shl.2d v0, {{v[0-9]+}}, #1
2009 ;CHECK-NEXT: orr.16b
2011 %tmp1 = load <2 x i64>, <2 x i64>* %A
2012 %tmp4 = load <2 x i64>, <2 x i64>* %B
2013 %tmp3 = shl <2 x i64> %tmp1, <i64 1, i64 1>
2014 %tmp5 = or <2 x i64> %tmp3, %tmp4
2018 define <8 x i16> @shll(<8 x i8> %in) {
2019 ; CHECK-LABEL: shll:
2020 ; CHECK: shll.8h v0, {{v[0-9]+}}, #8
2021 %ext = zext <8 x i8> %in to <8 x i16>
2022 %res = shl <8 x i16> %ext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
2026 define <4 x i32> @shll_high(<8 x i16> %in) {
2027 ; CHECK-LABEL: shll_high:
2028 ; CHECK: shll2.4s v0, {{v[0-9]+}}, #16
2029 %extract = shufflevector <8 x i16> %in, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2030 %ext = zext <4 x i16> %extract to <4 x i32>
2031 %res = shl <4 x i32> %ext, <i32 16, i32 16, i32 16, i32 16>
2035 define <8 x i8> @sli8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
2036 ;CHECK-LABEL: sli8b:
2037 ;CHECK: sli.8b v0, {{v[0-9]+}}, #1
2038 %tmp1 = load <8 x i8>, <8 x i8>* %A
2039 %tmp2 = load <8 x i8>, <8 x i8>* %B
2040 %tmp3 = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1)
2044 define <4 x i16> @sli4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
2045 ;CHECK-LABEL: sli4h:
2046 ;CHECK: sli.4h v0, {{v[0-9]+}}, #1
2047 %tmp1 = load <4 x i16>, <4 x i16>* %A
2048 %tmp2 = load <4 x i16>, <4 x i16>* %B
2049 %tmp3 = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1)
2053 define <2 x i32> @sli2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
2054 ;CHECK-LABEL: sli2s:
2055 ;CHECK: sli.2s v0, {{v[0-9]+}}, #1
2056 %tmp1 = load <2 x i32>, <2 x i32>* %A
2057 %tmp2 = load <2 x i32>, <2 x i32>* %B
2058 %tmp3 = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1)
2062 define <1 x i64> @sli1d(<1 x i64>* %A, <1 x i64>* %B) nounwind {
2063 ;CHECK-LABEL: sli1d:
2064 ;CHECK: sli d0, {{d[0-9]+}}, #1
2065 %tmp1 = load <1 x i64>, <1 x i64>* %A
2066 %tmp2 = load <1 x i64>, <1 x i64>* %B
2067 %tmp3 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1)
2071 define <16 x i8> @sli16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
2072 ;CHECK-LABEL: sli16b:
2073 ;CHECK: sli.16b v0, {{v[0-9]+}}, #1
2074 %tmp1 = load <16 x i8>, <16 x i8>* %A
2075 %tmp2 = load <16 x i8>, <16 x i8>* %B
2076 %tmp3 = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1)
2080 define <8 x i16> @sli8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
2081 ;CHECK-LABEL: sli8h:
2082 ;CHECK: sli.8h v0, {{v[0-9]+}}, #1
2083 %tmp1 = load <8 x i16>, <8 x i16>* %A
2084 %tmp2 = load <8 x i16>, <8 x i16>* %B
2085 %tmp3 = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1)
2089 define <4 x i32> @sli4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
2090 ;CHECK-LABEL: sli4s:
2091 ;CHECK: sli.4s v0, {{v[0-9]+}}, #1
2092 %tmp1 = load <4 x i32>, <4 x i32>* %A
2093 %tmp2 = load <4 x i32>, <4 x i32>* %B
2094 %tmp3 = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1)
2098 define <2 x i64> @sli2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
2099 ;CHECK-LABEL: sli2d:
2100 ;CHECK: sli.2d v0, {{v[0-9]+}}, #1
2101 %tmp1 = load <2 x i64>, <2 x i64>* %A
2102 %tmp2 = load <2 x i64>, <2 x i64>* %B
2103 %tmp3 = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1)
2107 declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone
2108 declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone
2109 declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone
2110 declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone
2112 declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone
2113 declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone
2114 declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone
2115 declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone
2117 define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) {
2118 ; CHECK-LABEL: ashr_v1i64:
2119 ; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}}
2120 ; CHECK: sshl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
2121 %c = ashr <1 x i64> %a, %b