; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
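
; Immediate vector shifts: check that constant-splat shift, shift-and-accumulate,
; and narrowing-shift IR patterns select the expected AArch64 NEON
; immediate-shift instructions.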

define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) {
; CHECK: test_vshr_n_s8
; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <8 x i8> %vshr_n
}

define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) {
; CHECK: test_vshr_n_s16
; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vshr_n = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %vshr_n
}

define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) {
; CHECK: test_vshr_n_s32
; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vshr_n = ashr <2 x i32> %a, <i32 3, i32 3>
  ret <2 x i32> %vshr_n
}

define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
; CHECK: test_vshrq_n_s8
; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vshr_n = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %vshr_n
}

define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
; CHECK: test_vshrq_n_s16
; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vshr_n = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %vshr_n
}

define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
; CHECK: test_vshrq_n_s32
; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vshr_n = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %vshr_n
}

define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) {
; CHECK: test_vshrq_n_s64
; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vshr_n = ashr <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %vshr_n
}

define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) {
; CHECK: test_vshr_n_u8
; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vshr_n = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <8 x i8> %vshr_n
}

define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) {
; CHECK: test_vshr_n_u16
; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vshr_n = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %vshr_n
}

define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) {
; CHECK: test_vshr_n_u32
; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vshr_n = lshr <2 x i32> %a, <i32 3, i32 3>
  ret <2 x i32> %vshr_n
}

define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
; CHECK: test_vshrq_n_u8
; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vshr_n = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %vshr_n
}

define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
; CHECK: test_vshrq_n_u16
; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vshr_n = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %vshr_n
}

define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
; CHECK: test_vshrq_n_u32
; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vshr_n = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %vshr_n
}

define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) {
; CHECK: test_vshrq_n_u64
; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vshr_n = lshr <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %vshr_n
}
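
; Shift-right-and-accumulate: an ashr/lshr by a splat constant whose result
; feeds an add should fold into a single ssra/usra.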

define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsra_n_s8
; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsra_n = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <8 x i8> %vsra_n, %a
  ret <8 x i8> %1
}

define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsra_n_s16
; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsra_n = ashr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
  %1 = add <4 x i16> %vsra_n, %a
  ret <4 x i16> %1
}

define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsra_n_s32
; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsra_n = ashr <2 x i32> %b, <i32 3, i32 3>
  %1 = add <2 x i32> %vsra_n, %a
  ret <2 x i32> %1
}

define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsraq_n_s8
; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsra_n = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <16 x i8> %vsra_n, %a
  ret <16 x i8> %1
}

define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsraq_n_s16
; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsra_n = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %1 = add <8 x i16> %vsra_n, %a
  ret <8 x i16> %1
}

define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsraq_n_s32
; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsra_n = ashr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
  %1 = add <4 x i32> %vsra_n, %a
  ret <4 x i32> %1
}

define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsraq_n_s64
; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsra_n = ashr <2 x i64> %b, <i64 3, i64 3>
  %1 = add <2 x i64> %vsra_n, %a
  ret <2 x i64> %1
}

define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsra_n_u8
; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsra_n = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <8 x i8> %vsra_n, %a
  ret <8 x i8> %1
}

define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsra_n_u16
; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsra_n = lshr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
  %1 = add <4 x i16> %vsra_n, %a
  ret <4 x i16> %1
}

define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsra_n_u32
; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsra_n = lshr <2 x i32> %b, <i32 3, i32 3>
  %1 = add <2 x i32> %vsra_n, %a
  ret <2 x i32> %1
}

define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsraq_n_u8
; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsra_n = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <16 x i8> %vsra_n, %a
  ret <16 x i8> %1
}

define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsraq_n_u16
; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsra_n = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %1 = add <8 x i16> %vsra_n, %a
  ret <8 x i16> %1
}

define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsraq_n_u32
; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsra_n = lshr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
  %1 = add <4 x i32> %vsra_n, %a
  ret <4 x i32> %1
}

define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsraq_n_u64
; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsra_n = lshr <2 x i64> %b, <i64 3, i64 3>
  %1 = add <2 x i64> %vsra_n, %a
  ret <2 x i64> %1
}
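
; Narrowing shifts: a right shift by a splat constant followed by a trunc to
; the half-width element type should select shrn.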

define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) {
; CHECK: test_vshrn_n_s16
; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %1 = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  ret <8 x i8> %vshrn_n
}

define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) {
; CHECK: test_vshrn_n_s32
; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %1 = ashr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  ret <4 x i16> %vshrn_n
}

define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) {
; CHECK: test_vshrn_n_s64
; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %1 = ashr <2 x i64> %a, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %vshrn_n
}

define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) {
; CHECK: test_vshrn_n_u16
; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %1 = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  ret <8 x i8> %vshrn_n
}

define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) {
; CHECK: test_vshrn_n_u32
; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %1 = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  ret <4 x i16> %vshrn_n
}

define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) {
; CHECK: test_vshrn_n_u64
; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %1 = lshr <2 x i64> %a, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %vshrn_n
}
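
; "_high" narrowing shifts: the narrowed result is concatenated onto the
; existing low half (via <1 x i64> bitcasts and a shufflevector), which should
; select the second-half shrn2 form.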

define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vshrn_high_n_s16
; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %2 = bitcast <8 x i8> %a to <1 x i64>
  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %4
}

define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vshrn_high_n_s32
; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  %2 = bitcast <4 x i16> %a to <1 x i64>
  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %4
}

define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vshrn_high_n_s64
; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = ashr <2 x i64> %b, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}

define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vshrn_high_n_u16
; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %2 = bitcast <8 x i8> %a to <1 x i64>
  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %4
}

define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vshrn_high_n_u32
; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  %2 = bitcast <4 x i16> %a to <1 x i64>
  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %4
}

define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vshrn_high_n_u64
; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = lshr <2 x i64> %b, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}
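
; Saturating and rounding narrowing shifts are expressed with the
; llvm.aarch64.neon.* intrinsics declared below; when their result is
; concatenated onto a low half as above, the "2" (second-half) instruction
; forms are expected.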

define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrun_high_n_s16
; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrun_high_n_s32
; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrun_high_n_s64
; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vrshrn_high_n_s16
; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vrshrn_high_n_s32
; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vrshrn_high_n_s64
; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrun_high_n_s16
; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrun_high_n_s32
; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrun_high_n_s64
; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrn_high_n_s16
; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrn_high_n_s32
; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrn_high_n_s64
; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrn_high_n_u16
; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrn_high_n_u32
; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrn_high_n_u64
; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrn_high_n_s16
; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrn_high_n_s32
; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrn_high_n_s64
; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrn_high_n_u16
; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrn_high_n_u32
; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrn_high_n_u64
; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32)
declare <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32)
declare <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32)
declare <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32)
declare <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32)
declare <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32)
declare <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32)
declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32)
declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32)
declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32)
declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32)
declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)
declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32)
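
; Fixed-point conversions on 64-bit scalar vectors: the i32 immediate is the
; number of fractional bits (#64 here) in the d-register forms of
; fcvtzs/fcvtzu/scvtf/ucvtf.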

define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvt_n_s64_f64
; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64)
  ret <1 x i64> %1
}

define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvt_n_u64_f64
; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64)
  ret <1 x i64> %1
}

define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) {
; CHECK-LABEL: test_vcvt_n_f64_s64
; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
  ret <1 x double> %1
}

define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) {
; CHECK-LABEL: test_vcvt_n_f64_u64
; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
  ret <1 x double> %1
}

declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32)
declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32)
declare <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32)
declare <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32)