1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
; Signed shift-right by immediate: a plain IR 'ashr' by a splat constant
; vector is expected to select the NEON 'sshr' (vector, immediate) form
; for 64-bit (.8b/.4h/.2s) and 128-bit (.16b/.8h/.4s/.2d) register shapes.
; NOTE(review): this extraction is missing interleaved lines of the
; original autogenerated test (several 'ret' statements and closing
; braces are absent) — confirm against the full file before editing or
; regenerating assertions.
4 define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) {
5 ; CHECK-LABEL: test_vshr_n_s8:
7 ; CHECK-NEXT: sshr v0.8b, v0.8b, #3
9 %vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
13 define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) {
14 ; CHECK-LABEL: test_vshr_n_s16:
16 ; CHECK-NEXT: sshr v0.4h, v0.4h, #3
18 %vshr_n = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
22 define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) {
23 ; CHECK-LABEL: test_vshr_n_s32:
25 ; CHECK-NEXT: sshr v0.2s, v0.2s, #3
27 %vshr_n = ashr <2 x i32> %a, <i32 3, i32 3>
; 128-bit (q-register) variants of the same pattern.
31 define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
32 ; CHECK-LABEL: test_vshrq_n_s8:
34 ; CHECK-NEXT: sshr v0.16b, v0.16b, #3
36 %vshr_n = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
40 define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
41 ; CHECK-LABEL: test_vshrq_n_s16:
43 ; CHECK-NEXT: sshr v0.8h, v0.8h, #3
45 %vshr_n = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
49 define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
50 ; CHECK-LABEL: test_vshrq_n_s32:
52 ; CHECK-NEXT: sshr v0.4s, v0.4s, #3
54 %vshr_n = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
58 define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) {
59 ; CHECK-LABEL: test_vshrq_n_s64:
61 ; CHECK-NEXT: sshr v0.2d, v0.2d, #3
63 %vshr_n = ashr <2 x i64> %a, <i64 3, i64 3>
; Unsigned shift-right by immediate: IR 'lshr' by a splat constant is
; expected to select the NEON 'ushr' (vector, immediate) instruction,
; mirroring the signed 'sshr' tests above for each element size.
67 define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) {
68 ; CHECK-LABEL: test_vshr_n_u8:
70 ; CHECK-NEXT: ushr v0.8b, v0.8b, #3
72 %vshr_n = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
76 define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) {
77 ; CHECK-LABEL: test_vshr_n_u16:
79 ; CHECK-NEXT: ushr v0.4h, v0.4h, #3
81 %vshr_n = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
85 define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) {
86 ; CHECK-LABEL: test_vshr_n_u32:
88 ; CHECK-NEXT: ushr v0.2s, v0.2s, #3
90 %vshr_n = lshr <2 x i32> %a, <i32 3, i32 3>
; 128-bit (q-register) variants.
94 define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
95 ; CHECK-LABEL: test_vshrq_n_u8:
97 ; CHECK-NEXT: ushr v0.16b, v0.16b, #3
99 %vshr_n = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
100 ret <16 x i8> %vshr_n
103 define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
104 ; CHECK-LABEL: test_vshrq_n_u16:
106 ; CHECK-NEXT: ushr v0.8h, v0.8h, #3
108 %vshr_n = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
109 ret <8 x i16> %vshr_n
112 define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
113 ; CHECK-LABEL: test_vshrq_n_u32:
115 ; CHECK-NEXT: ushr v0.4s, v0.4s, #3
117 %vshr_n = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
118 ret <4 x i32> %vshr_n
121 define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) {
122 ; CHECK-LABEL: test_vshrq_n_u64:
124 ; CHECK-NEXT: ushr v0.2d, v0.2d, #3
126 %vshr_n = lshr <2 x i64> %a, <i64 3, i64 3>
127 ret <2 x i64> %vshr_n
; Signed shift-right-and-accumulate: the 'ashr %b, splat' + 'add' pair
; is expected to fuse into a single 'ssra' instruction (shift %b right,
; accumulate into the destination holding %a).
130 define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) {
131 ; CHECK-LABEL: test_vsra_n_s8:
133 ; CHECK-NEXT: ssra v0.8b, v1.8b, #3
135 %vsra_n = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
136 %1 = add <8 x i8> %vsra_n, %a
140 define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) {
141 ; CHECK-LABEL: test_vsra_n_s16:
143 ; CHECK-NEXT: ssra v0.4h, v1.4h, #3
145 %vsra_n = ashr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
146 %1 = add <4 x i16> %vsra_n, %a
150 define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) {
151 ; CHECK-LABEL: test_vsra_n_s32:
153 ; CHECK-NEXT: ssra v0.2s, v1.2s, #3
155 %vsra_n = ashr <2 x i32> %b, <i32 3, i32 3>
156 %1 = add <2 x i32> %vsra_n, %a
; 128-bit (q-register) variants.
160 define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) {
161 ; CHECK-LABEL: test_vsraq_n_s8:
163 ; CHECK-NEXT: ssra v0.16b, v1.16b, #3
165 %vsra_n = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
166 %1 = add <16 x i8> %vsra_n, %a
170 define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) {
171 ; CHECK-LABEL: test_vsraq_n_s16:
173 ; CHECK-NEXT: ssra v0.8h, v1.8h, #3
175 %vsra_n = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
176 %1 = add <8 x i16> %vsra_n, %a
180 define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) {
181 ; CHECK-LABEL: test_vsraq_n_s32:
183 ; CHECK-NEXT: ssra v0.4s, v1.4s, #3
185 %vsra_n = ashr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
186 %1 = add <4 x i32> %vsra_n, %a
190 define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) {
191 ; CHECK-LABEL: test_vsraq_n_s64:
193 ; CHECK-NEXT: ssra v0.2d, v1.2d, #3
195 %vsra_n = ashr <2 x i64> %b, <i64 3, i64 3>
196 %1 = add <2 x i64> %vsra_n, %a
; Unsigned shift-right-and-accumulate: 'lshr %b, splat' + 'add' is
; expected to fuse into a single 'usra' instruction, mirroring the
; signed 'ssra' tests above.
200 define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) {
201 ; CHECK-LABEL: test_vsra_n_u8:
203 ; CHECK-NEXT: usra v0.8b, v1.8b, #3
205 %vsra_n = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
206 %1 = add <8 x i8> %vsra_n, %a
210 define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) {
211 ; CHECK-LABEL: test_vsra_n_u16:
213 ; CHECK-NEXT: usra v0.4h, v1.4h, #3
215 %vsra_n = lshr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
216 %1 = add <4 x i16> %vsra_n, %a
220 define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) {
221 ; CHECK-LABEL: test_vsra_n_u32:
223 ; CHECK-NEXT: usra v0.2s, v1.2s, #3
225 %vsra_n = lshr <2 x i32> %b, <i32 3, i32 3>
226 %1 = add <2 x i32> %vsra_n, %a
; 128-bit (q-register) variants.
230 define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) {
231 ; CHECK-LABEL: test_vsraq_n_u8:
233 ; CHECK-NEXT: usra v0.16b, v1.16b, #3
235 %vsra_n = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
236 %1 = add <16 x i8> %vsra_n, %a
240 define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) {
241 ; CHECK-LABEL: test_vsraq_n_u16:
243 ; CHECK-NEXT: usra v0.8h, v1.8h, #3
245 %vsra_n = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
246 %1 = add <8 x i16> %vsra_n, %a
250 define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) {
251 ; CHECK-LABEL: test_vsraq_n_u32:
253 ; CHECK-NEXT: usra v0.4s, v1.4s, #3
255 %vsra_n = lshr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
256 %1 = add <4 x i32> %vsra_n, %a
260 define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) {
261 ; CHECK-LABEL: test_vsraq_n_u64:
263 ; CHECK-NEXT: usra v0.2d, v1.2d, #3
265 %vsra_n = lshr <2 x i64> %b, <i64 3, i64 3>
266 %1 = add <2 x i64> %vsra_n, %a
; Narrowing shift right: shift-by-splat followed by 'trunc' to half
; element width is expected to select 'shrn'. Both 'ashr' and 'lshr'
; feeding the trunc select the same instruction here, as the assertions
; below show for the signed and unsigned variants.
270 define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) {
271 ; CHECK-LABEL: test_vshrn_n_s16:
273 ; CHECK-NEXT: shrn v0.8b, v0.8h, #3
275 %1 = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
276 %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
277 ret <8 x i8> %vshrn_n
280 define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) {
281 ; CHECK-LABEL: test_vshrn_n_s32:
283 ; CHECK-NEXT: shrn v0.4h, v0.4s, #9
285 %1 = ashr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
286 %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
287 ret <4 x i16> %vshrn_n
290 define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) {
291 ; CHECK-LABEL: test_vshrn_n_s64:
293 ; CHECK-NEXT: shrn v0.2s, v0.2d, #19
295 %1 = ashr <2 x i64> %a, <i64 19, i64 19>
296 %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
297 ret <2 x i32> %vshrn_n
; Unsigned-shift variants select the same 'shrn' encodings.
300 define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) {
301 ; CHECK-LABEL: test_vshrn_n_u16:
303 ; CHECK-NEXT: shrn v0.8b, v0.8h, #3
305 %1 = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
306 %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
307 ret <8 x i8> %vshrn_n
310 define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) {
311 ; CHECK-LABEL: test_vshrn_n_u32:
313 ; CHECK-NEXT: shrn v0.4h, v0.4s, #9
315 %1 = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
316 %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
317 ret <4 x i16> %vshrn_n
320 define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) {
321 ; CHECK-LABEL: test_vshrn_n_u64:
323 ; CHECK-NEXT: shrn v0.2s, v0.2d, #19
325 %1 = lshr <2 x i64> %a, <i64 19, i64 19>
326 %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
327 ret <2 x i32> %vshrn_n
; Narrowing shift into the high half: the shift+trunc result is
; concatenated with %a via <1 x i64> bitcasts and a shufflevector, and
; is expected to select 'shrn2' writing the upper half of the q-register
; while the lower half keeps %a (hence the 'kill: def $d0 … def $q0'
; register-liveness note emitted before the instruction).
330 define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
331 ; CHECK-LABEL: test_vshrn_high_n_s16:
333 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
334 ; CHECK-NEXT: shrn2 v0.16b, v1.8h, #3
336 %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
337 %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
338 %2 = bitcast <8 x i8> %a to <1 x i64>
339 %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
340 %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
341 %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
345 define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
346 ; CHECK-LABEL: test_vshrn_high_n_s32:
348 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
349 ; CHECK-NEXT: shrn2 v0.8h, v1.4s, #9
351 %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
352 %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
353 %2 = bitcast <4 x i16> %a to <1 x i64>
354 %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
355 %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
356 %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
360 define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
361 ; CHECK-LABEL: test_vshrn_high_n_s64:
363 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
364 ; CHECK-NEXT: shrn2 v0.4s, v1.2d, #19
366 %1 = bitcast <2 x i32> %a to <1 x i64>
367 %2 = ashr <2 x i64> %b, <i64 19, i64 19>
368 %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
369 %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
370 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
371 %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
; Unsigned-shift variants also select 'shrn2'.
375 define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
376 ; CHECK-LABEL: test_vshrn_high_n_u16:
378 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
379 ; CHECK-NEXT: shrn2 v0.16b, v1.8h, #3
381 %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
382 %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
383 %2 = bitcast <8 x i8> %a to <1 x i64>
384 %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
385 %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
386 %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
390 define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
391 ; CHECK-LABEL: test_vshrn_high_n_u32:
393 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
394 ; CHECK-NEXT: shrn2 v0.8h, v1.4s, #9
396 %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
397 %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
398 %2 = bitcast <4 x i16> %a to <1 x i64>
399 %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
400 %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
401 %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
405 define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
406 ; CHECK-LABEL: test_vshrn_high_n_u64:
408 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
409 ; CHECK-NEXT: shrn2 v0.4s, v1.2d, #19
411 %1 = bitcast <2 x i32> %a to <1 x i64>
412 %2 = lshr <2 x i64> %b, <i64 19, i64 19>
413 %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
414 %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
415 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
416 %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
; High-half form of the sqshrun intrinsic: the intrinsic result
; concatenated with %a (via <1 x i64> bitcasts + shufflevector) is
; expected to select a single 'sqshrun2' into the upper half.
420 define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
421 ; CHECK-LABEL: test_vqshrun_high_n_s16:
423 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
424 ; CHECK-NEXT: sqshrun2 v0.16b, v1.8h, #3
426 %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %b, i32 3)
427 %1 = bitcast <8 x i8> %a to <1 x i64>
428 %2 = bitcast <8 x i8> %vqshrun to <1 x i64>
429 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
430 %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
434 define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
435 ; CHECK-LABEL: test_vqshrun_high_n_s32:
437 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
438 ; CHECK-NEXT: sqshrun2 v0.8h, v1.4s, #9
440 %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %b, i32 9)
441 %1 = bitcast <4 x i16> %a to <1 x i64>
442 %2 = bitcast <4 x i16> %vqshrun to <1 x i64>
443 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
444 %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
448 define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
449 ; CHECK-LABEL: test_vqshrun_high_n_s64:
451 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
452 ; CHECK-NEXT: sqshrun2 v0.4s, v1.2d, #19
454 %1 = bitcast <2 x i32> %a to <1 x i64>
455 %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %b, i32 19)
456 %2 = bitcast <2 x i32> %vqshrun to <1 x i64>
457 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
458 %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
; High-half form of the rshrn intrinsic: concat with %a is expected to
; select 'rshrn2' (same bitcast/shufflevector concat pattern as above).
462 define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
463 ; CHECK-LABEL: test_vrshrn_high_n_s16:
465 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
466 ; CHECK-NEXT: rshrn2 v0.16b, v1.8h, #3
468 %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %b, i32 3)
469 %1 = bitcast <8 x i8> %a to <1 x i64>
470 %2 = bitcast <8 x i8> %vrshrn to <1 x i64>
471 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
472 %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
476 define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
477 ; CHECK-LABEL: test_vrshrn_high_n_s32:
479 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
480 ; CHECK-NEXT: rshrn2 v0.8h, v1.4s, #9
482 %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 9)
483 %1 = bitcast <4 x i16> %a to <1 x i64>
484 %2 = bitcast <4 x i16> %vrshrn to <1 x i64>
485 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
486 %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
490 define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
491 ; CHECK-LABEL: test_vrshrn_high_n_s64:
493 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
494 ; CHECK-NEXT: rshrn2 v0.4s, v1.2d, #19
496 %1 = bitcast <2 x i32> %a to <1 x i64>
497 %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %b, i32 19)
498 %2 = bitcast <2 x i32> %vrshrn to <1 x i64>
499 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
500 %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
; High-half form of the sqrshrun intrinsic: concat with %a is expected
; to select 'sqrshrun2'.
504 define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
505 ; CHECK-LABEL: test_vqrshrun_high_n_s16:
507 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
508 ; CHECK-NEXT: sqrshrun2 v0.16b, v1.8h, #3
510 %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %b, i32 3)
511 %1 = bitcast <8 x i8> %a to <1 x i64>
512 %2 = bitcast <8 x i8> %vqrshrun to <1 x i64>
513 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
514 %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
518 define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
519 ; CHECK-LABEL: test_vqrshrun_high_n_s32:
521 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
522 ; CHECK-NEXT: sqrshrun2 v0.8h, v1.4s, #9
524 %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %b, i32 9)
525 %1 = bitcast <4 x i16> %a to <1 x i64>
526 %2 = bitcast <4 x i16> %vqrshrun to <1 x i64>
527 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
528 %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
532 define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
533 ; CHECK-LABEL: test_vqrshrun_high_n_s64:
535 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
536 ; CHECK-NEXT: sqrshrun2 v0.4s, v1.2d, #19
538 %1 = bitcast <2 x i32> %a to <1 x i64>
539 %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %b, i32 19)
540 %2 = bitcast <2 x i32> %vqrshrun to <1 x i64>
541 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
542 %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
; High-half form of the sqshrn intrinsic: concat with %a is expected to
; select 'sqshrn2'.
546 define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
547 ; CHECK-LABEL: test_vqshrn_high_n_s16:
549 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
550 ; CHECK-NEXT: sqshrn2 v0.16b, v1.8h, #3
552 %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %b, i32 3)
553 %1 = bitcast <8 x i8> %a to <1 x i64>
554 %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
555 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
556 %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
560 define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
561 ; CHECK-LABEL: test_vqshrn_high_n_s32:
563 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
564 ; CHECK-NEXT: sqshrn2 v0.8h, v1.4s, #9
566 %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %b, i32 9)
567 %1 = bitcast <4 x i16> %a to <1 x i64>
568 %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
569 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
570 %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
574 define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
575 ; CHECK-LABEL: test_vqshrn_high_n_s64:
577 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
578 ; CHECK-NEXT: sqshrn2 v0.4s, v1.2d, #19
580 %1 = bitcast <2 x i32> %a to <1 x i64>
581 %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %b, i32 19)
582 %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
583 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
584 %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
; High-half form of the uqshrn intrinsic: concat with %a is expected to
; select 'uqshrn2'.
588 define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
589 ; CHECK-LABEL: test_vqshrn_high_n_u16:
591 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
592 ; CHECK-NEXT: uqshrn2 v0.16b, v1.8h, #3
594 %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %b, i32 3)
595 %1 = bitcast <8 x i8> %a to <1 x i64>
596 %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
597 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
598 %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
602 define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
603 ; CHECK-LABEL: test_vqshrn_high_n_u32:
605 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
606 ; CHECK-NEXT: uqshrn2 v0.8h, v1.4s, #9
608 %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %b, i32 9)
609 %1 = bitcast <4 x i16> %a to <1 x i64>
610 %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
611 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
612 %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
616 define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
617 ; CHECK-LABEL: test_vqshrn_high_n_u64:
619 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
620 ; CHECK-NEXT: uqshrn2 v0.4s, v1.2d, #19
622 %1 = bitcast <2 x i32> %a to <1 x i64>
623 %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %b, i32 19)
624 %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
625 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
626 %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
; High-half form of the sqrshrn intrinsic: concat with %a is expected
; to select 'sqrshrn2'.
630 define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
631 ; CHECK-LABEL: test_vqrshrn_high_n_s16:
633 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
634 ; CHECK-NEXT: sqrshrn2 v0.16b, v1.8h, #3
636 %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %b, i32 3)
637 %1 = bitcast <8 x i8> %a to <1 x i64>
638 %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
639 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
640 %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
644 define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
645 ; CHECK-LABEL: test_vqrshrn_high_n_s32:
647 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
648 ; CHECK-NEXT: sqrshrn2 v0.8h, v1.4s, #9
650 %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %b, i32 9)
651 %1 = bitcast <4 x i16> %a to <1 x i64>
652 %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
653 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
654 %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
658 define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
659 ; CHECK-LABEL: test_vqrshrn_high_n_s64:
661 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
662 ; CHECK-NEXT: sqrshrn2 v0.4s, v1.2d, #19
664 %1 = bitcast <2 x i32> %a to <1 x i64>
665 %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %b, i32 19)
666 %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
667 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
668 %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
; High-half form of the uqrshrn intrinsic: concat with %a is expected
; to select 'uqrshrn2'.
672 define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
673 ; CHECK-LABEL: test_vqrshrn_high_n_u16:
675 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
676 ; CHECK-NEXT: uqrshrn2 v0.16b, v1.8h, #3
678 %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %b, i32 3)
679 %1 = bitcast <8 x i8> %a to <1 x i64>
680 %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
681 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
682 %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
686 define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
687 ; CHECK-LABEL: test_vqrshrn_high_n_u32:
689 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
690 ; CHECK-NEXT: uqrshrn2 v0.8h, v1.4s, #9
692 %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %b, i32 9)
693 %1 = bitcast <4 x i16> %a to <1 x i64>
694 %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
695 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
696 %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
700 define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
701 ; CHECK-LABEL: test_vqrshrn_high_n_u64:
703 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
704 ; CHECK-NEXT: uqrshrn2 v0.4s, v1.2d, #19
706 %1 = bitcast <2 x i32> %a to <1 x i64>
707 %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %b, i32 19)
708 %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
709 %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
710 %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
; Declarations for the AArch64 NEON narrowing-shift and fixed-point
; conversion intrinsics used by the tests above. The vcvtfx* vector
; declarations here have no visible callers in this chunk —
; NOTE(review): presumably used by tests elsewhere in the full file.
716 declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32)
718 declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32)
720 declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32)
722 declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32)
724 declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32)
726 declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32)
728 declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32)
730 declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32)
732 declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32)
734 declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32)
736 declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32)
738 declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32)
740 declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32)
742 declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32)
744 declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32)
746 declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32)
748 declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32)
750 declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32)
752 declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32)
754 declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32)
756 declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32)
758 declare <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32)
760 declare <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32)
762 declare <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32)
764 declare <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32)
766 declare <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32)
768 declare <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32)
770 declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32)
772 declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32)
774 declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32)
776 declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32)
778 declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)
780 declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32)
; Fixed-point conversions on <1 x i64>/<1 x double>: the vcvtfp2fx* /
; vcvtfx2fp* intrinsics with 64 fractional bits are expected to lower
; to the scalar d-register forms fcvtzs/fcvtzu/scvtf/ucvtf with #64.
782 define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) {
783 ; CHECK-LABEL: test_vcvt_n_s64_f64:
785 ; CHECK-NEXT: fcvtzs d0, d0, #64
787 %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64)
791 define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) {
792 ; CHECK-LABEL: test_vcvt_n_u64_f64:
794 ; CHECK-NEXT: fcvtzu d0, d0, #64
796 %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64)
800 define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) {
801 ; CHECK-LABEL: test_vcvt_n_f64_s64:
803 ; CHECK-NEXT: scvtf d0, d0, #64
805 %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
809 define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) {
810 ; CHECK-LABEL: test_vcvt_n_f64_u64:
812 ; CHECK-NEXT: ucvtf d0, d0, #64
814 %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
; Declarations for the <1 x …> fixed-point conversion intrinsics above.
818 declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32)
819 declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32)
820 declare <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32)
821 declare <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32)