// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: aarch64-registered-target

// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s

#include <arm_sme_draft_spec_subject_to_change.h>
#ifdef SVE_OVERLOADED_FORMS
// A simple used,unused... macro, long enough to represent any SVE builtin.
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5
#else
#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5
#endif
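
// For example, SVE_ACLE_FUNC(svqrshr,_n,_u16,_u32_x2,) expands to the
// overloaded name svqrshr_u16 when SVE_OVERLOADED_FORMS is defined, and to
// the fully qualified name svqrshr_n_u16_u32_x2 otherwise (illustrative note
// on how the tests below exercise both spellings of each intrinsic).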
// CHECK-LABEL: @test_svsqrshr_u16_u32_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshr.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], i32 16)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z24test_svsqrshr_u16_u32_x412svuint32x2_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshr.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], i32 16)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
//
svuint16_t test_svsqrshr_u16_u32_x4(svuint32x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svqrshr,_n,_u16,_u32_x2,)(zn, 16);
}

// CHECK-LABEL: @test_svsqrshr_s16_s32_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshr.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], i32 16)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z24test_svsqrshr_s16_s32_x411svint32x2_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshr.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], i32 16)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
//
svint16_t test_svsqrshr_s16_s32_x4(svint32x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svqrshr,_n,_s16,_s32_x2,)(zn, 16);
}

// CHECK-LABEL: @test_svsqrshr_u8_u32_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshr.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i32 8)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z23test_svsqrshr_u8_u32_x412svuint32x4_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshr.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i32 8)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP4]]
//
svuint8_t test_svsqrshr_u8_u32_x4(svuint32x4_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svqrshr,_n,_u8,_u32_x4,)(zn, 8);
}

// CHECK-LABEL: @test_svsqrshr_s8_s32_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshr.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i32 8)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z23test_svsqrshr_s8_s32_x411svint32x4_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshr.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i32 8)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP4]]
//
svint8_t test_svsqrshr_s8_s32_x4(svint32x4_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svqrshr,_n,_s8,_s32_x4,)(zn, 8);
}

// CHECK-LABEL: @test_svsqrshr_u16_u64_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshr.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i32 16)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z24test_svsqrshr_u16_u64_x412svuint64x4_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshr.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i32 16)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
svuint16_t test_svsqrshr_u16_u64_x4(svuint64x4_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svqrshr,_n,_u16,_u64_x4,)(zn, 16);
}

// CHECK-LABEL: @test_svsqrshr_s16_s64_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshr.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i32 16)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z24test_svsqrshr_s16_s64_x411svint64x4_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshr.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i32 16)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
svint16_t test_svsqrshr_s16_s64_x4(svint64x4_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svqrshr,_n,_s16,_s64_x4,)(zn, 16);
}

// CHECK-LABEL: @test_svsqrshrn_u8_u32_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrn.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i32 8)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z24test_svsqrshrn_u8_u32_x412svuint32x4_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrn.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i32 8)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP4]]
//
svuint8_t test_svsqrshrn_u8_u32_x4(svuint32x4_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svqrshrn,_n,_u8,_u32_x4,)(zn, 8);
}

// CHECK-LABEL: @test_svsqrshrn_s8_s32_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrn.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i32 8)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z24test_svsqrshrn_s8_s32_x411svint32x4_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrn.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i32 8)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP4]]
//
svint8_t test_svsqrshrn_s8_s32_x4(svint32x4_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svqrshrn,_n,_s8,_s32_x4,)(zn, 8);
}

// CHECK-LABEL: @test_svsqrshrn_u16_u64_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrn.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i32 16)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z25test_svsqrshrn_u16_u64_x412svuint64x4_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrn.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i32 16)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
svuint16_t test_svsqrshrn_u16_u64_x4(svuint64x4_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svqrshrn,_n,_u16,_u64_x4,)(zn, 16);
}

// CHECK-LABEL: @test_svsqrshrn_s16_s64_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrn.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i32 16)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z25test_svsqrshrn_s16_s64_x411svint64x4_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrn.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i32 16)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
svint16_t test_svsqrshrn_s16_s64_x4(svint64x4_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svqrshrn,_n,_s16,_s64_x4,)(zn, 16);
}

// CHECK-LABEL: @test_svsvqrshru_u16_s32_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshru.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], i32 16)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z26test_svsvqrshru_u16_s32_x211svint32x2_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshru.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], i32 16)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
//
svuint16_t test_svsvqrshru_u16_s32_x2(svint32x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svqrshru,_n,_u16,_s32_x2,)(zn, 16);
}

// CHECK-LABEL: @test_svsqrshru_u8_s32_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshru.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i32 8)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z24test_svsqrshru_u8_s32_x411svint32x4_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshru.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i32 8)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP4]]
//
svuint8_t test_svsqrshru_u8_s32_x4(svint32x4_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svqrshru,_n,_u8,_s32_x4,)(zn, 8);
}

// CHECK-LABEL: @test_svsqrshru_u16_s64_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshru.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i32 16)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z25test_svsqrshru_u16_s64_x411svint64x4_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshru.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i32 16)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
svuint16_t test_svsqrshru_u16_s64_x4(svint64x4_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svqrshru,_n,_u16,_s64_x4,)(zn, 16);
}

// CHECK-LABEL: @test_svsqrshrun_u8_s32_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrun.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i32 32)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z25test_svsqrshrun_u8_s32_x411svint32x4_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrun.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i32 32)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP4]]
//
svuint8_t test_svsqrshrun_u8_s32_x4(svint32x4_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svqrshrun,_n,_u8,_s32_x4,)(zn, 32);
}

// CHECK-LABEL: @test_svsqrshrun_u16_s64_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrun.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i32 64)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z26test_svsqrshrun_u16_s64_x411svint64x4_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrun.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i32 64)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
svuint16_t test_svsqrshrun_u16_s64_x4(svint64x4_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svqrshrun,_n,_u16,_s64_x4,)(zn, 64);
}