Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / CodeGenCXX / aarch64-sve-vector-init.cpp
blob3a57f80110d4738533ce1edf4861e8b54d1a6035
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
2 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -emit-llvm -o - %s | FileCheck %s
4 // CHECK-LABEL: define dso_local void @_Z11test_localsv
5 // CHECK-SAME: () #[[ATTR0:[0-9]+]] {
6 // CHECK-NEXT: entry:
7 // CHECK-NEXT: [[S8:%.*]] = alloca <vscale x 16 x i8>, align 16
8 // CHECK-NEXT: [[S16:%.*]] = alloca <vscale x 8 x i16>, align 16
9 // CHECK-NEXT: [[S32:%.*]] = alloca <vscale x 4 x i32>, align 16
10 // CHECK-NEXT: [[S64:%.*]] = alloca <vscale x 2 x i64>, align 16
11 // CHECK-NEXT: [[U8:%.*]] = alloca <vscale x 16 x i8>, align 16
12 // CHECK-NEXT: [[U16:%.*]] = alloca <vscale x 8 x i16>, align 16
13 // CHECK-NEXT: [[U32:%.*]] = alloca <vscale x 4 x i32>, align 16
14 // CHECK-NEXT: [[U64:%.*]] = alloca <vscale x 2 x i64>, align 16
15 // CHECK-NEXT: [[F16:%.*]] = alloca <vscale x 8 x half>, align 16
16 // CHECK-NEXT: [[F32:%.*]] = alloca <vscale x 4 x float>, align 16
17 // CHECK-NEXT: [[F64:%.*]] = alloca <vscale x 2 x double>, align 16
18 // CHECK-NEXT: [[BF16:%.*]] = alloca <vscale x 8 x bfloat>, align 16
19 // CHECK-NEXT: [[S8X2:%.*]] = alloca <vscale x 32 x i8>, align 16
20 // CHECK-NEXT: [[S16X2:%.*]] = alloca <vscale x 16 x i16>, align 16
21 // CHECK-NEXT: [[S32X2:%.*]] = alloca <vscale x 8 x i32>, align 16
22 // CHECK-NEXT: [[X64X2:%.*]] = alloca <vscale x 4 x i64>, align 16
23 // CHECK-NEXT: [[U8X2:%.*]] = alloca <vscale x 32 x i8>, align 16
24 // CHECK-NEXT: [[U16X2:%.*]] = alloca <vscale x 16 x i16>, align 16
25 // CHECK-NEXT: [[U32X2:%.*]] = alloca <vscale x 8 x i32>, align 16
26 // CHECK-NEXT: [[U64X2:%.*]] = alloca <vscale x 4 x i64>, align 16
27 // CHECK-NEXT: [[F16X2:%.*]] = alloca <vscale x 16 x half>, align 16
28 // CHECK-NEXT: [[F32X2:%.*]] = alloca <vscale x 8 x float>, align 16
29 // CHECK-NEXT: [[F64X2:%.*]] = alloca <vscale x 4 x double>, align 16
30 // CHECK-NEXT: [[BF16X2:%.*]] = alloca <vscale x 16 x bfloat>, align 16
31 // CHECK-NEXT: [[S8X3:%.*]] = alloca <vscale x 48 x i8>, align 16
32 // CHECK-NEXT: [[S16X3:%.*]] = alloca <vscale x 24 x i16>, align 16
33 // CHECK-NEXT: [[S32X3:%.*]] = alloca <vscale x 12 x i32>, align 16
34 // CHECK-NEXT: [[X64X3:%.*]] = alloca <vscale x 6 x i64>, align 16
35 // CHECK-NEXT: [[U8X3:%.*]] = alloca <vscale x 48 x i8>, align 16
36 // CHECK-NEXT: [[U16X3:%.*]] = alloca <vscale x 24 x i16>, align 16
37 // CHECK-NEXT: [[U32X3:%.*]] = alloca <vscale x 12 x i32>, align 16
38 // CHECK-NEXT: [[U64X3:%.*]] = alloca <vscale x 6 x i64>, align 16
39 // CHECK-NEXT: [[F16X3:%.*]] = alloca <vscale x 24 x half>, align 16
40 // CHECK-NEXT: [[F32X3:%.*]] = alloca <vscale x 12 x float>, align 16
41 // CHECK-NEXT: [[F64X3:%.*]] = alloca <vscale x 6 x double>, align 16
42 // CHECK-NEXT: [[BF16X3:%.*]] = alloca <vscale x 24 x bfloat>, align 16
43 // CHECK-NEXT: [[S8X4:%.*]] = alloca <vscale x 64 x i8>, align 16
44 // CHECK-NEXT: [[S16X4:%.*]] = alloca <vscale x 32 x i16>, align 16
45 // CHECK-NEXT: [[S32X4:%.*]] = alloca <vscale x 16 x i32>, align 16
46 // CHECK-NEXT: [[X64X4:%.*]] = alloca <vscale x 8 x i64>, align 16
47 // CHECK-NEXT: [[U8X4:%.*]] = alloca <vscale x 64 x i8>, align 16
48 // CHECK-NEXT: [[U16X4:%.*]] = alloca <vscale x 32 x i16>, align 16
49 // CHECK-NEXT: [[U32X4:%.*]] = alloca <vscale x 16 x i32>, align 16
50 // CHECK-NEXT: [[U64X4:%.*]] = alloca <vscale x 8 x i64>, align 16
51 // CHECK-NEXT: [[F16X4:%.*]] = alloca <vscale x 32 x half>, align 16
52 // CHECK-NEXT: [[F32X4:%.*]] = alloca <vscale x 16 x float>, align 16
53 // CHECK-NEXT: [[F64X4:%.*]] = alloca <vscale x 8 x double>, align 16
54 // CHECK-NEXT: [[BF16X4:%.*]] = alloca <vscale x 32 x bfloat>, align 16
55 // CHECK-NEXT: [[B8:%.*]] = alloca <vscale x 16 x i1>, align 2
56 // CHECK-NEXT: [[B8X2:%.*]] = alloca <vscale x 32 x i1>, align 2
57 // CHECK-NEXT: [[B8X4:%.*]] = alloca <vscale x 64 x i1>, align 2
58 // CHECK-NEXT: [[CNT:%.*]] = alloca target("aarch64.svcount"), align 2
59 // CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[S8]], align 16
60 // CHECK-NEXT: store <vscale x 8 x i16> zeroinitializer, ptr [[S16]], align 16
61 // CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[S32]], align 16
62 // CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[S64]], align 16
63 // CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[U8]], align 16
64 // CHECK-NEXT: store <vscale x 8 x i16> zeroinitializer, ptr [[U16]], align 16
65 // CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[U32]], align 16
66 // CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[U64]], align 16
67 // CHECK-NEXT: store <vscale x 8 x half> zeroinitializer, ptr [[F16]], align 16
68 // CHECK-NEXT: store <vscale x 4 x float> zeroinitializer, ptr [[F32]], align 16
69 // CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[F64]], align 16
70 // CHECK-NEXT: store <vscale x 8 x bfloat> zeroinitializer, ptr [[BF16]], align 16
71 // CHECK-NEXT: store <vscale x 32 x i8> zeroinitializer, ptr [[S8X2]], align 16
72 // CHECK-NEXT: store <vscale x 16 x i16> zeroinitializer, ptr [[S16X2]], align 16
73 // CHECK-NEXT: store <vscale x 8 x i32> zeroinitializer, ptr [[S32X2]], align 16
74 // CHECK-NEXT: store <vscale x 4 x i64> zeroinitializer, ptr [[X64X2]], align 16
75 // CHECK-NEXT: store <vscale x 32 x i8> zeroinitializer, ptr [[U8X2]], align 16
76 // CHECK-NEXT: store <vscale x 16 x i16> zeroinitializer, ptr [[U16X2]], align 16
77 // CHECK-NEXT: store <vscale x 8 x i32> zeroinitializer, ptr [[U32X2]], align 16
78 // CHECK-NEXT: store <vscale x 4 x i64> zeroinitializer, ptr [[U64X2]], align 16
79 // CHECK-NEXT: store <vscale x 16 x half> zeroinitializer, ptr [[F16X2]], align 16
80 // CHECK-NEXT: store <vscale x 8 x float> zeroinitializer, ptr [[F32X2]], align 16
81 // CHECK-NEXT: store <vscale x 4 x double> zeroinitializer, ptr [[F64X2]], align 16
82 // CHECK-NEXT: store <vscale x 16 x bfloat> zeroinitializer, ptr [[BF16X2]], align 16
83 // CHECK-NEXT: store <vscale x 48 x i8> zeroinitializer, ptr [[S8X3]], align 16
84 // CHECK-NEXT: store <vscale x 24 x i16> zeroinitializer, ptr [[S16X3]], align 16
85 // CHECK-NEXT: store <vscale x 12 x i32> zeroinitializer, ptr [[S32X3]], align 16
86 // CHECK-NEXT: store <vscale x 6 x i64> zeroinitializer, ptr [[X64X3]], align 16
87 // CHECK-NEXT: store <vscale x 48 x i8> zeroinitializer, ptr [[U8X3]], align 16
88 // CHECK-NEXT: store <vscale x 24 x i16> zeroinitializer, ptr [[U16X3]], align 16
89 // CHECK-NEXT: store <vscale x 12 x i32> zeroinitializer, ptr [[U32X3]], align 16
90 // CHECK-NEXT: store <vscale x 6 x i64> zeroinitializer, ptr [[U64X3]], align 16
91 // CHECK-NEXT: store <vscale x 24 x half> zeroinitializer, ptr [[F16X3]], align 16
92 // CHECK-NEXT: store <vscale x 12 x float> zeroinitializer, ptr [[F32X3]], align 16
93 // CHECK-NEXT: store <vscale x 6 x double> zeroinitializer, ptr [[F64X3]], align 16
94 // CHECK-NEXT: store <vscale x 24 x bfloat> zeroinitializer, ptr [[BF16X3]], align 16
95 // CHECK-NEXT: store <vscale x 64 x i8> zeroinitializer, ptr [[S8X4]], align 16
96 // CHECK-NEXT: store <vscale x 32 x i16> zeroinitializer, ptr [[S16X4]], align 16
97 // CHECK-NEXT: store <vscale x 16 x i32> zeroinitializer, ptr [[S32X4]], align 16
98 // CHECK-NEXT: store <vscale x 8 x i64> zeroinitializer, ptr [[X64X4]], align 16
99 // CHECK-NEXT: store <vscale x 64 x i8> zeroinitializer, ptr [[U8X4]], align 16
100 // CHECK-NEXT: store <vscale x 32 x i16> zeroinitializer, ptr [[U16X4]], align 16
101 // CHECK-NEXT: store <vscale x 16 x i32> zeroinitializer, ptr [[U32X4]], align 16
102 // CHECK-NEXT: store <vscale x 8 x i64> zeroinitializer, ptr [[U64X4]], align 16
103 // CHECK-NEXT: store <vscale x 32 x half> zeroinitializer, ptr [[F16X4]], align 16
104 // CHECK-NEXT: store <vscale x 16 x float> zeroinitializer, ptr [[F32X4]], align 16
105 // CHECK-NEXT: store <vscale x 8 x double> zeroinitializer, ptr [[F64X4]], align 16
106 // CHECK-NEXT: store <vscale x 32 x bfloat> zeroinitializer, ptr [[BF16X4]], align 16
107 // CHECK-NEXT: store <vscale x 16 x i1> zeroinitializer, ptr [[B8]], align 2
108 // CHECK-NEXT: store <vscale x 32 x i1> zeroinitializer, ptr [[B8X2]], align 2
109 // CHECK-NEXT: store <vscale x 64 x i1> zeroinitializer, ptr [[B8X4]], align 2
110 // CHECK-NEXT: store target("aarch64.svcount") zeroinitializer, ptr [[CNT]], align 2
111 // CHECK-NEXT: ret void
// Declares one local of each SVE builtin type listed below and initializes it
// with empty braces. The autogenerated assertions above expect one alloca per
// local followed by one zeroinitializer store per local, both in declaration
// order.
114 void test_locals(void) {
// Single-vector types: signed, unsigned, floating-point and bfloat16 elements.
115 __SVInt8_t s8{};
116 __SVInt16_t s16{};
117 __SVInt32_t s32{};
118 __SVInt64_t s64{};
119 __SVUint8_t u8{};
120 __SVUint16_t u16{};
121 __SVUint32_t u32{};
122 __SVUint64_t u64{};
123 __SVFloat16_t f16{};
124 __SVFloat32_t f32{};
125 __SVFloat64_t f64{};
126 __SVBfloat16_t bf16{};
// Two-vector tuple types.
// NOTE(review): the signed 64-bit tuple is named x64x2 (and x64x3 / x64x4 in
// the groups below) while its siblings use the s prefix -- presumably a typo
// for s64xN. The assertion capture names (X64X2, X64X3, X64X4) follow these
// locals, so regenerate the assertions if the locals are ever renamed.
127 __clang_svint8x2_t s8x2{};
128 __clang_svint16x2_t s16x2{};
129 __clang_svint32x2_t s32x2{};
130 __clang_svint64x2_t x64x2{};
131 __clang_svuint8x2_t u8x2{};
132 __clang_svuint16x2_t u16x2{};
133 __clang_svuint32x2_t u32x2{};
134 __clang_svuint64x2_t u64x2{};
135 __clang_svfloat16x2_t f16x2{};
136 __clang_svfloat32x2_t f32x2{};
137 __clang_svfloat64x2_t f64x2{};
138 __clang_svbfloat16x2_t bf16x2{};
// Three-vector tuple types.
140 __clang_svint8x3_t s8x3{};
141 __clang_svint16x3_t s16x3{};
142 __clang_svint32x3_t s32x3{};
143 __clang_svint64x3_t x64x3{};
144 __clang_svuint8x3_t u8x3{};
145 __clang_svuint16x3_t u16x3{};
146 __clang_svuint32x3_t u32x3{};
147 __clang_svuint64x3_t u64x3{};
148 __clang_svfloat16x3_t f16x3{};
149 __clang_svfloat32x3_t f32x3{};
150 __clang_svfloat64x3_t f64x3{};
151 __clang_svbfloat16x3_t bf16x3{};
// Four-vector tuple types.
153 __clang_svint8x4_t s8x4{};
154 __clang_svint16x4_t s16x4{};
155 __clang_svint32x4_t s32x4{};
156 __clang_svint64x4_t x64x4{};
157 __clang_svuint8x4_t u8x4{};
158 __clang_svuint16x4_t u16x4{};
159 __clang_svuint32x4_t u32x4{};
160 __clang_svuint64x4_t u64x4{};
161 __clang_svfloat16x4_t f16x4{};
162 __clang_svfloat32x4_t f32x4{};
163 __clang_svfloat64x4_t f64x4{};
164 __clang_svbfloat16x4_t bf16x4{};
// Bool vector and its x2 / x4 tuples; the assertions expect i1 element
// vectors with align 2 for these.
166 __SVBool_t b8{};
167 __clang_svboolx2_t b8x2{};
168 __clang_svboolx4_t b8x4{};
// __SVCount_t; the assertions expect the target("aarch64.svcount") type.
170 __SVCount_t cnt{};
173 // CHECK-LABEL: define dso_local void @_Z12test_copy_s8u10__SVInt8_t
174 // CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
175 // CHECK-NEXT: entry:
176 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 16 x i8>, align 16
177 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 16 x i8>, align 16
178 // CHECK-NEXT: store <vscale x 16 x i8> [[A]], ptr [[A_ADDR]], align 16
179 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[A_ADDR]], align 16
180 // CHECK-NEXT: store <vscale x 16 x i8> [[TMP0]], ptr [[B]], align 16
181 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__SVInt8_t). The autogenerated
// assertions above expect a to arrive as vscale x 16 x i8, be stored to its
// own stack slot, reloaded, and stored into b's slot.
183 void test_copy_s8(__SVInt8_t a) {
184 __SVInt8_t b{a};
187 // CHECK-LABEL: define dso_local void @_Z13test_copy_s16u11__SVInt16_t
188 // CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]]) #[[ATTR0]] {
189 // CHECK-NEXT: entry:
190 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 8 x i16>, align 16
191 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 8 x i16>, align 16
192 // CHECK-NEXT: store <vscale x 8 x i16> [[A]], ptr [[A_ADDR]], align 16
193 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 8 x i16>, ptr [[A_ADDR]], align 16
194 // CHECK-NEXT: store <vscale x 8 x i16> [[TMP0]], ptr [[B]], align 16
195 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__SVInt16_t). The autogenerated
// assertions above expect a to arrive as vscale x 8 x i16, be stored to its
// own stack slot, reloaded, and stored into b's slot.
197 void test_copy_s16(__SVInt16_t a) {
198 __SVInt16_t b{a};
201 // CHECK-LABEL: define dso_local void @_Z13test_copy_s32u11__SVInt32_t
202 // CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
203 // CHECK-NEXT: entry:
204 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 4 x i32>, align 16
205 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 4 x i32>, align 16
206 // CHECK-NEXT: store <vscale x 4 x i32> [[A]], ptr [[A_ADDR]], align 16
207 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 4 x i32>, ptr [[A_ADDR]], align 16
208 // CHECK-NEXT: store <vscale x 4 x i32> [[TMP0]], ptr [[B]], align 16
209 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__SVInt32_t). The autogenerated
// assertions above expect a to arrive as vscale x 4 x i32, be stored to its
// own stack slot, reloaded, and stored into b's slot.
211 void test_copy_s32(__SVInt32_t a) {
212 __SVInt32_t b{a};
215 // CHECK-LABEL: define dso_local void @_Z13test_copy_s64u11__SVInt64_t
216 // CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]]) #[[ATTR0]] {
217 // CHECK-NEXT: entry:
218 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 2 x i64>, align 16
219 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 2 x i64>, align 16
220 // CHECK-NEXT: store <vscale x 2 x i64> [[A]], ptr [[A_ADDR]], align 16
221 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 2 x i64>, ptr [[A_ADDR]], align 16
222 // CHECK-NEXT: store <vscale x 2 x i64> [[TMP0]], ptr [[B]], align 16
223 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__SVInt64_t). The autogenerated
// assertions above expect a to arrive as vscale x 2 x i64, be stored to its
// own stack slot, reloaded, and stored into b's slot.
225 void test_copy_s64(__SVInt64_t a) {
226 __SVInt64_t b{a};
229 // CHECK-LABEL: define dso_local void @_Z12test_copy_u8u11__SVUint8_t
230 // CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
231 // CHECK-NEXT: entry:
232 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 16 x i8>, align 16
233 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 16 x i8>, align 16
234 // CHECK-NEXT: store <vscale x 16 x i8> [[A]], ptr [[A_ADDR]], align 16
235 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[A_ADDR]], align 16
236 // CHECK-NEXT: store <vscale x 16 x i8> [[TMP0]], ptr [[B]], align 16
237 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__SVUint8_t). The autogenerated
// assertions above expect a to arrive as vscale x 16 x i8, be stored to its
// own stack slot, reloaded, and stored into b's slot.
239 void test_copy_u8(__SVUint8_t a) {
240 __SVUint8_t b{a};
243 // CHECK-LABEL: define dso_local void @_Z13test_copy_u16u12__SVUint16_t
244 // CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]]) #[[ATTR0]] {
245 // CHECK-NEXT: entry:
246 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 8 x i16>, align 16
247 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 8 x i16>, align 16
248 // CHECK-NEXT: store <vscale x 8 x i16> [[A]], ptr [[A_ADDR]], align 16
249 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 8 x i16>, ptr [[A_ADDR]], align 16
250 // CHECK-NEXT: store <vscale x 8 x i16> [[TMP0]], ptr [[B]], align 16
251 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__SVUint16_t). The autogenerated
// assertions above expect a to arrive as vscale x 8 x i16, be stored to its
// own stack slot, reloaded, and stored into b's slot.
253 void test_copy_u16(__SVUint16_t a) {
254 __SVUint16_t b{a};
257 // CHECK-LABEL: define dso_local void @_Z13test_copy_u32u12__SVUint32_t
258 // CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
259 // CHECK-NEXT: entry:
260 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 4 x i32>, align 16
261 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 4 x i32>, align 16
262 // CHECK-NEXT: store <vscale x 4 x i32> [[A]], ptr [[A_ADDR]], align 16
263 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 4 x i32>, ptr [[A_ADDR]], align 16
264 // CHECK-NEXT: store <vscale x 4 x i32> [[TMP0]], ptr [[B]], align 16
265 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__SVUint32_t). The autogenerated
// assertions above expect a to arrive as vscale x 4 x i32, be stored to its
// own stack slot, reloaded, and stored into b's slot.
267 void test_copy_u32(__SVUint32_t a) {
268 __SVUint32_t b{a};
271 // CHECK-LABEL: define dso_local void @_Z13test_copy_u64u12__SVUint64_t
272 // CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]]) #[[ATTR0]] {
273 // CHECK-NEXT: entry:
274 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 2 x i64>, align 16
275 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 2 x i64>, align 16
276 // CHECK-NEXT: store <vscale x 2 x i64> [[A]], ptr [[A_ADDR]], align 16
277 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 2 x i64>, ptr [[A_ADDR]], align 16
278 // CHECK-NEXT: store <vscale x 2 x i64> [[TMP0]], ptr [[B]], align 16
279 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__SVUint64_t). The autogenerated
// assertions above expect a to arrive as vscale x 2 x i64, be stored to its
// own stack slot, reloaded, and stored into b's slot.
281 void test_copy_u64(__SVUint64_t a) {
282 __SVUint64_t b{a};
285 // CHECK-LABEL: define dso_local void @_Z13test_copy_f16u13__SVFloat16_t
286 // CHECK-SAME: (<vscale x 8 x half> [[A:%.*]]) #[[ATTR0]] {
287 // CHECK-NEXT: entry:
288 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 8 x half>, align 16
289 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 8 x half>, align 16
290 // CHECK-NEXT: store <vscale x 8 x half> [[A]], ptr [[A_ADDR]], align 16
291 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 8 x half>, ptr [[A_ADDR]], align 16
292 // CHECK-NEXT: store <vscale x 8 x half> [[TMP0]], ptr [[B]], align 16
293 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__SVFloat16_t). The autogenerated
// assertions above expect a to arrive as vscale x 8 x half, be stored to its
// own stack slot, reloaded, and stored into b's slot.
295 void test_copy_f16(__SVFloat16_t a) {
296 __SVFloat16_t b{a};
299 // CHECK-LABEL: define dso_local void @_Z13test_copy_f32u13__SVFloat32_t
300 // CHECK-SAME: (<vscale x 4 x float> [[A:%.*]]) #[[ATTR0]] {
301 // CHECK-NEXT: entry:
302 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 4 x float>, align 16
303 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 4 x float>, align 16
304 // CHECK-NEXT: store <vscale x 4 x float> [[A]], ptr [[A_ADDR]], align 16
305 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 4 x float>, ptr [[A_ADDR]], align 16
306 // CHECK-NEXT: store <vscale x 4 x float> [[TMP0]], ptr [[B]], align 16
307 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__SVFloat32_t). The autogenerated
// assertions above expect a to arrive as vscale x 4 x float, be stored to its
// own stack slot, reloaded, and stored into b's slot.
309 void test_copy_f32(__SVFloat32_t a) {
310 __SVFloat32_t b{a};
313 // CHECK-LABEL: define dso_local void @_Z13test_copy_f64u13__SVFloat64_t
314 // CHECK-SAME: (<vscale x 2 x double> [[A:%.*]]) #[[ATTR0]] {
315 // CHECK-NEXT: entry:
316 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 2 x double>, align 16
317 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 2 x double>, align 16
318 // CHECK-NEXT: store <vscale x 2 x double> [[A]], ptr [[A_ADDR]], align 16
319 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 2 x double>, ptr [[A_ADDR]], align 16
320 // CHECK-NEXT: store <vscale x 2 x double> [[TMP0]], ptr [[B]], align 16
321 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__SVFloat64_t). The autogenerated
// assertions above expect a to arrive as vscale x 2 x double, be stored to its
// own stack slot, reloaded, and stored into b's slot.
323 void test_copy_f64(__SVFloat64_t a) {
324 __SVFloat64_t b{a};
327 // CHECK-LABEL: define dso_local void @_Z14test_copy_bf16u14__SVBfloat16_t
328 // CHECK-SAME: (<vscale x 8 x bfloat> [[A:%.*]]) #[[ATTR0]] {
329 // CHECK-NEXT: entry:
330 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 8 x bfloat>, align 16
331 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 8 x bfloat>, align 16
332 // CHECK-NEXT: store <vscale x 8 x bfloat> [[A]], ptr [[A_ADDR]], align 16
333 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 8 x bfloat>, ptr [[A_ADDR]], align 16
334 // CHECK-NEXT: store <vscale x 8 x bfloat> [[TMP0]], ptr [[B]], align 16
335 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__SVBfloat16_t). The
// autogenerated assertions above expect a to arrive as vscale x 8 x bfloat, be
// stored to its own stack slot, reloaded, and stored into b's slot.
337 void test_copy_bf16(__SVBfloat16_t a) {
338 __SVBfloat16_t b{a};
341 // CHECK-LABEL: define dso_local void @_Z14test_copy_s8x210svint8x2_t
342 // CHECK-SAME: (<vscale x 32 x i8> [[A:%.*]]) #[[ATTR0]] {
343 // CHECK-NEXT: entry:
344 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 32 x i8>, align 16
345 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 32 x i8>, align 16
346 // CHECK-NEXT: store <vscale x 32 x i8> [[A]], ptr [[A_ADDR]], align 16
347 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 32 x i8>, ptr [[A_ADDR]], align 16
348 // CHECK-NEXT: store <vscale x 32 x i8> [[TMP0]], ptr [[B]], align 16
349 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svint8x2_t). The
// autogenerated assertions above expect a to arrive as vscale x 32 x i8, be
// stored to its own stack slot, reloaded, and stored into b's slot.
351 void test_copy_s8x2(__clang_svint8x2_t a) {
352 __clang_svint8x2_t b{a};
355 // CHECK-LABEL: define dso_local void @_Z15test_copy_s16x211svint16x2_t
356 // CHECK-SAME: (<vscale x 16 x i16> [[A:%.*]]) #[[ATTR0]] {
357 // CHECK-NEXT: entry:
358 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 16 x i16>, align 16
359 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 16 x i16>, align 16
360 // CHECK-NEXT: store <vscale x 16 x i16> [[A]], ptr [[A_ADDR]], align 16
361 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i16>, ptr [[A_ADDR]], align 16
362 // CHECK-NEXT: store <vscale x 16 x i16> [[TMP0]], ptr [[B]], align 16
363 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svint16x2_t). The
// autogenerated assertions above expect a to arrive as vscale x 16 x i16, be
// stored to its own stack slot, reloaded, and stored into b's slot.
365 void test_copy_s16x2(__clang_svint16x2_t a) {
366 __clang_svint16x2_t b{a};
369 // CHECK-LABEL: define dso_local void @_Z15test_copy_s32x211svint32x2_t
370 // CHECK-SAME: (<vscale x 8 x i32> [[A:%.*]]) #[[ATTR0]] {
371 // CHECK-NEXT: entry:
372 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 8 x i32>, align 16
373 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 8 x i32>, align 16
374 // CHECK-NEXT: store <vscale x 8 x i32> [[A]], ptr [[A_ADDR]], align 16
375 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 8 x i32>, ptr [[A_ADDR]], align 16
376 // CHECK-NEXT: store <vscale x 8 x i32> [[TMP0]], ptr [[B]], align 16
377 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svint32x2_t). The
// autogenerated assertions above expect a to arrive as vscale x 8 x i32, be
// stored to its own stack slot, reloaded, and stored into b's slot.
379 void test_copy_s32x2(__clang_svint32x2_t a) {
380 __clang_svint32x2_t b{a};
383 // CHECK-LABEL: define dso_local void @_Z15test_copy_s64x211svint64x2_t
384 // CHECK-SAME: (<vscale x 4 x i64> [[A:%.*]]) #[[ATTR0]] {
385 // CHECK-NEXT: entry:
386 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 4 x i64>, align 16
387 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 4 x i64>, align 16
388 // CHECK-NEXT: store <vscale x 4 x i64> [[A]], ptr [[A_ADDR]], align 16
389 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 4 x i64>, ptr [[A_ADDR]], align 16
390 // CHECK-NEXT: store <vscale x 4 x i64> [[TMP0]], ptr [[B]], align 16
391 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svint64x2_t). The
// autogenerated assertions above expect a to arrive as vscale x 4 x i64, be
// stored to its own stack slot, reloaded, and stored into b's slot.
393 void test_copy_s64x2(__clang_svint64x2_t a) {
394 __clang_svint64x2_t b{a};
397 // CHECK-LABEL: define dso_local void @_Z14test_copy_u8x211svuint8x2_t
398 // CHECK-SAME: (<vscale x 32 x i8> [[A:%.*]]) #[[ATTR0]] {
399 // CHECK-NEXT: entry:
400 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 32 x i8>, align 16
401 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 32 x i8>, align 16
402 // CHECK-NEXT: store <vscale x 32 x i8> [[A]], ptr [[A_ADDR]], align 16
403 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 32 x i8>, ptr [[A_ADDR]], align 16
404 // CHECK-NEXT: store <vscale x 32 x i8> [[TMP0]], ptr [[B]], align 16
405 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svuint8x2_t). The
// autogenerated assertions above expect a to arrive as vscale x 32 x i8, be
// stored to its own stack slot, reloaded, and stored into b's slot.
407 void test_copy_u8x2(__clang_svuint8x2_t a) {
408 __clang_svuint8x2_t b{a};
411 // CHECK-LABEL: define dso_local void @_Z15test_copy_u16x212svuint16x2_t
412 // CHECK-SAME: (<vscale x 16 x i16> [[A:%.*]]) #[[ATTR0]] {
413 // CHECK-NEXT: entry:
414 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 16 x i16>, align 16
415 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 16 x i16>, align 16
416 // CHECK-NEXT: store <vscale x 16 x i16> [[A]], ptr [[A_ADDR]], align 16
417 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i16>, ptr [[A_ADDR]], align 16
418 // CHECK-NEXT: store <vscale x 16 x i16> [[TMP0]], ptr [[B]], align 16
419 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svuint16x2_t). The
// autogenerated assertions above expect a to arrive as vscale x 16 x i16, be
// stored to its own stack slot, reloaded, and stored into b's slot.
421 void test_copy_u16x2(__clang_svuint16x2_t a) {
422 __clang_svuint16x2_t b{a};
425 // CHECK-LABEL: define dso_local void @_Z15test_copy_u32x212svuint32x2_t
426 // CHECK-SAME: (<vscale x 8 x i32> [[A:%.*]]) #[[ATTR0]] {
427 // CHECK-NEXT: entry:
428 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 8 x i32>, align 16
429 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 8 x i32>, align 16
430 // CHECK-NEXT: store <vscale x 8 x i32> [[A]], ptr [[A_ADDR]], align 16
431 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 8 x i32>, ptr [[A_ADDR]], align 16
432 // CHECK-NEXT: store <vscale x 8 x i32> [[TMP0]], ptr [[B]], align 16
433 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svuint32x2_t). The
// autogenerated assertions above expect a to arrive as vscale x 8 x i32, be
// stored to its own stack slot, reloaded, and stored into b's slot.
435 void test_copy_u32x2(__clang_svuint32x2_t a) {
436 __clang_svuint32x2_t b{a};
439 // CHECK-LABEL: define dso_local void @_Z15test_copy_u64x212svuint64x2_t
440 // CHECK-SAME: (<vscale x 4 x i64> [[A:%.*]]) #[[ATTR0]] {
441 // CHECK-NEXT: entry:
442 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 4 x i64>, align 16
443 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 4 x i64>, align 16
444 // CHECK-NEXT: store <vscale x 4 x i64> [[A]], ptr [[A_ADDR]], align 16
445 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 4 x i64>, ptr [[A_ADDR]], align 16
446 // CHECK-NEXT: store <vscale x 4 x i64> [[TMP0]], ptr [[B]], align 16
447 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svuint64x2_t). The
// autogenerated assertions above expect a to arrive as vscale x 4 x i64, be
// stored to its own stack slot, reloaded, and stored into b's slot.
449 void test_copy_u64x2(__clang_svuint64x2_t a) {
450 __clang_svuint64x2_t b{a};
453 // CHECK-LABEL: define dso_local void @_Z15test_copy_f16x213svfloat16x2_t
454 // CHECK-SAME: (<vscale x 16 x half> [[A:%.*]]) #[[ATTR0]] {
455 // CHECK-NEXT: entry:
456 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 16 x half>, align 16
457 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 16 x half>, align 16
458 // CHECK-NEXT: store <vscale x 16 x half> [[A]], ptr [[A_ADDR]], align 16
459 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x half>, ptr [[A_ADDR]], align 16
460 // CHECK-NEXT: store <vscale x 16 x half> [[TMP0]], ptr [[B]], align 16
461 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svfloat16x2_t). The
// autogenerated assertions above expect a to arrive as vscale x 16 x half, be
// stored to its own stack slot, reloaded, and stored into b's slot.
463 void test_copy_f16x2(__clang_svfloat16x2_t a) {
464 __clang_svfloat16x2_t b{a};
467 // CHECK-LABEL: define dso_local void @_Z15test_copy_f32x213svfloat32x2_t
468 // CHECK-SAME: (<vscale x 8 x float> [[A:%.*]]) #[[ATTR0]] {
469 // CHECK-NEXT: entry:
470 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 8 x float>, align 16
471 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 8 x float>, align 16
472 // CHECK-NEXT: store <vscale x 8 x float> [[A]], ptr [[A_ADDR]], align 16
473 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 8 x float>, ptr [[A_ADDR]], align 16
474 // CHECK-NEXT: store <vscale x 8 x float> [[TMP0]], ptr [[B]], align 16
475 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svfloat32x2_t). The
// autogenerated assertions above expect a to arrive as vscale x 8 x float, be
// stored to its own stack slot, reloaded, and stored into b's slot.
477 void test_copy_f32x2(__clang_svfloat32x2_t a) {
478 __clang_svfloat32x2_t b{a};
481 // CHECK-LABEL: define dso_local void @_Z15test_copy_f64x213svfloat64x2_t
482 // CHECK-SAME: (<vscale x 4 x double> [[A:%.*]]) #[[ATTR0]] {
483 // CHECK-NEXT: entry:
484 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 4 x double>, align 16
485 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 4 x double>, align 16
486 // CHECK-NEXT: store <vscale x 4 x double> [[A]], ptr [[A_ADDR]], align 16
487 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 4 x double>, ptr [[A_ADDR]], align 16
488 // CHECK-NEXT: store <vscale x 4 x double> [[TMP0]], ptr [[B]], align 16
489 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svfloat64x2_t). The
// autogenerated assertions above expect a to arrive as vscale x 4 x double, be
// stored to its own stack slot, reloaded, and stored into b's slot.
491 void test_copy_f64x2(__clang_svfloat64x2_t a) {
492 __clang_svfloat64x2_t b{a};
495 // CHECK-LABEL: define dso_local void @_Z16test_copy_bf16x214svbfloat16x2_t
496 // CHECK-SAME: (<vscale x 16 x bfloat> [[A:%.*]]) #[[ATTR0]] {
497 // CHECK-NEXT: entry:
498 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 16 x bfloat>, align 16
499 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 16 x bfloat>, align 16
500 // CHECK-NEXT: store <vscale x 16 x bfloat> [[A]], ptr [[A_ADDR]], align 16
501 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x bfloat>, ptr [[A_ADDR]], align 16
502 // CHECK-NEXT: store <vscale x 16 x bfloat> [[TMP0]], ptr [[B]], align 16
503 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svbfloat16x2_t). The
// autogenerated assertions above expect a to arrive as vscale x 16 x bfloat,
// be stored to its own stack slot, reloaded, and stored into b's slot.
505 void test_copy_bf16x2(__clang_svbfloat16x2_t a) {
506 __clang_svbfloat16x2_t b{a};
509 // CHECK-LABEL: define dso_local void @_Z14test_copy_s8x310svint8x3_t
510 // CHECK-SAME: (<vscale x 48 x i8> [[A:%.*]]) #[[ATTR0]] {
511 // CHECK-NEXT: entry:
512 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 48 x i8>, align 16
513 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 48 x i8>, align 16
514 // CHECK-NEXT: store <vscale x 48 x i8> [[A]], ptr [[A_ADDR]], align 16
515 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 48 x i8>, ptr [[A_ADDR]], align 16
516 // CHECK-NEXT: store <vscale x 48 x i8> [[TMP0]], ptr [[B]], align 16
517 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svint8x3_t). The
// autogenerated assertions above expect a to arrive as vscale x 48 x i8, be
// stored to its own stack slot, reloaded, and stored into b's slot.
519 void test_copy_s8x3(__clang_svint8x3_t a) {
520 __clang_svint8x3_t b{a};
523 // CHECK-LABEL: define dso_local void @_Z15test_copy_s16x311svint16x3_t
524 // CHECK-SAME: (<vscale x 24 x i16> [[A:%.*]]) #[[ATTR0]] {
525 // CHECK-NEXT: entry:
526 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 24 x i16>, align 16
527 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 24 x i16>, align 16
528 // CHECK-NEXT: store <vscale x 24 x i16> [[A]], ptr [[A_ADDR]], align 16
529 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 24 x i16>, ptr [[A_ADDR]], align 16
530 // CHECK-NEXT: store <vscale x 24 x i16> [[TMP0]], ptr [[B]], align 16
531 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svint16x3_t). The
// autogenerated assertions above expect a to arrive as vscale x 24 x i16, be
// stored to its own stack slot, reloaded, and stored into b's slot.
533 void test_copy_s16x3(__clang_svint16x3_t a) {
534 __clang_svint16x3_t b{a};
537 // CHECK-LABEL: define dso_local void @_Z15test_copy_s32x311svint32x3_t
538 // CHECK-SAME: (<vscale x 12 x i32> [[A:%.*]]) #[[ATTR0]] {
539 // CHECK-NEXT: entry:
540 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 12 x i32>, align 16
541 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 12 x i32>, align 16
542 // CHECK-NEXT: store <vscale x 12 x i32> [[A]], ptr [[A_ADDR]], align 16
543 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 12 x i32>, ptr [[A_ADDR]], align 16
544 // CHECK-NEXT: store <vscale x 12 x i32> [[TMP0]], ptr [[B]], align 16
545 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svint32x3_t). The
// autogenerated assertions above expect a to arrive as vscale x 12 x i32, be
// stored to its own stack slot, reloaded, and stored into b's slot.
547 void test_copy_s32x3(__clang_svint32x3_t a) {
548 __clang_svint32x3_t b{a};
551 // CHECK-LABEL: define dso_local void @_Z15test_copy_s64x311svint64x3_t
552 // CHECK-SAME: (<vscale x 6 x i64> [[A:%.*]]) #[[ATTR0]] {
553 // CHECK-NEXT: entry:
554 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 6 x i64>, align 16
555 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 6 x i64>, align 16
556 // CHECK-NEXT: store <vscale x 6 x i64> [[A]], ptr [[A_ADDR]], align 16
557 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 6 x i64>, ptr [[A_ADDR]], align 16
558 // CHECK-NEXT: store <vscale x 6 x i64> [[TMP0]], ptr [[B]], align 16
559 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svint64x3_t). The
// autogenerated assertions above expect a to arrive as vscale x 6 x i64, be
// stored to its own stack slot, reloaded, and stored into b's slot.
561 void test_copy_s64x3(__clang_svint64x3_t a) {
562 __clang_svint64x3_t b{a};
565 // CHECK-LABEL: define dso_local void @_Z14test_copy_u8x311svuint8x3_t
566 // CHECK-SAME: (<vscale x 48 x i8> [[A:%.*]]) #[[ATTR0]] {
567 // CHECK-NEXT: entry:
568 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 48 x i8>, align 16
569 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 48 x i8>, align 16
570 // CHECK-NEXT: store <vscale x 48 x i8> [[A]], ptr [[A_ADDR]], align 16
571 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 48 x i8>, ptr [[A_ADDR]], align 16
572 // CHECK-NEXT: store <vscale x 48 x i8> [[TMP0]], ptr [[B]], align 16
573 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svuint8x3_t). The
// autogenerated assertions above expect a to arrive as vscale x 48 x i8, be
// stored to its own stack slot, reloaded, and stored into b's slot.
575 void test_copy_u8x3(__clang_svuint8x3_t a) {
576 __clang_svuint8x3_t b{a};
579 // CHECK-LABEL: define dso_local void @_Z15test_copy_u16x312svuint16x3_t
580 // CHECK-SAME: (<vscale x 24 x i16> [[A:%.*]]) #[[ATTR0]] {
581 // CHECK-NEXT: entry:
582 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 24 x i16>, align 16
583 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 24 x i16>, align 16
584 // CHECK-NEXT: store <vscale x 24 x i16> [[A]], ptr [[A_ADDR]], align 16
585 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 24 x i16>, ptr [[A_ADDR]], align 16
586 // CHECK-NEXT: store <vscale x 24 x i16> [[TMP0]], ptr [[B]], align 16
587 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svuint16x3_t). The
// autogenerated assertions above expect a to arrive as vscale x 24 x i16, be
// stored to its own stack slot, reloaded, and stored into b's slot.
589 void test_copy_u16x3(__clang_svuint16x3_t a) {
590 __clang_svuint16x3_t b{a};
593 // CHECK-LABEL: define dso_local void @_Z15test_copy_u32x312svuint32x3_t
594 // CHECK-SAME: (<vscale x 12 x i32> [[A:%.*]]) #[[ATTR0]] {
595 // CHECK-NEXT: entry:
596 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 12 x i32>, align 16
597 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 12 x i32>, align 16
598 // CHECK-NEXT: store <vscale x 12 x i32> [[A]], ptr [[A_ADDR]], align 16
599 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 12 x i32>, ptr [[A_ADDR]], align 16
600 // CHECK-NEXT: store <vscale x 12 x i32> [[TMP0]], ptr [[B]], align 16
601 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svuint32x3_t). The
// autogenerated assertions above expect a to arrive as vscale x 12 x i32, be
// stored to its own stack slot, reloaded, and stored into b's slot.
603 void test_copy_u32x3(__clang_svuint32x3_t a) {
604 __clang_svuint32x3_t b{a};
607 // CHECK-LABEL: define dso_local void @_Z15test_copy_u64x312svuint64x3_t
608 // CHECK-SAME: (<vscale x 6 x i64> [[A:%.*]]) #[[ATTR0]] {
609 // CHECK-NEXT: entry:
610 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 6 x i64>, align 16
611 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 6 x i64>, align 16
612 // CHECK-NEXT: store <vscale x 6 x i64> [[A]], ptr [[A_ADDR]], align 16
613 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 6 x i64>, ptr [[A_ADDR]], align 16
614 // CHECK-NEXT: store <vscale x 6 x i64> [[TMP0]], ptr [[B]], align 16
615 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svuint64x3_t). The
// autogenerated assertions above expect a to arrive as vscale x 6 x i64, be
// stored to its own stack slot, reloaded, and stored into b's slot.
617 void test_copy_u64x3(__clang_svuint64x3_t a) {
618 __clang_svuint64x3_t b{a};
621 // CHECK-LABEL: define dso_local void @_Z15test_copy_f16x313svfloat16x3_t
622 // CHECK-SAME: (<vscale x 24 x half> [[A:%.*]]) #[[ATTR0]] {
623 // CHECK-NEXT: entry:
624 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 24 x half>, align 16
625 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 24 x half>, align 16
626 // CHECK-NEXT: store <vscale x 24 x half> [[A]], ptr [[A_ADDR]], align 16
627 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 24 x half>, ptr [[A_ADDR]], align 16
628 // CHECK-NEXT: store <vscale x 24 x half> [[TMP0]], ptr [[B]], align 16
629 // CHECK-NEXT: ret void
// Brace-initializes local b from parameter a (__clang_svfloat16x3_t). The
// autogenerated assertions above expect a to arrive as vscale x 24 x half, be
// stored to its own stack slot, reloaded, and stored into b's slot.
631 void test_copy_f16x3(__clang_svfloat16x3_t a) {
632 __clang_svfloat16x3_t b{a};
635 // CHECK-LABEL: define dso_local void @_Z15test_copy_f32x313svfloat32x3_t
636 // CHECK-SAME: (<vscale x 12 x float> [[A:%.*]]) #[[ATTR0]] {
637 // CHECK-NEXT: entry:
638 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 12 x float>, align 16
639 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 12 x float>, align 16
640 // CHECK-NEXT: store <vscale x 12 x float> [[A]], ptr [[A_ADDR]], align 16
641 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 12 x float>, ptr [[A_ADDR]], align 16
642 // CHECK-NEXT: store <vscale x 12 x float> [[TMP0]], ptr [[B]], align 16
643 // CHECK-NEXT: ret void
// List-initialises a local `b` from the by-value parameter `a`.
// The CHECK lines above pin the expected IR: `a` is spilled to A_ADDR,
// reloaded as <vscale x 12 x float>, and stored into `b` (all align 16).
645 void test_copy_f32x3(__clang_svfloat32x3_t a) {
646 __clang_svfloat32x3_t b{a};
649 // CHECK-LABEL: define dso_local void @_Z15test_copy_f64x313svfloat64x3_t
650 // CHECK-SAME: (<vscale x 6 x double> [[A:%.*]]) #[[ATTR0]] {
651 // CHECK-NEXT: entry:
652 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 6 x double>, align 16
653 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 6 x double>, align 16
654 // CHECK-NEXT: store <vscale x 6 x double> [[A]], ptr [[A_ADDR]], align 16
655 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 6 x double>, ptr [[A_ADDR]], align 16
656 // CHECK-NEXT: store <vscale x 6 x double> [[TMP0]], ptr [[B]], align 16
657 // CHECK-NEXT: ret void
// List-initialises a local `b` from the by-value parameter `a`.
// The CHECK lines above pin the expected IR: `a` is spilled to A_ADDR,
// reloaded as <vscale x 6 x double>, and stored into `b` (all align 16).
659 void test_copy_f64x3(__clang_svfloat64x3_t a) {
660 __clang_svfloat64x3_t b{a};
663 // CHECK-LABEL: define dso_local void @_Z16test_copy_bf16x314svbfloat16x3_t
664 // CHECK-SAME: (<vscale x 24 x bfloat> [[A:%.*]]) #[[ATTR0]] {
665 // CHECK-NEXT: entry:
666 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 24 x bfloat>, align 16
667 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 24 x bfloat>, align 16
668 // CHECK-NEXT: store <vscale x 24 x bfloat> [[A]], ptr [[A_ADDR]], align 16
669 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 24 x bfloat>, ptr [[A_ADDR]], align 16
670 // CHECK-NEXT: store <vscale x 24 x bfloat> [[TMP0]], ptr [[B]], align 16
671 // CHECK-NEXT: ret void
// List-initialises a local `b` from the by-value parameter `a`.
// The CHECK lines above pin the expected IR: `a` is spilled to A_ADDR,
// reloaded as <vscale x 24 x bfloat>, and stored into `b` (all align 16).
673 void test_copy_bf16x3(__clang_svbfloat16x3_t a) {
674 __clang_svbfloat16x3_t b{a};
677 // CHECK-LABEL: define dso_local void @_Z14test_copy_s8x410svint8x4_t
678 // CHECK-SAME: (<vscale x 64 x i8> [[A:%.*]]) #[[ATTR0]] {
679 // CHECK-NEXT: entry:
680 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 64 x i8>, align 16
681 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 64 x i8>, align 16
682 // CHECK-NEXT: store <vscale x 64 x i8> [[A]], ptr [[A_ADDR]], align 16
683 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 64 x i8>, ptr [[A_ADDR]], align 16
684 // CHECK-NEXT: store <vscale x 64 x i8> [[TMP0]], ptr [[B]], align 16
685 // CHECK-NEXT: ret void
// List-initialises a local `b` from the by-value parameter `a`.
// The CHECK lines above pin the expected IR: `a` is spilled to A_ADDR,
// reloaded as <vscale x 64 x i8>, and stored into `b` (all align 16).
687 void test_copy_s8x4(__clang_svint8x4_t a) {
688 __clang_svint8x4_t b{a};
691 // CHECK-LABEL: define dso_local void @_Z15test_copy_s16x411svint16x4_t
692 // CHECK-SAME: (<vscale x 32 x i16> [[A:%.*]]) #[[ATTR0]] {
693 // CHECK-NEXT: entry:
694 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 32 x i16>, align 16
695 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 32 x i16>, align 16
696 // CHECK-NEXT: store <vscale x 32 x i16> [[A]], ptr [[A_ADDR]], align 16
697 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 32 x i16>, ptr [[A_ADDR]], align 16
698 // CHECK-NEXT: store <vscale x 32 x i16> [[TMP0]], ptr [[B]], align 16
699 // CHECK-NEXT: ret void
// List-initialises a local `b` from the by-value parameter `a`.
// The CHECK lines above pin the expected IR: `a` is spilled to A_ADDR,
// reloaded as <vscale x 32 x i16>, and stored into `b` (all align 16).
701 void test_copy_s16x4(__clang_svint16x4_t a) {
702 __clang_svint16x4_t b{a};
705 // CHECK-LABEL: define dso_local void @_Z15test_copy_s32x411svint32x4_t
706 // CHECK-SAME: (<vscale x 16 x i32> [[A:%.*]]) #[[ATTR0]] {
707 // CHECK-NEXT: entry:
708 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 16 x i32>, align 16
709 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 16 x i32>, align 16
710 // CHECK-NEXT: store <vscale x 16 x i32> [[A]], ptr [[A_ADDR]], align 16
711 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i32>, ptr [[A_ADDR]], align 16
712 // CHECK-NEXT: store <vscale x 16 x i32> [[TMP0]], ptr [[B]], align 16
713 // CHECK-NEXT: ret void
// List-initialises a local `b` from the by-value parameter `a`.
// The CHECK lines above pin the expected IR: `a` is spilled to A_ADDR,
// reloaded as <vscale x 16 x i32>, and stored into `b` (all align 16).
715 void test_copy_s32x4(__clang_svint32x4_t a) {
716 __clang_svint32x4_t b{a};
719 // CHECK-LABEL: define dso_local void @_Z15test_copy_s64x411svint64x4_t
720 // CHECK-SAME: (<vscale x 8 x i64> [[A:%.*]]) #[[ATTR0]] {
721 // CHECK-NEXT: entry:
722 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 8 x i64>, align 16
723 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 8 x i64>, align 16
724 // CHECK-NEXT: store <vscale x 8 x i64> [[A]], ptr [[A_ADDR]], align 16
725 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 8 x i64>, ptr [[A_ADDR]], align 16
726 // CHECK-NEXT: store <vscale x 8 x i64> [[TMP0]], ptr [[B]], align 16
727 // CHECK-NEXT: ret void
// List-initialises a local `b` from the by-value parameter `a`.
// The CHECK lines above pin the expected IR: `a` is spilled to A_ADDR,
// reloaded as <vscale x 8 x i64>, and stored into `b` (all align 16).
729 void test_copy_s64x4(__clang_svint64x4_t a) {
730 __clang_svint64x4_t b{a};
733 // CHECK-LABEL: define dso_local void @_Z14test_copy_u8x411svuint8x4_t
734 // CHECK-SAME: (<vscale x 64 x i8> [[A:%.*]]) #[[ATTR0]] {
735 // CHECK-NEXT: entry:
736 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 64 x i8>, align 16
737 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 64 x i8>, align 16
738 // CHECK-NEXT: store <vscale x 64 x i8> [[A]], ptr [[A_ADDR]], align 16
739 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 64 x i8>, ptr [[A_ADDR]], align 16
740 // CHECK-NEXT: store <vscale x 64 x i8> [[TMP0]], ptr [[B]], align 16
741 // CHECK-NEXT: ret void
// List-initialises a local `b` from the by-value parameter `a`.
// The CHECK lines above pin the expected IR: `a` is spilled to A_ADDR,
// reloaded as <vscale x 64 x i8>, and stored into `b` (all align 16).
743 void test_copy_u8x4(__clang_svuint8x4_t a) {
744 __clang_svuint8x4_t b{a};
747 // CHECK-LABEL: define dso_local void @_Z15test_copy_u16x412svuint16x4_t
748 // CHECK-SAME: (<vscale x 32 x i16> [[A:%.*]]) #[[ATTR0]] {
749 // CHECK-NEXT: entry:
750 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 32 x i16>, align 16
751 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 32 x i16>, align 16
752 // CHECK-NEXT: store <vscale x 32 x i16> [[A]], ptr [[A_ADDR]], align 16
753 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 32 x i16>, ptr [[A_ADDR]], align 16
754 // CHECK-NEXT: store <vscale x 32 x i16> [[TMP0]], ptr [[B]], align 16
755 // CHECK-NEXT: ret void
// List-initialises a local `b` from the by-value parameter `a`.
// The CHECK lines above pin the expected IR: `a` is spilled to A_ADDR,
// reloaded as <vscale x 32 x i16>, and stored into `b` (all align 16).
757 void test_copy_u16x4(__clang_svuint16x4_t a) {
758 __clang_svuint16x4_t b{a};
761 // CHECK-LABEL: define dso_local void @_Z15test_copy_u32x412svuint32x4_t
762 // CHECK-SAME: (<vscale x 16 x i32> [[A:%.*]]) #[[ATTR0]] {
763 // CHECK-NEXT: entry:
764 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 16 x i32>, align 16
765 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 16 x i32>, align 16
766 // CHECK-NEXT: store <vscale x 16 x i32> [[A]], ptr [[A_ADDR]], align 16
767 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i32>, ptr [[A_ADDR]], align 16
768 // CHECK-NEXT: store <vscale x 16 x i32> [[TMP0]], ptr [[B]], align 16
769 // CHECK-NEXT: ret void
// List-initialises a local `b` from the by-value parameter `a`.
// The CHECK lines above pin the expected IR: `a` is spilled to A_ADDR,
// reloaded as <vscale x 16 x i32>, and stored into `b` (all align 16).
771 void test_copy_u32x4(__clang_svuint32x4_t a) {
772 __clang_svuint32x4_t b{a};
775 // CHECK-LABEL: define dso_local void @_Z15test_copy_u64x412svuint64x4_t
776 // CHECK-SAME: (<vscale x 8 x i64> [[A:%.*]]) #[[ATTR0]] {
777 // CHECK-NEXT: entry:
778 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 8 x i64>, align 16
779 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 8 x i64>, align 16
780 // CHECK-NEXT: store <vscale x 8 x i64> [[A]], ptr [[A_ADDR]], align 16
781 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 8 x i64>, ptr [[A_ADDR]], align 16
782 // CHECK-NEXT: store <vscale x 8 x i64> [[TMP0]], ptr [[B]], align 16
783 // CHECK-NEXT: ret void
// List-initialises a local `b` from the by-value parameter `a`.
// The CHECK lines above pin the expected IR: `a` is spilled to A_ADDR,
// reloaded as <vscale x 8 x i64>, and stored into `b` (all align 16).
785 void test_copy_u64x4(__clang_svuint64x4_t a) {
786 __clang_svuint64x4_t b{a};
789 // CHECK-LABEL: define dso_local void @_Z15test_copy_f16x413svfloat16x4_t
790 // CHECK-SAME: (<vscale x 32 x half> [[A:%.*]]) #[[ATTR0]] {
791 // CHECK-NEXT: entry:
792 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 32 x half>, align 16
793 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 32 x half>, align 16
794 // CHECK-NEXT: store <vscale x 32 x half> [[A]], ptr [[A_ADDR]], align 16
795 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 32 x half>, ptr [[A_ADDR]], align 16
796 // CHECK-NEXT: store <vscale x 32 x half> [[TMP0]], ptr [[B]], align 16
797 // CHECK-NEXT: ret void
// List-initialises a local `b` from the by-value parameter `a`.
// The CHECK lines above pin the expected IR: `a` is spilled to A_ADDR,
// reloaded as <vscale x 32 x half>, and stored into `b` (all align 16).
799 void test_copy_f16x4(__clang_svfloat16x4_t a) {
800 __clang_svfloat16x4_t b{a};
803 // CHECK-LABEL: define dso_local void @_Z15test_copy_f32x413svfloat32x4_t
804 // CHECK-SAME: (<vscale x 16 x float> [[A:%.*]]) #[[ATTR0]] {
805 // CHECK-NEXT: entry:
806 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 16 x float>, align 16
807 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 16 x float>, align 16
808 // CHECK-NEXT: store <vscale x 16 x float> [[A]], ptr [[A_ADDR]], align 16
809 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x float>, ptr [[A_ADDR]], align 16
810 // CHECK-NEXT: store <vscale x 16 x float> [[TMP0]], ptr [[B]], align 16
811 // CHECK-NEXT: ret void
// List-initialises a local `b` from the by-value parameter `a`.
// The CHECK lines above pin the expected IR: `a` is spilled to A_ADDR,
// reloaded as <vscale x 16 x float>, and stored into `b` (all align 16).
813 void test_copy_f32x4(__clang_svfloat32x4_t a) {
814 __clang_svfloat32x4_t b{a};
817 // CHECK-LABEL: define dso_local void @_Z15test_copy_f64x413svfloat64x4_t
818 // CHECK-SAME: (<vscale x 8 x double> [[A:%.*]]) #[[ATTR0]] {
819 // CHECK-NEXT: entry:
820 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 8 x double>, align 16
821 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 8 x double>, align 16
822 // CHECK-NEXT: store <vscale x 8 x double> [[A]], ptr [[A_ADDR]], align 16
823 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 8 x double>, ptr [[A_ADDR]], align 16
824 // CHECK-NEXT: store <vscale x 8 x double> [[TMP0]], ptr [[B]], align 16
825 // CHECK-NEXT: ret void
// List-initialises a local `b` from the by-value parameter `a`.
// The CHECK lines above pin the expected IR: `a` is spilled to A_ADDR,
// reloaded as <vscale x 8 x double>, and stored into `b` (all align 16).
827 void test_copy_f64x4(__clang_svfloat64x4_t a) {
828 __clang_svfloat64x4_t b{a};
831 // CHECK-LABEL: define dso_local void @_Z16test_copy_bf16x414svbfloat16x4_t
832 // CHECK-SAME: (<vscale x 32 x bfloat> [[A:%.*]]) #[[ATTR0]] {
833 // CHECK-NEXT: entry:
834 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 32 x bfloat>, align 16
835 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 32 x bfloat>, align 16
836 // CHECK-NEXT: store <vscale x 32 x bfloat> [[A]], ptr [[A_ADDR]], align 16
837 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 32 x bfloat>, ptr [[A_ADDR]], align 16
838 // CHECK-NEXT: store <vscale x 32 x bfloat> [[TMP0]], ptr [[B]], align 16
839 // CHECK-NEXT: ret void
// List-initialises a local `b` from the by-value parameter `a`.
// The CHECK lines above pin the expected IR: `a` is spilled to A_ADDR,
// reloaded as <vscale x 32 x bfloat>, and stored into `b` (all align 16).
841 void test_copy_bf16x4(__clang_svbfloat16x4_t a) {
842 __clang_svbfloat16x4_t b{a};
845 // CHECK-LABEL: define dso_local void @_Z12test_copy_b8u10__SVBool_t
846 // CHECK-SAME: (<vscale x 16 x i1> [[A:%.*]]) #[[ATTR0]] {
847 // CHECK-NEXT: entry:
848 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 2
849 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 16 x i1>, align 2
850 // CHECK-NEXT: store <vscale x 16 x i1> [[A]], ptr [[A_ADDR]], align 2
851 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[A_ADDR]], align 2
852 // CHECK-NEXT: store <vscale x 16 x i1> [[TMP0]], ptr [[B]], align 2
853 // CHECK-NEXT: ret void
// List-initialises a local `b` from the by-value parameter `a`.
// The CHECK lines above pin the expected IR: `a` is spilled to A_ADDR,
// reloaded as <vscale x 16 x i1>, and stored into `b`. Every access in this
// function is expected at align 2, unlike the align 16 of the data vectors.
855 void test_copy_b8(__SVBool_t a) {
856 __SVBool_t b{a};
859 // CHECK-LABEL: define dso_local void @_Z14test_copy_b8x210svboolx2_t
860 // CHECK-SAME: (<vscale x 32 x i1> [[A:%.*]]) #[[ATTR0]] {
861 // CHECK-NEXT: entry:
862 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 32 x i1>, align 2
863 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 32 x i1>, align 2
864 // CHECK-NEXT: store <vscale x 32 x i1> [[A]], ptr [[A_ADDR]], align 2
865 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 32 x i1>, ptr [[A_ADDR]], align 2
866 // CHECK-NEXT: store <vscale x 32 x i1> [[TMP0]], ptr [[B]], align 2
867 // CHECK-NEXT: ret void
// List-initialises a local `b` from the by-value parameter `a`.
// The CHECK lines above pin the expected IR: `a` is spilled to A_ADDR,
// reloaded as <vscale x 32 x i1>, and stored into `b` (all align 2).
869 void test_copy_b8x2(__clang_svboolx2_t a) {
870 __clang_svboolx2_t b{a};
873 // CHECK-LABEL: define dso_local void @_Z14test_copy_b8x410svboolx4_t
874 // CHECK-SAME: (<vscale x 64 x i1> [[A:%.*]]) #[[ATTR0]] {
875 // CHECK-NEXT: entry:
876 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <vscale x 64 x i1>, align 2
877 // CHECK-NEXT: [[B:%.*]] = alloca <vscale x 64 x i1>, align 2
878 // CHECK-NEXT: store <vscale x 64 x i1> [[A]], ptr [[A_ADDR]], align 2
879 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 64 x i1>, ptr [[A_ADDR]], align 2
880 // CHECK-NEXT: store <vscale x 64 x i1> [[TMP0]], ptr [[B]], align 2
881 // CHECK-NEXT: ret void
// List-initialises a local `b` from the by-value parameter `a`.
// The CHECK lines above pin the expected IR: `a` is spilled to A_ADDR,
// reloaded as <vscale x 64 x i1>, and stored into `b` (all align 2).
883 void test_copy_b8x4(__clang_svboolx4_t a) {
884 __clang_svboolx4_t b{a};
887 // CHECK-LABEL: define dso_local void @_Z13test_copy_cntu11__SVCount_t
888 // CHECK-SAME: (target("aarch64.svcount") [[A:%.*]]) #[[ATTR0]] {
889 // CHECK-NEXT: entry:
890 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca target("aarch64.svcount"), align 2
891 // CHECK-NEXT: [[B:%.*]] = alloca target("aarch64.svcount"), align 2
892 // CHECK-NEXT: store target("aarch64.svcount") [[A]], ptr [[A_ADDR]], align 2
893 // CHECK-NEXT: [[TMP0:%.*]] = load target("aarch64.svcount"), ptr [[A_ADDR]], align 2
894 // CHECK-NEXT: store target("aarch64.svcount") [[TMP0]], ptr [[B]], align 2
895 // CHECK-NEXT: ret void
// List-initialises a local `b` from the by-value parameter `a`.
// The CHECK lines above pin the expected IR: `a` is passed, spilled, reloaded
// and stored as the opaque type target("aarch64.svcount"), all at align 2.
897 void test_copy_cnt(__SVCount_t a) {
898 __SVCount_t b{a};