1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
4 // REQUIRES: aarch64-registered-target
// Width, in bits, used for every fixed-length type below. The expected IR in
// this file represents fixed_int32_t as <16 x i32>, i.e. N is 512 for the RUN
// line above (vscale fixed at 4).
8 #define N __ARM_FEATURE_SVE_BITS
// Fixed-length counterparts of the SVE vector and predicate types.
10 typedef svint32_t fixed_int32_t
__attribute__((arm_sve_vector_bits(N
)));
11 typedef svbool_t fixed_bool_t
__attribute__((arm_sve_vector_bits(N
)));
// Generic vector of N / 64 bytes. The expected IR shows it as <8 x i8>, the
// same representation used for fixed_bool_t values.
12 typedef uint8_t uint8_vec_t
__attribute__((vector_size(N
/ 64)));
// Globals of the fixed-length types, referenced by the test functions below.
14 fixed_bool_t global_pred
;
15 fixed_int32_t global_vec
;
19 // CHECK-NEXT: [[RETVAL:%.*]] = alloca <16 x i32>, align 16
20 // CHECK-NEXT: [[PRED_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 2
21 // CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca <vscale x 4 x i32>, align 16
22 // CHECK-NEXT: [[PG:%.*]] = alloca <vscale x 16 x i1>, align 2
23 // CHECK-NEXT: store <vscale x 16 x i1> [[PRED:%.*]], ptr [[PRED_ADDR]], align 2
24 // CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], ptr [[VEC_ADDR]], align 16
25 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PRED_ADDR]], align 2
26 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @global_pred, align 2
27 // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[TMP1]], i64 0)
28 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CASTFIXEDSVE]] to <vscale x 16 x i1>
29 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr @global_pred, align 2
30 // CHECK-NEXT: [[CASTFIXEDSVE2:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[TMP3]], i64 0)
31 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <vscale x 2 x i8> [[CASTFIXEDSVE2]] to <vscale x 16 x i1>
32 // CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP4]])
33 // CHECK-NEXT: store <vscale x 16 x i1> [[TMP5]], ptr [[PG]], align 2
34 // CHECK-NEXT: [[TMP6:%.*]] = load <vscale x 16 x i1>, ptr [[PG]], align 2
35 // CHECK-NEXT: [[TMP7:%.*]] = load <16 x i32>, ptr @global_vec, align 16
36 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP7]], i64 0)
37 // CHECK-NEXT: [[TMP8:%.*]] = load <vscale x 4 x i32>, ptr [[VEC_ADDR]], align 16
38 // CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP6]])
39 // CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[CASTSCALABLESVE]], <vscale x 4 x i32> [[TMP8]])
40 // CHECK-NEXT: [[CASTFIXEDSVE3:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP10]], i64 0)
41 // CHECK-NEXT: store <16 x i32> [[CASTFIXEDSVE3]], ptr [[RETVAL]], align 16
42 // CHECK-NEXT: [[TMP11:%.*]] = load <16 x i32>, ptr [[RETVAL]], align 16
43 // CHECK-NEXT: [[CASTSCALABLESVE4:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP11]], i64 0)
44 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE4]]
// Passes fixed-length globals directly to SVE intrinsics: global_pred is used
// as both svand_z operands and global_vec as the first svadd_m operand. Per
// the expected IR above, each global is converted to its scalable form on use
// and the scalable sum is converted back to the fixed-length return type.
46 fixed_int32_t
foo(svbool_t pred
, svint32_t vec
) {
47 svbool_t pg
= svand_z(pred
, global_pred
, global_pred
);
48 return svadd_m(pg
, global_vec
, vec
);
51 // CHECK-LABEL: @test_ptr_to_global(
53 // CHECK-NEXT: [[RETVAL:%.*]] = alloca <16 x i32>, align 16
54 // CHECK-NEXT: [[GLOBAL_VEC_PTR:%.*]] = alloca ptr, align 8
55 // CHECK-NEXT: store ptr @global_vec, ptr [[GLOBAL_VEC_PTR]], align 8
56 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[GLOBAL_VEC_PTR]], align 8
57 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[TMP0]], align 16
58 // CHECK-NEXT: store <16 x i32> [[TMP1]], ptr [[RETVAL]], align 16
59 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr [[RETVAL]], align 16
60 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP2]], i64 0)
61 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
// Stores the address of global_vec in a local pointer and returns the vector
// read back through that pointer.
63 fixed_int32_t
test_ptr_to_global() {
64 fixed_int32_t
*global_vec_ptr
;
65 global_vec_ptr
= &global_vec
;
66 return *global_vec_ptr
;
70 // Test casting pointer from fixed-length array to scalable vector.
71 // CHECK-LABEL: @array_arg(
73 // CHECK-NEXT: [[RETVAL:%.*]] = alloca <16 x i32>, align 16
74 // CHECK-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8
75 // CHECK-NEXT: store ptr [[ARR:%.*]], ptr [[ARR_ADDR]], align 8
76 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8
77 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <16 x i32>, ptr [[TMP0]], i64 0
78 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[ARRAYIDX]], align 16
79 // CHECK-NEXT: store <16 x i32> [[TMP1]], ptr [[RETVAL]], align 16
80 // CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr [[RETVAL]], align 16
81 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP2]], i64 0)
82 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
84 fixed_int32_t
array_arg(fixed_int32_t arr
[]) {
88 // CHECK-LABEL: @address_of_array_idx(
90 // CHECK-NEXT: [[RETVAL:%.*]] = alloca <8 x i8>, align 2
91 // CHECK-NEXT: [[ARR:%.*]] = alloca [3 x <8 x i8>], align 2
92 // CHECK-NEXT: [[PARR:%.*]] = alloca ptr, align 8
93 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[ARR]], i64 0, i64 0
94 // CHECK-NEXT: store ptr [[ARRAYIDX]], ptr [[PARR]], align 8
95 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PARR]], align 8
96 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[TMP0]], align 2
97 // CHECK-NEXT: store <8 x i8> [[TMP1]], ptr [[RETVAL]], align 2
98 // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[RETVAL]], align 2
99 // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[TMP2]], i64 0)
100 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <vscale x 2 x i8> [[CASTFIXEDSVE]] to <vscale x 16 x i1>
101 // CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
103 fixed_bool_t
address_of_array_idx() {
110 // CHECK-LABEL: @test_cast(
111 // CHECK-NEXT: entry:
112 // CHECK-NEXT: [[RETVAL:%.*]] = alloca <16 x i32>, align 16
113 // CHECK-NEXT: [[PRED_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 2
114 // CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca <vscale x 4 x i32>, align 16
115 // CHECK-NEXT: [[XX:%.*]] = alloca <8 x i8>, align 8
116 // CHECK-NEXT: [[YY:%.*]] = alloca <8 x i8>, align 8
117 // CHECK-NEXT: [[PG:%.*]] = alloca <vscale x 16 x i1>, align 2
118 // CHECK-NEXT: store <vscale x 16 x i1> [[PRED:%.*]], ptr [[PRED_ADDR]], align 2
119 // CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], ptr [[VEC_ADDR]], align 16
120 // CHECK-NEXT: store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 0, i8 0, i8 0, i8 0>, ptr [[XX]], align 8
121 // CHECK-NEXT: store <8 x i8> <i8 2, i8 5, i8 4, i8 6, i8 0, i8 0, i8 0, i8 0>, ptr [[YY]], align 8
122 // CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PRED_ADDR]], align 2
123 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @global_pred, align 2
124 // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[TMP1]], i64 0)
125 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CASTFIXEDSVE]] to <vscale x 16 x i1>
126 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[XX]], align 8
127 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[YY]], align 8
128 // CHECK-NEXT: [[ADD:%.*]] = add <8 x i8> [[TMP3]], [[TMP4]]
129 // CHECK-NEXT: [[CASTFIXEDSVE2:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[ADD]], i64 0)
130 // CHECK-NEXT: [[TMP5:%.*]] = bitcast <vscale x 2 x i8> [[CASTFIXEDSVE2]] to <vscale x 16 x i1>
131 // CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP5]])
132 // CHECK-NEXT: store <vscale x 16 x i1> [[TMP6]], ptr [[PG]], align 2
133 // CHECK-NEXT: [[TMP7:%.*]] = load <vscale x 16 x i1>, ptr [[PG]], align 2
134 // CHECK-NEXT: [[TMP8:%.*]] = load <16 x i32>, ptr @global_vec, align 16
135 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP8]], i64 0)
136 // CHECK-NEXT: [[TMP9:%.*]] = load <vscale x 4 x i32>, ptr [[VEC_ADDR]], align 16
137 // CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP7]])
138 // CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[TMP10]], <vscale x 4 x i32> [[CASTSCALABLESVE]], <vscale x 4 x i32> [[TMP9]])
139 // CHECK-NEXT: [[CASTFIXEDSVE3:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP11]], i64 0)
140 // CHECK-NEXT: store <16 x i32> [[CASTFIXEDSVE3]], ptr [[RETVAL]], align 16
141 // CHECK-NEXT: [[TMP12:%.*]] = load <16 x i32>, ptr [[RETVAL]], align 16
142 // CHECK-NEXT: [[CASTSCALABLESVE4:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP12]], i64 0)
143 // CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE4]]
145 fixed_int32_t
test_cast(svbool_t pred
, svint32_t vec
) {
146 uint8_vec_t xx
= {1, 2, 3, 4};
147 uint8_vec_t yy
= {2, 5, 4, 6};
148 svbool_t pg
= svand_z(pred
, global_pred
, xx
+ yy
);
149 return svadd_m(pg
, global_vec
, vec
);