1 // RUN: %clang_cc1 -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck --check-prefixes=CHECK,NOOPT %s
2 // RUN: %clang_cc1 -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck --check-prefixes=CHECK,OPT %s
// 5x5 matrix of double, declared with the typedef spelling of the
// matrix_type attribute (the RUN lines enable it with -fenable-matrix).
3 typedef double dx5x5_t
__attribute__((matrix_type(5, 5)));
// 2x3 matrix of float, declared with the alias-declaration spelling of the
// matrix_type attribute.
4 using fx2x3_t
= float __attribute__((matrix_type(2, 3)));
// Matrix type whose element type and Rows x Columns dimensions come from the
// template parameters.
// NOTE(review): the tests below spell this as MyMatrix<...>::matrix_t and use
// a `value` member, so the alias presumably lives inside a MyMatrix struct
// whose header is not visible in this excerpt - confirm.
6 template <typename EltTy
, unsigned Rows
, unsigned Columns
>
8 using matrix_t
= EltTy
__attribute__((matrix_type(Rows
, Columns
)));
// Addition helper: returns A.value + B.value, typed as the matrix_t of
// MyMatrix<EltTy0, R0, C0>. Both operands are taken by non-const reference.
13 template <typename EltTy0
, unsigned R0
, unsigned C0
>
14 typename MyMatrix
<EltTy0
, R0
, C0
>::matrix_t
add(MyMatrix
<EltTy0
, R0
, C0
> &A
, MyMatrix
<EltTy0
, R0
, C0
> &B
) {
15 return A
.value
+ B
.value
;
// Instantiates add<float, 2, 5> on two local MyMatrix<float, 2, 5> objects and
// stores the result back into Mat1.value. The directives expect the call to
// return <10 x float>, and the instantiation to load both operands, fadd them
// and return the sum. The -O1 run additionally expects !tbaa metadata on the
// loads; the -O0 run expects the loads to end right after the alignment.
18 void test_add_template() {
19 // CHECK-LABEL: define{{.*}} void @_Z17test_add_templatev()
20 // CHECK: %call = call noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2)
22 // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
23 // NOOPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
24 // NOOPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
25 // OPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
26 // OPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
27 // CHECK-NEXT: [[RES:%.*]] = fadd <10 x float> [[MAT1]], [[MAT2]]
28 // CHECK-NEXT: ret <10 x float> [[RES]]
30 MyMatrix
<float, 2, 5> Mat1
;
31 MyMatrix
<float, 2, 5> Mat2
;
32 Mat1
.value
= add(Mat1
, Mat2
);
// Subtraction helper: returns A.value - B.value, typed as the matrix_t of
// MyMatrix<EltTy0, R0, C0>. Both operands are taken by non-const reference.
35 template <typename EltTy0
, unsigned R0
, unsigned C0
>
36 typename MyMatrix
<EltTy0
, R0
, C0
>::matrix_t
subtract(MyMatrix
<EltTy0
, R0
, C0
> &A
, MyMatrix
<EltTy0
, R0
, C0
> &B
) {
37 return A
.value
- B
.value
;
// Instantiates subtract<float, 2, 5> on two local MyMatrix<float, 2, 5>
// objects and stores the result back into Mat1.value. The directives expect
// the instantiation to load both operands, fsub them (first minus second) and
// return the difference. The -O1 run additionally expects !tbaa metadata on
// the loads.
40 void test_subtract_template() {
41 // CHECK-LABEL: define{{.*}} void @_Z22test_subtract_templatev()
42 // CHECK: %call = call noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2)
44 // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
45 // NOOPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
46 // NOOPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
47 // OPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
48 // OPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
49 // CHECK-NEXT: [[RES:%.*]] = fsub <10 x float> [[MAT1]], [[MAT2]]
50 // CHECK-NEXT: ret <10 x float> [[RES]]
52 MyMatrix
<float, 2, 5> Mat1
;
53 MyMatrix
<float, 2, 5> Mat2
;
54 Mat1
.value
= subtract(Mat1
, Mat2
);
// Wrapper type used as the scalar operand in the subtraction tests below; the
// directives there call its conversion to double (_ZN14DoubleWrapper1cvdEv).
// Its members are not visible in this excerpt.
57 struct DoubleWrapper1
{
// Matrix minus wrapped scalar: m.value = m.value - w1 (the declaration of w1
// is not visible in this excerpt). The directives expect the matrix to be
// loaded first, then w1 converted to double, splatted to <90 x double> via
// insertelement + shufflevector, and subtracted from the matrix.
64 void test_DoubleWrapper1_Sub1(MyMatrix
<double, 10, 9> &m
) {
65 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub1R8MyMatrixIdLj10ELj9EE(
66 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
67 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
68 // CHECK-NEXT: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(ptr {{[^,]*}} %w1)
69 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
70 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
71 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
72 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
76 m
.value
= m
.value
- w1
;
// Wrapped scalar minus matrix: m.value = w1 - m.value (the declaration of w1
// is not visible in this excerpt). Operand order is reversed relative to Sub1:
// the directives expect the conversion call before the matrix load, and the
// fsub to take the splatted scalar as its first operand.
79 void test_DoubleWrapper1_Sub2(MyMatrix
<double, 10, 9> &m
) {
80 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub2R8MyMatrixIdLj10ELj9EE(
81 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(ptr {{[^,]*}} %w1)
82 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
83 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
84 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
85 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
86 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
87 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
91 m
.value
= w1
- m
.value
;
// Wrapper type used as the scalar operand in the addition tests below; the
// directives there call its conversion to double (_ZN14DoubleWrapper2cvdEv).
// Its members are not visible in this excerpt.
94 struct DoubleWrapper2
{
// Matrix plus wrapped scalar: m.value = m.value + w2 (the declaration of w2
// is not visible in this excerpt). The directives expect the matrix load, then
// the conversion of w2 to double, a splat to <90 x double>, and an fadd with
// the matrix as the first operand.
101 void test_DoubleWrapper2_Add1(MyMatrix
<double, 10, 9> &m
) {
102 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add1R8MyMatrixIdLj10ELj9EE(
103 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.+}}, align 8{{$}}
104 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.+}}, align 8, !tbaa !{{[0-9]+}}{{$}}
105 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(ptr {{[^,]*}} %w2)
106 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
107 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
108 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
109 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
113 m
.value
= m
.value
+ w2
;
// Wrapped scalar plus matrix: m.value = w2 + m.value (the declaration of w2
// is not visible in this excerpt). Operand order is reversed relative to Add1:
// the directives expect the conversion call before the matrix load, and the
// fadd to take the splatted scalar as its first operand.
116 void test_DoubleWrapper2_Add2(MyMatrix
<double, 10, 9> &m
) {
117 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add2R8MyMatrixIdLj10ELj9EE(
118 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(ptr {{[^,]*}} %w2)
119 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
120 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
121 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
122 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
123 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
124 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
128 m
.value
= w2
+ m
.value
;
// Matrix plus int-convertible wrapper: m.value = m.value + w3 (the declaration
// of w3 is not visible in this excerpt). The directives expect the matrix
// load, the conversion of w3 to i32, a sitofp of that result to double, a
// splat to <90 x double>, and an fadd with the matrix as the first operand.
// The sitofp operand is matched through the captured conversion result rather
// than a hard-coded SSA name, so the directive does not depend on how the
// call result happens to be named.
138 void test_IntWrapper_Add(MyMatrix
<double, 10, 9> &m
) {
139 // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_AddR8MyMatrixIdLj10ELj9EE(
140 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
141 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
142 // CHECK-NEXT: [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %w3)
143 // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
144 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
145 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
146 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
147 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
151 m
.value
= m
.value
+ w3
;
// Int-convertible wrapper minus matrix: m.value = w3 - m.value (the
// declaration of w3 is not visible in this excerpt). The directives expect the
// conversion of w3 to i32 and its sitofp to double before the matrix load,
// then a splat to <90 x double> and an fsub with the splat as first operand.
// The sitofp operand is matched through the captured conversion result rather
// than a hard-coded SSA name, so the directive does not depend on how the
// call result happens to be named.
154 void test_IntWrapper_Sub(MyMatrix
<double, 10, 9> &m
) {
155 // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_SubR8MyMatrixIdLj10ELj9EE(
156 // CHECK: [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %w3)
157 // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
158 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
159 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
160 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
161 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
162 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
163 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
167 m
.value
= w3
- m
.value
;
// Matrix product helper: multiplies an R0 x C0 MyMatrix by a C0 x C1 MyMatrix
// and returns A.value * B.value, typed as the matrix_t of
// MyMatrix<EltTy0, R0, C1>. Both operands are taken by non-const reference.
170 template <typename EltTy0
, unsigned R0
, unsigned C0
, unsigned C1
>
171 typename MyMatrix
<EltTy0
, R0
, C1
>::matrix_t
multiply(MyMatrix
<EltTy0
, R0
, C0
> &A
, MyMatrix
<EltTy0
, C0
, C1
> &B
) {
172 return A
.value
* B
.value
;
// Multiplies a 2x5 by a 5x2 float MyMatrix through multiply() and stores the
// product in Res.value (the return statement is not visible in this excerpt).
// The directives expect the caller to be emitted as a void function that
// writes the <4 x float> result into the value field of %agg.result, and the
// multiply instantiation to lower to llvm.matrix.multiply with dimensions
// (2, 5, 2). The -O1 run additionally expects !tbaa metadata on the loads.
175 MyMatrix
<float, 2, 2> test_multiply_template(MyMatrix
<float, 2, 5> Mat1
,
176 MyMatrix
<float, 5, 2> Mat2
) {
177 // CHECK-LABEL: define{{.*}} void @_Z22test_multiply_template8MyMatrixIfLj2ELj5EES_IfLj5ELj2EE(
178 // CHECK-NEXT: entry:
179 // CHECK-NEXT: [[RES:%.*]] = call noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2)
180 // CHECK-NEXT: %value = getelementptr inbounds %struct.MyMatrix.1, ptr %agg.result, i32 0, i32 0
181 // CHECK-NEXT: store <4 x float> [[RES]], ptr %value, align 4
182 // CHECK-NEXT: ret void
184 // CHECK-LABEL: define linkonce_odr noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(
185 // NOOPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
186 // NOOPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
187 // OPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
188 // OPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
189 // CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.matrix.multiply.v4f32.v10f32.v10f32(<10 x float> [[MAT1]], <10 x float> [[MAT2]], i32 2, i32 5, i32 2)
190 // CHECK-NEXT: ret <4 x float> [[RES]]
192 MyMatrix
<float, 2, 2> Res
;
193 Res
.value
= multiply(Mat1
, Mat2
);
// Int-convertible wrapper times matrix: m.value = w3 * m.value, with w3 passed
// by reference. The directives expect the conversion of w3 to i32 and its
// sitofp to double before the matrix load, then a splat to <90 x double>, an
// fmul with the splat as first operand, a store of the product and ret void.
// The sitofp operand is matched through the captured conversion result rather
// than a hard-coded SSA name, so the directive does not depend on how the
// call result happens to be named.
197 void test_IntWrapper_Multiply(MyMatrix
<double, 10, 9> &m
, IntWrapper
&w3
) {
198 // CHECK-LABEL: define{{.*}} void @_Z24test_IntWrapper_MultiplyR8MyMatrixIdLj10ELj9EER10IntWrapper(
199 // CHECK: [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr noundef {{.*}})
200 // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
201 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
202 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
203 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
204 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
205 // CHECK-NEXT: [[RES:%.*]] = fmul <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
206 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
207 // CHECK-NEXT: ret void
208 m
.value
= w3
* m
.value
;
// Helper taking a MyMatrix by reference, an element e and two unsigned
// indices i and j. The function body is not visible in this excerpt.
// NOTE(review): the directives for its instantiations expect e to be inserted
// at linear index j * Rows + i of the matrix - presumably the body is
// Mat.value[i][j] = e; confirm against the full file.
211 template <typename EltTy
, unsigned Rows
, unsigned Columns
>
212 void insert(MyMatrix
<EltTy
, Rows
, Columns
> &Mat
, EltTy e
, unsigned i
, unsigned j
) {
// Forwards Mat, e, i and j to insert<unsigned, 2, 2>. For the caller the
// directives expect the four arguments to be reloaded and passed on. For the
// instantiation they expect the linear index j * 2 + i to be computed in i64
// and e inserted into the loaded <4 x i32> at that index. Only the -O1 run
// expects an llvm.assume that the index is below 4, plus !tbaa metadata on the
// scalar loads.
216 void test_insert_template1(MyMatrix
<unsigned, 2, 2> &Mat
, unsigned e
, unsigned i
, unsigned j
) {
217 // CHECK-LABEL: @_Z21test_insert_template1R8MyMatrixIjLj2ELj2EEjjj(
218 // NOOPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8{{$}}
219 // NOOPT-NEXT: [[E:%.*]] = load i32, ptr %e.addr, align 4{{$}}
220 // NOOPT-NEXT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
221 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
222 // OPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
223 // OPT-NEXT: [[E:%.*]] = load i32, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
224 // OPT-NEXT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
225 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
226 // CHECK-NEXT: call void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(ptr noundef nonnull align 4 dereferenceable(16) [[MAT_ADDR]], i32 noundef [[E]], i32 noundef [[I]], i32 noundef [[J]])
227 // CHECK-NEXT: ret void
229 // CHECK-LABEL: define linkonce_odr void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
230 // NOOPT: [[E:%.*]] = load i32, ptr %e.addr, align 4{{$}}
231 // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
232 // OPT: [[E:%.*]] = load i32, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
233 // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
234 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
235 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
236 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
237 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
238 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
239 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
240 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 4
241 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
242 // CHECK-NEXT: [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4{{$}}
243 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <4 x i32> [[MAT]], i32 [[E]], i64 [[IDX2]]
244 // CHECK-NEXT: store <4 x i32> [[MATINS]], ptr {{.*}}, align 4
245 // CHECK-NEXT: ret void
247 insert(Mat
, e
, i
, j
);
// Calls insert<float, 3, 8> with constant indices (2, 5). For the caller the
// directives expect the constants to be passed as i32 2 and i32 5. For the
// instantiation they expect the linear index j * 3 + i to be computed in i64
// and e inserted into the loaded <24 x float> at that index. Only the -O1 run
// expects an llvm.assume that the index is below 24, plus !tbaa metadata on
// the scalar loads.
250 void test_insert_template2(MyMatrix
<float, 3, 8> &Mat
, float e
) {
251 // CHECK-LABEL: @_Z21test_insert_template2R8MyMatrixIfLj3ELj8EEf(
252 // NOOPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8{{$}}
253 // NOOPT-NEXT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
254 // OPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
255 // OPT-NEXT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
256 // CHECK-NEXT: call void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(ptr noundef nonnull align 4 dereferenceable(96) [[MAT_ADDR]], float noundef [[E]], i32 noundef 2, i32 noundef 5)
257 // CHECK-NEXT: ret void
259 // CHECK-LABEL: define linkonce_odr void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
260 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
261 // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
262 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
263 // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
264 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
265 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
266 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
267 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
268 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 3
269 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
270 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 24
271 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
272 // CHECK-NEXT: [[MAT:%.*]] = load <24 x float>, ptr {{.*}}, align 4{{$}}
273 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <24 x float> [[MAT]], float [[E]], i64 [[IDX2]]
274 // CHECK-NEXT: store <24 x float> [[MATINS]], ptr {{.*}}, align 4
275 // CHECK-NEXT: ret void
277 insert(Mat
, e
, 2, 5);
// Returns the element of Mat.value selected by the constant subscripts
// [1u][0u], as an EltTy value.
280 template <typename EltTy
, unsigned Rows
, unsigned Columns
>
281 EltTy
extract(MyMatrix
<EltTy
, Rows
, Columns
> &Mat
) {
282 return Mat
.value
[1u][0u];
// Calls extract<int, 2, 2> on a by-value MyMatrix<int, 2, 2> and returns the
// result. The directives expect the instantiation to load the <4 x i32> and
// extractelement at constant index 1. The -O1 run additionally expects !tbaa
// metadata on the matrix load.
285 int test_extract_template(MyMatrix
<int, 2, 2> Mat1
) {
286 // CHECK-LABEL: @_Z21test_extract_template8MyMatrixIiLj2ELj2EE(
287 // CHECK-NEXT: entry:
288 // CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(ptr noundef nonnull align 4 dereferenceable(16) [[MAT1:%.*]])
289 // CHECK-NEXT: ret i32 [[CALL]]
291 // CHECK-LABEL: define linkonce_odr noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(
292 // NOOPT: [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4{{$}}
293 // OPT: [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
294 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <4 x i32> [[MAT]], i64 1
295 // CHECK-NEXT: ret i32 [[MATEXT]]
297 return extract(Mat1
);
// 4x4 matrix of double, used by the subscript tests below.
300 using double4x4
= double __attribute__((matrix_type(4, 4)));
// Function template whose trailing return type is decltype(m[r][c]), i.e. the
// type of a matrix subscript expression on a double4x4 with index types R and
// C. The body is empty, so nothing is returned.
// NOTE(review): presumably only the declared type and its mangling matter to
// the test - confirm.
302 template <class R
, class C
>
303 auto matrix_subscript(double4x4 m
, R r
, C c
) -> decltype(m
[r
][c
]) {}
// Calls matrix_subscript(m, 1, 2) and returns the result as a double. The
// directives expect the callee (mangled with the subscript expression in its
// signature) to return a pointer that is dereferenceable for 8 bytes, and the
// caller to load the double through it. The -O1 run additionally expects
// !tbaa metadata on the loads.
305 double test_matrix_subscript(double4x4 m
) {
306 // CHECK-LABEL: @_Z21test_matrix_subscriptu11matrix_typeILm4ELm4EdE(
307 // NOOPT: [[MAT:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}}
308 // OPT: [[MAT:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
309 // CHECK-NEXT: [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) ptr @_Z16matrix_subscriptIiiEDTixixfp_fp0_fp1_Eu11matrix_typeILm4ELm4EdET_T0_(<16 x double> noundef [[MAT]], i32 noundef 1, i32 noundef 2)
310 // NOOPT-NEXT: [[RES:%.*]] = load double, ptr [[CALL]], align 8{{$}}
311 // OPT-NEXT: [[RES:%.*]] = load double, ptr [[CALL]], align 8, !tbaa !{{[0-9]+}}{{$}}
312 // CHECK-NEXT: ret double [[RES]]
314 return matrix_subscript(m
, 1, 2);
// Returns a const double& computed from a by-value const double4x4 parameter;
// the function body is not visible in this excerpt. The directives expect the
// parameter to be spilled to a [16 x double] alloca, element 4 of the reloaded
// matrix to be extracted into a temporary double, and a pointer to that
// temporary to be returned.
317 const double &test_matrix_subscript_reference(const double4x4 m
) {
318 // CHECK-LABEL: @_Z31test_matrix_subscript_referenceu11matrix_typeILm4ELm4EdE(
319 // CHECK-NEXT: entry:
320 // CHECK-NEXT: [[M_ADDR:%.*]] = alloca [16 x double], align 8
321 // CHECK-NEXT: [[REF_TMP:%.*]] = alloca double, align 8
322 // CHECK-NEXT: store <16 x double> [[M:%.*]], ptr [[M_ADDR]], align 8
323 // NOOPT: [[NAMELESS1:%.*]] = load <16 x double>, ptr [[M_ADDR]], align 8{{$}}
324 // OPT: [[NAMELESS1:%.*]] = load <16 x double>, ptr [[M_ADDR]], align 8, !tbaa !{{[0-9]+}}{{$}}
325 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <16 x double> [[NAMELESS1]], i64 4
326 // CHECK-NEXT: store double [[MATEXT]], ptr [[REF_TMP]], align 8
327 // CHECK: ret ptr [[REF_TMP]]
// Wrapper type with a user-defined conversion to unsigned; used as the second
// subscript in extract_IntWrapper_idx. The operator body and any members are
// not visible in this excerpt.
332 struct UnsignedWrapper
{
334 operator unsigned() {
// Subscripts a double4x4 with indices produced by user-defined conversions:
// returns m[i + 1][j - 1]. The directives expect i to be converted to i32,
// incremented with add nsw and sign-extended; j to be converted via the
// unsigned conversion, decremented and zero-extended; and the linear index
// (j - 1) * 4 + (i + 1) to feed an extractelement on the loaded matrix. Only
// the -O1 run expects an llvm.assume that the index is below 16, plus !tbaa
// metadata on the loads.
339 double extract_IntWrapper_idx(double4x4
&m
, IntWrapper i
, UnsignedWrapper j
) {
340 // CHECK-LABEL: define{{.*}} double @_Z22extract_IntWrapper_idxRu11matrix_typeILm4ELm4EdE10IntWrapper15UnsignedWrapper(
341 // CHECK: [[I:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %i)
342 // CHECK-NEXT: [[I_ADD:%.*]] = add nsw i32 [[I]], 1
343 // CHECK-NEXT: [[I_ADD_EXT:%.*]] = sext i32 [[I_ADD]] to i64
344 // CHECK-NEXT: [[J:%.*]] = call noundef i32 @_ZN15UnsignedWrappercvjEv(ptr {{[^,]*}} %j)
345 // CHECK-NEXT: [[J_SUB:%.*]] = sub i32 [[J]], 1
346 // CHECK-NEXT: [[J_SUB_EXT:%.*]] = zext i32 [[J_SUB]] to i64
347 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_SUB_EXT]], 4
348 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_ADD_EXT]]
349 // NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
350 // NOOPT-NEXT: [[MAT:%.*]] = load <16 x double>, ptr [[MAT_ADDR]], align 8{{$}}
351 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
352 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
353 // OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
354 // OPT-NEXT: [[MAT:%.*]] = load <16 x double>, ptr [[MAT_ADDR]], align 8, !tbaa !{{[0-9]+}}{{$}}
355 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <16 x double> [[MAT]], i64 [[IDX2]]
356 // CHECK-NEXT: ret double [[MATEXT]]
357 return m
[i
+ 1][j
- 1];
// Alias template for a matrix of T whose two dimensions are the template
// parameters R and C, passed to the matrix_type attribute in that order.
360 template <class T
, unsigned R
, unsigned C
>
361 using matrix_type
= T
__attribute__((matrix_type(R
, C
)));
// Tag type with a templated, const conversion operator to a square
// matrix_type<T, N, N>. The operator declares a local `result` and loops i
// from 0 while i != N; the loop body and the return are not visible in this
// excerpt.
// NOTE(review): the for.body directives in test_constexpr1/2 insert the value
// 1 at linear index i * N + i of `result`, i.e. on the diagonal - presumably
// this builds an identity matrix; confirm against the full file.
362 struct identmatrix_t
{
363 template <class T
, unsigned N
>
364 operator matrix_type
<T
, N
, N
>() const {
365 matrix_type
<T
, N
, N
> result
;
366 for (unsigned i
= 0; i
!= N
; ++i
)
// constexpr instance of identmatrix_t used as an operand in the tests below;
// the directives reference it as @_ZL11identmatrix.
372 constexpr identmatrix_t identmatrix
;
// Codegen test combining a 4x4 float matrix passed by reference with the
// identmatrix constant (the statement itself is not visible in this excerpt).
// The directives expect the matrix to be loaded, identmatrix to be converted
// to <16 x float> through the identmatrix_t conversion operator, the two to be
// combined with fadd (matrix first), and the sum to be stored through the
// reloaded m pointer before a `ret void`. For the conversion operator's
// for.body they expect 1.0 to be inserted at linear index i * 4 + i of
// `result`. Only the -O1 run expects an llvm.assume that the index is below
// 16, plus !tbaa metadata on the loads.
// The final directive of the caller spells out the complete `ret void`
// instruction instead of relying on a substring match of a truncated pattern.
374 void test_constexpr1(matrix_type
<float, 4, 4> &m
) {
375 // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr1Ru11matrix_typeILm4ELm4EfE(
376 // NOOPT: [[MAT:%.*]] = load <16 x float>, ptr {{.*}}, align 4{{$}}
377 // OPT: [[MAT:%.*]] = load <16 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
378 // CHECK-NEXT: [[IM:%.*]] = call noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(ptr {{[^,]*}} @_ZL11identmatrix)
379 // CHECK-NEXT: [[ADD:%.*]] = fadd <16 x float> [[MAT]], [[IM]]
380 // NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
381 // OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
382 // CHECK-NEXT: store <16 x float> [[ADD]], ptr [[MAT_ADDR]], align 4
383 // CHECK-NEXT: ret void
385 // CHECK-LABEL: define linkonce_odr noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(
386 // CHECK-LABEL: for.body: ; preds = %for.cond
387 // NOOPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4{{$}}
388 // OPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
389 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
390 // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4{{$}}
391 // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
392 // CHECK-NEXT: [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
393 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[I2_EXT]], 4
394 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
395 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
396 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
397 // CHECK-NEXT: [[MAT:%.*]] = load <16 x float>, ptr %result, align 4{{$}}
398 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <16 x float> [[MAT]], float 1.000000e+00, i64 [[IDX2]]
399 // CHECK-NEXT: store <16 x float> [[MATINS]], ptr %result, align 4
400 // CHECK-NEXT: br label %for.inc
// Evaluates m = identmatrix - m + 1 on a 5x5 int matrix passed by reference.
// The directives expect identmatrix to be converted to <25 x i32> first, then
// the matrix load, an integer sub (converted constant minus matrix), an add of
// an all-ones <25 x i32> vector for the scalar 1, and a store through the
// reloaded m pointer. For the conversion operator's for.body they expect
// i32 1 to be inserted at linear index i * 5 + i of `result`. Only the -O1 run
// expects an llvm.assume that the index is below 25, plus !tbaa metadata on
// the loads.
404 void test_constexpr2(matrix_type
<int, 5, 5> &m
) {
405 // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr2Ru11matrix_typeILm5ELm5EiE(
406 // CHECK: [[IM:%.*]] = call noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(ptr {{[^,]*}} @_ZL11identmatrix)
407 // NOOPT: [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4{{$}}
408 // OPT: [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
409 // CHECK-NEXT: [[SUB:%.*]] = sub <25 x i32> [[IM]], [[MAT]]
410 // CHECK-NEXT: [[SUB2:%.*]] = add <25 x i32> [[SUB]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
411 // NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
412 // OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
413 // CHECK-NEXT: store <25 x i32> [[SUB2]], ptr [[MAT_ADDR]], align 4
414 // CHECK-NEXT: ret void
417 // CHECK-LABEL: define linkonce_odr noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(
418 // CHECK-LABEL: for.body: ; preds = %for.cond
419 // NOOPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4{{$}}
420 // OPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
421 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
422 // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4{{$}}
423 // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
424 // CHECK-NEXT: [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
425 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[I2_EXT]], 5
426 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
427 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 25
428 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
429 // CHECK-NEXT: [[MAT:%.*]] = load <25 x i32>, ptr %result, align 4{{$}}
430 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x i32> [[MAT]], i32 1, i64 [[IDX2]]
431 // CHECK-NEXT: store <25 x i32> [[MATINS]], ptr %result, align 4
432 // CHECK-NEXT: br label %for.inc
434 m
= identmatrix
- m
+ 1;