[clang][modules] Don't prevent translation of FW_Private includes when explicitly...
[llvm-project.git] / clang / test / CodeGenCXX / matrix-type-operators.cpp
blob2eb832f1aca6d0cf28516955fcdb1142ff1b56aa
1 // RUN: %clang_cc1 -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck --check-prefixes=CHECK,NOOPT %s
2 // RUN: %clang_cc1 -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck --check-prefixes=CHECK,OPT %s
// Matrix type aliases declared through the matrix_type attribute: a 5x5
// double matrix using typedef syntax and a 2x3 float matrix using alias syntax.
3 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
4 using fx2x3_t = float __attribute__((matrix_type(2, 3)));
// Wrapper template whose single member `value` is a matrix of EltTy whose
// dimensions (Rows x Columns) come from the template parameters.
6 template <typename EltTy, unsigned Rows, unsigned Columns>
7 struct MyMatrix {
8 using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns)));
10 matrix_t value;
// Returns the sum of the matrices wrapped by A and B (same element type and
// dimensions for both operands).
13 template <typename EltTy0, unsigned R0, unsigned C0>
14 typename MyMatrix<EltTy0, R0, C0>::matrix_t add(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) {
15 return A.value + B.value;
// Instantiates add() for 2x5 float matrices. The expectations cover the call
// emitted in this function and the instantiated body: two <10 x float> loads
// (carrying !tbaa only in the optimized configuration), an fadd, and the return.
18 void test_add_template() {
19 // CHECK-LABEL: define{{.*}} void @_Z17test_add_templatev()
20 // CHECK: %call = call noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2)
22 // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
23 // NOOPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
24 // NOOPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
25 // OPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
26 // OPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
27 // CHECK-NEXT: [[RES:%.*]] = fadd <10 x float> [[MAT1]], [[MAT2]]
28 // CHECK-NEXT: ret <10 x float> [[RES]]
// The local names Mat1/Mat2 appear verbatim in the expected call above.
30 MyMatrix<float, 2, 5> Mat1;
31 MyMatrix<float, 2, 5> Mat2;
32 Mat1.value = add(Mat1, Mat2);
// Returns the difference of the matrices wrapped by A and B (A minus B).
35 template <typename EltTy0, unsigned R0, unsigned C0>
36 typename MyMatrix<EltTy0, R0, C0>::matrix_t subtract(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) {
37 return A.value - B.value;
// Instantiates subtract() for 2x5 float matrices. The expectations cover the
// call emitted here and the instantiated body: two <10 x float> loads (with
// !tbaa only in the optimized configuration), an fsub, and the return.
40 void test_subtract_template() {
41 // CHECK-LABEL: define{{.*}} void @_Z22test_subtract_templatev()
42 // CHECK: %call = call noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2)
44 // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
45 // NOOPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
46 // NOOPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
47 // OPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
48 // OPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
49 // CHECK-NEXT: [[RES:%.*]] = fsub <10 x float> [[MAT1]], [[MAT2]]
50 // CHECK-NEXT: ret <10 x float> [[RES]]
// The local names Mat1/Mat2 appear verbatim in the expected call above.
52 MyMatrix<float, 2, 5> Mat1;
53 MyMatrix<float, 2, 5> Mat2;
54 Mat1.value = subtract(Mat1, Mat2);
// Struct holding an int with a user-defined conversion to double that
// returns that int; used as the scalar operand in the subtraction tests.
57 struct DoubleWrapper1 {
58 int x;
59 operator double() {
60 return x;
// Matrix minus wrapper. Expected order: load of the <90 x double> matrix,
// then the conversion-operator call on %w1, a splat of the resulting scalar
// (insertelement followed by shufflevector), and an fsub with the matrix as
// the left operand.
64 void test_DoubleWrapper1_Sub1(MyMatrix<double, 10, 9> &m) {
65 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub1R8MyMatrixIdLj10ELj9EE(
66 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
67 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
68 // CHECK-NEXT: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(ptr {{[^,]*}} %w1)
69 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
70 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
71 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
72 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
// The local name w1 is referenced by the expected call above.
74 DoubleWrapper1 w1;
75 w1.x = 10;
76 m.value = m.value - w1;
// Wrapper minus matrix. Here the conversion-operator call on %w1 is expected
// before the matrix load, and the splatted scalar is the left operand of the
// fsub.
79 void test_DoubleWrapper1_Sub2(MyMatrix<double, 10, 9> &m) {
80 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub2R8MyMatrixIdLj10ELj9EE(
81 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(ptr {{[^,]*}} %w1)
82 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
83 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
84 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
85 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
86 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
87 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
// The local name w1 is referenced by the expected call above.
89 DoubleWrapper1 w1;
90 w1.x = 10;
91 m.value = w1 - m.value;
// Struct holding an int with a user-defined conversion to double that
// returns that int; used as the scalar operand in the addition tests.
94 struct DoubleWrapper2 {
95 int x;
96 operator double() {
97 return x;
// Matrix plus wrapper. Expected: load of the <90 x double> matrix, the
// conversion-operator call on %w2, a splat of the scalar, and an fadd with
// the matrix as the left operand.
101 void test_DoubleWrapper2_Add1(MyMatrix<double, 10, 9> &m) {
102 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add1R8MyMatrixIdLj10ELj9EE(
103 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.+}}, align 8{{$}}
104 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.+}}, align 8, !tbaa !{{[0-9]+}}{{$}}
105 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(ptr {{[^,]*}} %w2)
106 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
107 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
108 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
109 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
// The local name w2 is referenced by the expected call above.
111 DoubleWrapper2 w2;
112 w2.x = 20;
113 m.value = m.value + w2;
// Wrapper plus matrix. The conversion-operator call on %w2 is expected before
// the matrix load, and the splatted scalar is the left operand of the fadd.
116 void test_DoubleWrapper2_Add2(MyMatrix<double, 10, 9> &m) {
117 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add2R8MyMatrixIdLj10ELj9EE(
118 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(ptr {{[^,]*}} %w2)
119 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
120 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
121 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
122 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
123 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
124 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
// The local name w2 is referenced by the expected call above.
126 DoubleWrapper2 w2;
127 w2.x = 20;
128 m.value = w2 + m.value;
// Struct holding a char with a user-defined conversion to int that returns
// that char; used where an integer scalar or index is combined with a matrix.
131 struct IntWrapper {
132 char x;
133 operator int() {
134 return x;
// Matrix plus IntWrapper. Expected: load of the <90 x double> matrix, the
// int conversion-operator call on %w3, a signed int-to-double conversion of
// that result, a splat of the converted scalar, and an fadd with the matrix
// as the left operand.
// The sitofp expectation consumes the captured call result rather than a
// hard-coded value name, matching how the DoubleWrapper tests use the capture.
138 void test_IntWrapper_Add(MyMatrix<double, 10, 9> &m) {
139 // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_AddR8MyMatrixIdLj10ELj9EE(
140 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
141 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
142 // CHECK-NEXT: [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %w3)
143 // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
144 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
145 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
146 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
147 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
// The local name w3 is referenced by the expected call above.
149 IntWrapper w3;
150 w3.x = 'c';
151 m.value = m.value + w3;
// IntWrapper minus matrix. The int conversion-operator call on %w3 and its
// signed int-to-double conversion are expected before the matrix load; the
// splatted scalar is the left operand of the fsub.
// The sitofp expectation consumes the captured call result rather than a
// hard-coded value name, matching how the DoubleWrapper tests use the capture.
154 void test_IntWrapper_Sub(MyMatrix<double, 10, 9> &m) {
155 // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_SubR8MyMatrixIdLj10ELj9EE(
156 // CHECK: [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %w3)
157 // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
158 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
159 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
160 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
161 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
162 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
163 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
// The local name w3 is referenced by the expected call above.
165 IntWrapper w3;
166 w3.x = 'c';
167 m.value = w3 - m.value;
// Returns the product of the matrices wrapped by A (R0 x C0) and B (C0 x C1);
// the result type is the R0 x C1 matrix of the same element type.
170 template <typename EltTy0, unsigned R0, unsigned C0, unsigned C1>
171 typename MyMatrix<EltTy0, R0, C1>::matrix_t multiply(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, C0, C1> &B) {
172 return A.value * B.value;
// Instantiates multiply() for a 2x5 by 5x2 float product. In this function
// the expectations require the call, a store of the <4 x float> result into
// the `value` field reached through %agg.result, and `ret void`. In the
// instantiated body they require two <10 x float> loads and a call to the
// llvm.matrix.multiply intrinsic with dimension arguments 2, 5, 2.
175 MyMatrix<float, 2, 2> test_multiply_template(MyMatrix<float, 2, 5> Mat1,
176 MyMatrix<float, 5, 2> Mat2) {
177 // CHECK-LABEL: define{{.*}} void @_Z22test_multiply_template8MyMatrixIfLj2ELj5EES_IfLj5ELj2EE(
178 // CHECK-NEXT: entry:
179 // CHECK-NEXT: [[RES:%.*]] = call noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2)
180 // CHECK-NEXT: %value = getelementptr inbounds %struct.MyMatrix.1, ptr %agg.result, i32 0, i32 0
181 // CHECK-NEXT: store <4 x float> [[RES]], ptr %value, align 4
182 // CHECK-NEXT: ret void
184 // CHECK-LABEL: define linkonce_odr noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(
185 // NOOPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
186 // NOOPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
187 // OPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
188 // OPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
189 // CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.matrix.multiply.v4f32.v10f32.v10f32(<10 x float> [[MAT1]], <10 x float> [[MAT2]], i32 2, i32 5, i32 2)
190 // CHECK-NEXT: ret <4 x float> [[RES]]
192 MyMatrix<float, 2, 2> Res;
193 Res.value = multiply(Mat1, Mat2);
194 return Res;
// IntWrapper times matrix, with the wrapper passed by reference. Expected:
// the int conversion-operator call, its signed int-to-double conversion, the
// <90 x double> matrix load, a splat of the converted scalar, and an fmul
// with the splat as the left operand, followed by the store and `ret void`.
// The sitofp expectation consumes the captured call result rather than a
// hard-coded value name, matching how the DoubleWrapper tests use the capture.
197 void test_IntWrapper_Multiply(MyMatrix<double, 10, 9> &m, IntWrapper &w3) {
198 // CHECK-LABEL: define{{.*}} void @_Z24test_IntWrapper_MultiplyR8MyMatrixIdLj10ELj9EER10IntWrapper(
199 // CHECK: [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr noundef {{.*}})
200 // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
201 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
202 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
203 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
204 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
205 // CHECK-NEXT: [[RES:%.*]] = fmul <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
206 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
207 // CHECK-NEXT: ret void
208 m.value = w3 * m.value;
// Writes e into the element at row i, column j of the matrix wrapped by Mat.
211 template <typename EltTy, unsigned Rows, unsigned Columns>
212 void insert(MyMatrix<EltTy, Rows, Columns> &Mat, EltTy e, unsigned i, unsigned j) {
213 Mat.value[i][j] = e;
// Instantiates insert() for a 2x2 unsigned matrix with runtime indices.
// In this function the expectations require loads of the four arguments and
// the call. In the instantiated body they require both indices zero-extended
// to i64, the flattened index computed as j * 2 + i, an llvm.assume that the
// index is below 4 (optimized configuration only), and a load /
// insertelement / store sequence on the <4 x i32> value.
216 void test_insert_template1(MyMatrix<unsigned, 2, 2> &Mat, unsigned e, unsigned i, unsigned j) {
217 // CHECK-LABEL: @_Z21test_insert_template1R8MyMatrixIjLj2ELj2EEjjj(
218 // NOOPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8{{$}}
219 // NOOPT-NEXT: [[E:%.*]] = load i32, ptr %e.addr, align 4{{$}}
220 // NOOPT-NEXT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
221 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
222 // OPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
223 // OPT-NEXT: [[E:%.*]] = load i32, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
224 // OPT-NEXT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
225 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
226 // CHECK-NEXT: call void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(ptr noundef nonnull align 4 dereferenceable(16) [[MAT_ADDR]], i32 noundef [[E]], i32 noundef [[I]], i32 noundef [[J]])
227 // CHECK-NEXT: ret void
229 // CHECK-LABEL: define linkonce_odr void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
230 // NOOPT: [[E:%.*]] = load i32, ptr %e.addr, align 4{{$}}
231 // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
232 // OPT: [[E:%.*]] = load i32, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
233 // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
234 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
235 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
236 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
237 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
238 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
239 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
240 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 4
241 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
242 // CHECK-NEXT: [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4{{$}}
243 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <4 x i32> [[MAT]], i32 [[E]], i64 [[IDX2]]
244 // CHECK-NEXT: store <4 x i32> [[MATINS]], ptr {{.*}}, align 4
245 // CHECK-NEXT: ret void
247 insert(Mat, e, i, j);
// Instantiates insert() for a 3x8 float matrix with constant indices (2, 5),
// which appear as immediate i32 arguments in the expected call. In the
// instantiated body the flattened index is expected as j * 3 + i, with an
// llvm.assume that it is below 24 in the optimized configuration only,
// followed by load / insertelement / store on the <24 x float> value.
250 void test_insert_template2(MyMatrix<float, 3, 8> &Mat, float e) {
251 // CHECK-LABEL: @_Z21test_insert_template2R8MyMatrixIfLj3ELj8EEf(
252 // NOOPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8{{$}}
253 // NOOPT-NEXT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
254 // OPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
255 // OPT-NEXT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
256 // CHECK-NEXT: call void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(ptr noundef nonnull align 4 dereferenceable(96) [[MAT_ADDR]], float noundef [[E]], i32 noundef 2, i32 noundef 5)
257 // CHECK-NEXT: ret void
259 // CHECK-LABEL: define linkonce_odr void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
260 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
261 // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
262 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
263 // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
264 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
265 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
266 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
267 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
268 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 3
269 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
270 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 24
271 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
272 // CHECK-NEXT: [[MAT:%.*]] = load <24 x float>, ptr {{.*}}, align 4{{$}}
273 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <24 x float> [[MAT]], float [[E]], i64 [[IDX2]]
274 // CHECK-NEXT: store <24 x float> [[MATINS]], ptr {{.*}}, align 4
275 // CHECK-NEXT: ret void
277 insert(Mat, e, 2, 5);
// Returns the element at row 1, column 0 of the matrix wrapped by Mat.
280 template <typename EltTy, unsigned Rows, unsigned Columns>
281 EltTy extract(MyMatrix<EltTy, Rows, Columns> &Mat) {
282 return Mat.value[1u][0u];
// Instantiates extract() for a 2x2 int matrix. This function is expected to
// consist of just the call and the return of its result; the instantiated
// body is expected to load the <4 x i32> value and extract element 1.
285 int test_extract_template(MyMatrix<int, 2, 2> Mat1) {
286 // CHECK-LABEL: @_Z21test_extract_template8MyMatrixIiLj2ELj2EE(
287 // CHECK-NEXT: entry:
288 // CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(ptr noundef nonnull align 4 dereferenceable(16) [[MAT1:%.*]])
289 // CHECK-NEXT: ret i32 [[CALL]]
291 // CHECK-LABEL: define linkonce_odr noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(
292 // NOOPT: [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4{{$}}
293 // OPT: [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
294 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <4 x i32> [[MAT]], i64 1
295 // CHECK-NEXT: ret i32 [[MATEXT]]
297 return extract(Mat1);
// 4x4 double matrix alias, plus a function template whose return type is
// spelled as decltype of a matrix subscript expression on its parameters.
// NOTE(review): the body is empty (no return statement); presumably only the
// declaration and its mangled name matter to this test - confirm before
// reusing it elsewhere.
300 using double4x4 = double __attribute__((matrix_type(4, 4)));
302 template <class R, class C>
303 auto matrix_subscript(double4x4 m, R r, C c) -> decltype(m[r][c]) {}
// Calls matrix_subscript with int indices 1 and 2. Expected: a load of the
// <16 x double> argument, a call whose result is a pointer (nonnull, align 8,
// dereferenceable(8)), a load of a double through that pointer, and the
// return of the loaded value.
305 double test_matrix_subscript(double4x4 m) {
306 // CHECK-LABEL: @_Z21test_matrix_subscriptu11matrix_typeILm4ELm4EdE(
307 // NOOPT: [[MAT:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}}
308 // OPT: [[MAT:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
309 // CHECK-NEXT: [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) ptr @_Z16matrix_subscriptIiiEDTixixfp_fp0_fp1_Eu11matrix_typeILm4ELm4EdET_T0_(<16 x double> noundef [[MAT]], i32 noundef 1, i32 noundef 2)
310 // NOOPT-NEXT: [[RES:%.*]] = load double, ptr [[CALL]], align 8{{$}}
311 // OPT-NEXT: [[RES:%.*]] = load double, ptr [[CALL]], align 8, !tbaa !{{[0-9]+}}{{$}}
312 // CHECK-NEXT: ret double [[RES]]
314 return matrix_subscript(m, 1, 2);
// Returns a const reference to m[0][1] of a by-value const matrix. Expected:
// the parameter stored to a [16 x double] alloca, reloaded as <16 x double>,
// element 4 extracted and stored into a separate double alloca, and a pointer
// to that alloca returned.
317 const double &test_matrix_subscript_reference(const double4x4 m) {
318 // CHECK-LABEL: @_Z31test_matrix_subscript_referenceu11matrix_typeILm4ELm4EdE(
319 // CHECK-NEXT: entry:
320 // CHECK-NEXT: [[M_ADDR:%.*]] = alloca [16 x double], align 8
321 // CHECK-NEXT: [[REF_TMP:%.*]] = alloca double, align 8
322 // CHECK-NEXT: store <16 x double> [[M:%.*]], ptr [[M_ADDR]], align 8
323 // NOOPT: [[NAMELESS1:%.*]] = load <16 x double>, ptr [[M_ADDR]], align 8{{$}}
324 // OPT: [[NAMELESS1:%.*]] = load <16 x double>, ptr [[M_ADDR]], align 8, !tbaa !{{[0-9]+}}{{$}}
325 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <16 x double> [[NAMELESS1]], i64 4
326 // CHECK-NEXT: store double [[MATEXT]], ptr [[REF_TMP]], align 8
327 // CHECK: ret ptr [[REF_TMP]]
329 return m[0][1];
// Struct holding a char with a user-defined conversion to unsigned that
// returns that char; used as an unsigned matrix index operand.
332 struct UnsignedWrapper {
333 char x;
334 operator unsigned() {
335 return x;
// Subscripts a 4x4 double matrix with indices built from wrapper conversions.
// Expected: the row index is the int conversion plus 1 (add nsw, then sext to
// i64); the column index is the unsigned conversion minus 1 (sub, then zext
// to i64); the flattened index is column * 4 + row. The optimized
// configuration additionally expects an llvm.assume that the index is below
// 16 before the matrix is loaded and the element extracted.
339 double extract_IntWrapper_idx(double4x4 &m, IntWrapper i, UnsignedWrapper j) {
340 // CHECK-LABEL: define{{.*}} double @_Z22extract_IntWrapper_idxRu11matrix_typeILm4ELm4EdE10IntWrapper15UnsignedWrapper(
341 // CHECK: [[I:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %i)
342 // CHECK-NEXT: [[I_ADD:%.*]] = add nsw i32 [[I]], 1
343 // CHECK-NEXT: [[I_ADD_EXT:%.*]] = sext i32 [[I_ADD]] to i64
344 // CHECK-NEXT: [[J:%.*]] = call noundef i32 @_ZN15UnsignedWrappercvjEv(ptr {{[^,]*}} %j)
345 // CHECK-NEXT: [[J_SUB:%.*]] = sub i32 [[J]], 1
346 // CHECK-NEXT: [[J_SUB_EXT:%.*]] = zext i32 [[J_SUB]] to i64
347 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_SUB_EXT]], 4
348 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_ADD_EXT]]
349 // NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
350 // NOOPT-NEXT: [[MAT:%.*]] = load <16 x double>, ptr [[MAT_ADDR]], align 8{{$}}
351 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
352 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
353 // OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
354 // OPT-NEXT: [[MAT:%.*]] = load <16 x double>, ptr [[MAT_ADDR]], align 8, !tbaa !{{[0-9]+}}{{$}}
355 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <16 x double> [[MAT]], i64 [[IDX2]]
356 // CHECK-NEXT: ret double [[MATEXT]]
357 return m[i + 1][j - 1];
// Alias template for an R x C matrix of T, and a tag type convertible to any
// square N x N matrix. The conversion writes 1 to each diagonal element of a
// local matrix and returns it.
// NOTE(review): `result` is declared without an initializer and only the
// diagonal is assigned here; off-diagonal contents are not set by this code.
360 template <class T, unsigned R, unsigned C>
361 using matrix_type = T __attribute__((matrix_type(R, C)));
362 struct identmatrix_t {
363 template <class T, unsigned N>
364 operator matrix_type<T, N, N>() const {
365 matrix_type<T, N, N> result;
366 for (unsigned i = 0; i != N; ++i)
367 result[i][i] = 1;
368 return result;
// Constant instance used as the operand in the test_constexpr functions.
372 constexpr identmatrix_t identmatrix;
// Adds identmatrix to a 4x4 float matrix. In this function the expectations
// require the matrix load, the call to the conversion operator on
// @_ZL11identmatrix, an fadd with the loaded matrix as the left operand, and
// the store back through the reference followed by `ret void`. In the
// instantiated conversion operator they cover the loop body: the diagonal
// index computed as i * 4 + i, an llvm.assume that it is below 16 in the
// optimized configuration only, and load / insertelement / store of
// float 1.0 on %result.
374 void test_constexpr1(matrix_type<float, 4, 4> &m) {
375 // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr1Ru11matrix_typeILm4ELm4EfE(
376 // NOOPT: [[MAT:%.*]] = load <16 x float>, ptr {{.*}}, align 4{{$}}
377 // OPT: [[MAT:%.*]] = load <16 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
378 // CHECK-NEXT: [[IM:%.*]] = call noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(ptr {{[^,]*}} @_ZL11identmatrix)
379 // CHECK-NEXT: [[ADD:%.*]] = fadd <16 x float> [[MAT]], [[IM]]
380 // NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
381 // OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
382 // CHECK-NEXT: store <16 x float> [[ADD]], ptr [[MAT_ADDR]], align 4
383 // CHECK-NEXT: ret void
385 // CHECK-LABEL: define linkonce_odr noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(
386 // CHECK-LABEL: for.body: ; preds = %for.cond
387 // NOOPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4{{$}}
388 // OPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
389 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
390 // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4{{$}}
391 // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
392 // CHECK-NEXT: [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
393 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[I2_EXT]], 4
394 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
395 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
396 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
397 // CHECK-NEXT: [[MAT:%.*]] = load <16 x float>, ptr %result, align 4{{$}}
398 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <16 x float> [[MAT]], float 1.000000e+00, i64 [[IDX2]]
399 // CHECK-NEXT: store <16 x float> [[MATINS]], ptr %result, align 4
400 // CHECK-NEXT: br label %for.inc
401 m = m + identmatrix;
// Computes identmatrix - m + 1 on a 5x5 int matrix. In this function the
// expectations require the conversion-operator call on @_ZL11identmatrix
// before the matrix load, an integer sub with the converted matrix as the
// left operand, an add of a vector of twenty-five 1s, and the store back
// through the reference. In the instantiated conversion operator they cover
// the loop body: the diagonal index computed as i * 5 + i, an llvm.assume
// that it is below 25 in the optimized configuration only, and load /
// insertelement / store of i32 1 on %result.
404 void test_constexpr2(matrix_type<int, 5, 5> &m) {
405 // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr2Ru11matrix_typeILm5ELm5EiE(
406 // CHECK: [[IM:%.*]] = call noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(ptr {{[^,]*}} @_ZL11identmatrix)
407 // NOOPT: [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4{{$}}
408 // OPT: [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
409 // CHECK-NEXT: [[SUB:%.*]] = sub <25 x i32> [[IM]], [[MAT]]
410 // CHECK-NEXT: [[SUB2:%.*]] = add <25 x i32> [[SUB]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
411 // NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
412 // OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
413 // CHECK-NEXT: store <25 x i32> [[SUB2]], ptr [[MAT_ADDR]], align 4
414 // CHECK-NEXT: ret void
417 // CHECK-LABEL: define linkonce_odr noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(
418 // CHECK-LABEL: for.body: ; preds = %for.cond
419 // NOOPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4{{$}}
420 // OPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
421 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
422 // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4{{$}}
423 // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
424 // CHECK-NEXT: [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
425 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[I2_EXT]], 5
426 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
427 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 25
428 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
429 // CHECK-NEXT: [[MAT:%.*]] = load <25 x i32>, ptr %result, align 4{{$}}
430 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x i32> [[MAT]], i32 1, i64 [[IDX2]]
431 // CHECK-NEXT: store <25 x i32> [[MATINS]], ptr %result, align 4
432 // CHECK-NEXT: br label %for.inc
434 m = identmatrix - m + 1;