1 // RUN: %clang_cc1 -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck --check-prefixes=CHECK,NOOPT %s
2 // RUN: %clang_cc1 -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck --check-prefixes=CHECK,OPT %s
// 5x5 matrix of double, declared with the typedef spelling of the
// matrix_type(rows, columns) attribute.
3 typedef double dx5x5_t
__attribute__((matrix_type(5, 5)));
// 2x3 (rows x columns) matrix of float, declared with an alias-declaration
// rather than a typedef.
4 using fx2x3_t
= float __attribute__((matrix_type(2, 3)));
// Alias template for a matrix type with element type EltTy and dimensions
// Rows x Columns; both dimensions are dependent template arguments of the
// matrix_type attribute.
6 template <typename EltTy
, unsigned Rows
, unsigned Columns
>
8 using matrix_t
= EltTy
__attribute__((matrix_type(Rows
, Columns
)));
13 template <typename EltTy0
, unsigned R0
, unsigned C0
>
14 typename MyMatrix
<EltTy0
, R0
, C0
>::matrix_t
add(MyMatrix
<EltTy0
, R0
, C0
> &A
, MyMatrix
<EltTy0
, R0
, C0
> &B
) {
15 return A
.value
+ B
.value
;
18 void test_add_template() {
19 // CHECK-LABEL: define{{.*}} void @_Z17test_add_templatev()
20 // CHECK: %call = call noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2)
22 // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
23 // NOOPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
24 // NOOPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
25 // OPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
26 // OPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
27 // CHECK-NEXT: [[RES:%.*]] = fadd <10 x float> [[MAT1]], [[MAT2]]
28 // CHECK-NEXT: ret <10 x float> [[RES]]
30 MyMatrix
<float, 2, 5> Mat1
;
31 MyMatrix
<float, 2, 5> Mat2
;
32 Mat1
.value
= add(Mat1
, Mat2
);
35 template <typename EltTy0
, unsigned R0
, unsigned C0
>
36 typename MyMatrix
<EltTy0
, R0
, C0
>::matrix_t
subtract(MyMatrix
<EltTy0
, R0
, C0
> &A
, MyMatrix
<EltTy0
, R0
, C0
> &B
) {
37 return A
.value
- B
.value
;
40 void test_subtract_template() {
41 // CHECK-LABEL: define{{.*}} void @_Z22test_subtract_templatev()
42 // CHECK: %call = call noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2)
44 // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
45 // NOOPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
46 // NOOPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
47 // OPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
48 // OPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
49 // CHECK-NEXT: [[RES:%.*]] = fsub <10 x float> [[MAT1]], [[MAT2]]
50 // CHECK-NEXT: ret <10 x float> [[RES]]
52 MyMatrix
<float, 2, 5> Mat1
;
53 MyMatrix
<float, 2, 5> Mat2
;
54 Mat1
.value
= subtract(Mat1
, Mat2
);
57 struct DoubleWrapper1
{
64 void test_DoubleWrapper1_Sub1(MyMatrix
<double, 10, 9> &m
) {
65 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub1R8MyMatrixIdLj10ELj9EE(
66 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
67 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
68 // CHECK-NEXT: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(ptr {{[^,]*}} %w1)
69 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
70 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
71 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
72 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
76 m
.value
= m
.value
- w1
;
79 void test_DoubleWrapper1_Sub2(MyMatrix
<double, 10, 9> &m
) {
80 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub2R8MyMatrixIdLj10ELj9EE(
81 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(ptr {{[^,]*}} %w1)
82 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
83 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
84 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
85 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
86 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
87 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
91 m
.value
= w1
- m
.value
;
94 struct DoubleWrapper2
{
101 void test_DoubleWrapper2_Add1(MyMatrix
<double, 10, 9> &m
) {
102 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add1R8MyMatrixIdLj10ELj9EE(
103 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.+}}, align 8{{$}}
104 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.+}}, align 8, !tbaa !{{[0-9]+}}{{$}}
105 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(ptr {{[^,]*}} %w2)
106 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
107 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
108 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
109 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
113 m
.value
= m
.value
+ w2
;
116 void test_DoubleWrapper2_Add2(MyMatrix
<double, 10, 9> &m
) {
117 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add2R8MyMatrixIdLj10ELj9EE(
118 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(ptr {{[^,]*}} %w2)
119 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
120 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
121 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
122 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
123 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
124 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
128 m
.value
= w2
+ m
.value
;
138 void test_IntWrapper_Add(MyMatrix
<double, 10, 9> &m
) {
139 // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_AddR8MyMatrixIdLj10ELj9EE(
140 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
141 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
142 // CHECK-NEXT: [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %w3)
143 // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
144 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
145 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
146 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
147 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
151 m
.value
= m
.value
+ w3
;
154 void test_IntWrapper_Sub(MyMatrix
<double, 10, 9> &m
) {
155 // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_SubR8MyMatrixIdLj10ELj9EE(
156 // CHECK: [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %w3)
157 // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
158 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
159 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
160 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
161 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
162 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
163 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
167 m
.value
= w3
- m
.value
;
170 template <typename EltTy0
, unsigned R0
, unsigned C0
, unsigned C1
>
171 typename MyMatrix
<EltTy0
, R0
, C1
>::matrix_t
multiply(MyMatrix
<EltTy0
, R0
, C0
> &A
, MyMatrix
<EltTy0
, C0
, C1
> &B
) {
172 return A
.value
* B
.value
;
175 MyMatrix
<float, 2, 2> test_multiply_template(MyMatrix
<float, 2, 5> Mat1
,
176 MyMatrix
<float, 5, 2> Mat2
) {
177 // CHECK-LABEL: define{{.*}} void @_Z22test_multiply_template8MyMatrixIfLj2ELj5EES_IfLj5ELj2EE(
178 // CHECK-NEXT: entry:
179 // CHECK-NEXT: [[RES:%.*]] = call noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2)
180 // CHECK-NEXT: %value = getelementptr inbounds nuw %struct.MyMatrix.1, ptr %agg.result, i32 0, i32 0
181 // CHECK-NEXT: store <4 x float> [[RES]], ptr %value, align 4
182 // CHECK-NEXT: ret void
184 // CHECK-LABEL: define linkonce_odr noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(
185 // NOOPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
186 // NOOPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
187 // OPT: [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
188 // OPT: [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
189 // CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.matrix.multiply.v4f32.v10f32.v10f32(<10 x float> [[MAT1]], <10 x float> [[MAT2]], i32 2, i32 5, i32 2)
190 // CHECK-NEXT: ret <4 x float> [[RES]]
192 MyMatrix
<float, 2, 2> Res
;
193 Res
.value
= multiply(Mat1
, Mat2
);
197 void test_IntWrapper_Multiply(MyMatrix
<double, 10, 9> &m
, IntWrapper
&w3
) {
198 // CHECK-LABEL: define{{.*}} void @_Z24test_IntWrapper_MultiplyR8MyMatrixIdLj10ELj9EER10IntWrapper(
199 // CHECK: [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr noundef {{.*}})
200 // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
201 // NOOPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
202 // OPT: [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
203 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
204 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
205 // CHECK-NEXT: [[RES:%.*]] = fmul <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
206 // CHECK: store <90 x double> [[RES]], ptr {{.*}}, align 8
207 // CHECK-NEXT: ret void
208 m
.value
= w3
* m
.value
;
211 template <typename EltTy
, unsigned Rows
, unsigned Columns
>
212 void insert(MyMatrix
<EltTy
, Rows
, Columns
> &Mat
, EltTy e
, unsigned i
, unsigned j
) {
216 void test_insert_template1(MyMatrix
<unsigned, 2, 2> &Mat
, unsigned e
, unsigned i
, unsigned j
) {
217 // CHECK-LABEL: @_Z21test_insert_template1R8MyMatrixIjLj2ELj2EEjjj(
218 // NOOPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8{{$}}
219 // NOOPT-NEXT: [[E:%.*]] = load i32, ptr %e.addr, align 4{{$}}
220 // NOOPT-NEXT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
221 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
222 // OPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
223 // OPT-NEXT: [[E:%.*]] = load i32, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
224 // OPT-NEXT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
225 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
226 // CHECK-NEXT: call void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(ptr noundef nonnull align 4 dereferenceable(16) [[MAT_ADDR]], i32 noundef [[E]], i32 noundef [[I]], i32 noundef [[J]])
227 // CHECK-NEXT: ret void
229 // CHECK-LABEL: define linkonce_odr void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
230 // NOOPT: [[E:%.*]] = load i32, ptr %e.addr, align 4{{$}}
231 // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
232 // OPT: [[E:%.*]] = load i32, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
233 // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
234 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
235 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
236 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
237 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
238 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
239 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
240 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 4
241 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
242 // CHECK-NEXT: [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4{{$}}
243 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <4 x i32> [[MAT]], i32 [[E]], i64 [[IDX2]]
244 // CHECK-NEXT: store <4 x i32> [[MATINS]], ptr {{.*}}, align 4
245 // CHECK-NEXT: ret void
247 insert(Mat
, e
, i
, j
);
250 void test_insert_template2(MyMatrix
<float, 3, 8> &Mat
, float e
) {
251 // CHECK-LABEL: @_Z21test_insert_template2R8MyMatrixIfLj3ELj8EEf(
252 // NOOPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8{{$}}
253 // NOOPT-NEXT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
254 // OPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
255 // OPT-NEXT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
256 // CHECK-NEXT: call void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(ptr noundef nonnull align 4 dereferenceable(96) [[MAT_ADDR]], float noundef [[E]], i32 noundef 2, i32 noundef 5)
257 // CHECK-NEXT: ret void
259 // CHECK-LABEL: define linkonce_odr void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
260 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
261 // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
262 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
263 // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
264 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
265 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
266 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
267 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
268 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 3
269 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
270 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 24
271 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
272 // CHECK-NEXT: [[MAT:%.*]] = load <24 x float>, ptr {{.*}}, align 4{{$}}
273 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <24 x float> [[MAT]], float [[E]], i64 [[IDX2]]
274 // CHECK-NEXT: store <24 x float> [[MATINS]], ptr {{.*}}, align 4
275 // CHECK-NEXT: ret void
277 insert(Mat
, e
, 2, 5);
280 template <typename EltTy
, unsigned Rows
, unsigned Columns
>
281 EltTy
extract(MyMatrix
<EltTy
, Rows
, Columns
> &Mat
) {
282 return Mat
.value
[1u][0u];
285 int test_extract_template(MyMatrix
<int, 2, 2> Mat1
) {
286 // CHECK-LABEL: @_Z21test_extract_template8MyMatrixIiLj2ELj2EE(
287 // CHECK-NEXT: entry:
288 // CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(ptr noundef nonnull align 4 dereferenceable(16) [[MAT1:%.*]])
289 // CHECK-NEXT: ret i32 [[CALL]]
291 // CHECK-LABEL: define linkonce_odr noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(
292 // NOOPT: [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4{{$}}
293 // OPT: [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
294 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <4 x i32> [[MAT]], i64 1
295 // CHECK-NEXT: ret i32 [[MATEXT]]
297 return extract(Mat1
);
// 4x4 matrix of double; the parameter type of matrix_subscript,
// test_matrix_subscript, test_matrix_subscript_reference and
// extract_IntWrapper_idx.
300 using double4x4
= double __attribute__((matrix_type(4, 4)));
302 template <class R
, class C
>
303 auto matrix_subscript(double4x4 m
, R r
, C c
) -> decltype(m
[r
][c
]) {
304 // FIXME: We can't actually do 'return m[r][c]' here currently.
309 double test_matrix_subscript(double4x4 m
) {
310 // CHECK-LABEL: @_Z21test_matrix_subscriptu11matrix_typeILm4ELm4EdE(
311 // NOOPT: [[MAT:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}}
312 // OPT: [[MAT:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
313 // CHECK-NEXT: [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) ptr @_Z16matrix_subscriptIiiEDTixixfp_fp0_fp1_Eu11matrix_typeILm4ELm4EdET_T0_(<16 x double> noundef [[MAT]], i32 noundef 1, i32 noundef 2)
314 // NOOPT-NEXT: [[RES:%.*]] = load double, ptr [[CALL]], align 8{{$}}
315 // OPT-NEXT: [[RES:%.*]] = load double, ptr [[CALL]], align 8, !tbaa !{{[0-9]+}}{{$}}
316 // CHECK-NEXT: ret double [[RES]]
318 return matrix_subscript(m
, 1, 2);
321 const double &test_matrix_subscript_reference(const double4x4 m
) {
322 // CHECK-LABEL: @_Z31test_matrix_subscript_referenceu11matrix_typeILm4ELm4EdE(
323 // CHECK-NEXT: entry:
324 // CHECK-NEXT: [[M_ADDR:%.*]] = alloca [16 x double], align 8
325 // CHECK-NEXT: [[REF_TMP:%.*]] = alloca double, align 8
326 // CHECK-NEXT: store <16 x double> [[M:%.*]], ptr [[M_ADDR]], align 8
327 // NOOPT: [[NAMELESS1:%.*]] = load <16 x double>, ptr [[M_ADDR]], align 8{{$}}
328 // OPT: [[NAMELESS1:%.*]] = load <16 x double>, ptr [[M_ADDR]], align 8, !tbaa !{{[0-9]+}}{{$}}
329 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <16 x double> [[NAMELESS1]], i64 4
330 // CHECK-NEXT: store double [[MATEXT]], ptr [[REF_TMP]], align 8
331 // CHECK: ret ptr [[REF_TMP]]
336 struct UnsignedWrapper
{
338 operator unsigned() {
343 double extract_IntWrapper_idx(double4x4
&m
, IntWrapper i
, UnsignedWrapper j
) {
344 // CHECK-LABEL: define{{.*}} double @_Z22extract_IntWrapper_idxRu11matrix_typeILm4ELm4EdE10IntWrapper15UnsignedWrapper(
345 // CHECK: [[I:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %i)
346 // CHECK-NEXT: [[I_ADD:%.*]] = add nsw i32 [[I]], 1
347 // CHECK-NEXT: [[I_ADD_EXT:%.*]] = sext i32 [[I_ADD]] to i64
348 // CHECK-NEXT: [[J:%.*]] = call noundef i32 @_ZN15UnsignedWrappercvjEv(ptr {{[^,]*}} %j)
349 // CHECK-NEXT: [[J_SUB:%.*]] = sub i32 [[J]], 1
350 // CHECK-NEXT: [[J_SUB_EXT:%.*]] = zext i32 [[J_SUB]] to i64
351 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_SUB_EXT]], 4
352 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_ADD_EXT]]
353 // NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
354 // NOOPT-NEXT: [[MAT:%.*]] = load <16 x double>, ptr [[MAT_ADDR]], align 8{{$}}
355 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
356 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
357 // OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
358 // OPT-NEXT: [[MAT:%.*]] = load <16 x double>, ptr [[MAT_ADDR]], align 8, !tbaa !{{[0-9]+}}{{$}}
359 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <16 x double> [[MAT]], i64 [[IDX2]]
360 // CHECK-NEXT: ret double [[MATEXT]]
361 return m
[i
+ 1][j
- 1];
// Alias template for an R x C matrix of T. It is the target type of
// identmatrix_t's templated conversion operator and the parameter type of
// the test_constexpr1/test_constexpr2 functions.
364 template <class T
, unsigned R
, unsigned C
>
365 using matrix_type
= T
__attribute__((matrix_type(R
, C
)));
366 struct identmatrix_t
{
367 template <class T
, unsigned N
>
368 operator matrix_type
<T
, N
, N
>() const {
369 matrix_type
<T
, N
, N
> result
;
370 for (unsigned i
= 0; i
!= N
; ++i
)
// Constant identmatrix_t object used as an operand in test_constexpr1 and
// test_constexpr2; the CHECK lines there expect it to appear in the IR as
// @_ZL11identmatrix, passed to the conversion operator.
376 constexpr identmatrix_t identmatrix
;
378 void test_constexpr1(matrix_type
<float, 4, 4> &m
) {
379 // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr1Ru11matrix_typeILm4ELm4EfE(
380 // NOOPT: [[MAT:%.*]] = load <16 x float>, ptr {{.*}}, align 4{{$}}
381 // OPT: [[MAT:%.*]] = load <16 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
382 // CHECK-NEXT: [[IM:%.*]] = call noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(ptr {{[^,]*}} @_ZL11identmatrix)
383 // CHECK-NEXT: [[ADD:%.*]] = fadd <16 x float> [[MAT]], [[IM]]
384 // NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
385 // OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
386 // CHECK-NEXT: store <16 x float> [[ADD]], ptr [[MAT_ADDR]], align 4
387 // CHECK-NEXT: ret void
389 // CHECK-LABEL: define linkonce_odr noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(
390 // CHECK-LABEL: for.body: ; preds = %for.cond
391 // NOOPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4{{$}}
392 // OPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
393 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
394 // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4{{$}}
395 // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
396 // CHECK-NEXT: [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
397 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[I2_EXT]], 4
398 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
399 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
400 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
401 // CHECK-NEXT: [[MAT:%.*]] = load <16 x float>, ptr %result, align 4{{$}}
402 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <16 x float> [[MAT]], float 1.000000e+00, i64 [[IDX2]]
403 // CHECK-NEXT: store <16 x float> [[MATINS]], ptr %result, align 4
404 // CHECK-NEXT: br label %for.inc
408 void test_constexpr2(matrix_type
<int, 5, 5> &m
) {
409 // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr2Ru11matrix_typeILm5ELm5EiE(
410 // CHECK: [[IM:%.*]] = call noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(ptr {{[^,]*}} @_ZL11identmatrix)
411 // NOOPT: [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4{{$}}
412 // OPT: [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
413 // CHECK-NEXT: [[SUB:%.*]] = sub <25 x i32> [[IM]], [[MAT]]
414 // CHECK-NEXT: [[SUB2:%.*]] = add <25 x i32> [[SUB]], splat (i32 1)
415 // NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
416 // OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
417 // CHECK-NEXT: store <25 x i32> [[SUB2]], ptr [[MAT_ADDR]], align 4
418 // CHECK-NEXT: ret void
421 // CHECK-LABEL: define linkonce_odr noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(
422 // CHECK-LABEL: for.body: ; preds = %for.cond
423 // NOOPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4{{$}}
424 // OPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
425 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
426 // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4{{$}}
427 // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
428 // CHECK-NEXT: [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
429 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[I2_EXT]], 5
430 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
431 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 25
432 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
433 // CHECK-NEXT: [[MAT:%.*]] = load <25 x i32>, ptr %result, align 4{{$}}
434 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x i32> [[MAT]], i32 1, i64 [[IDX2]]
435 // CHECK-NEXT: store <25 x i32> [[MATINS]], ptr %result, align 4
436 // CHECK-NEXT: br label %for.inc
438 m
= identmatrix
- m
+ 1;