1 // RUN: %clang_cc1 -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck %s
2 // RUN: %clang_cc1 -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck --check-prefixes=CHECK,OPT %s
// Matrix type aliases declared with the matrix_type attribute: a 5x5 matrix of
// double spelled with typedef, and a 2x3 matrix of float spelled with a
// using-alias.
4 typedef double dx5x5_t
__attribute__((matrix_type(5, 5)));
5 using fx2x3_t
= float __attribute__((matrix_type(2, 3)));
// Alias template naming a Rows x Columns matrix whose element type is EltTy.
// All three template parameters are forwarded straight into the matrix_type
// attribute.
7 template <typename EltTy
, unsigned Rows
, unsigned Columns
>
9 using matrix_t
= EltTy
__attribute__((matrix_type(Rows
, Columns
)));
// Returns the sum of the `value` members of A and B.
// Both operands are MyMatrix<EltTy0, R0, C0> and the result has the nested
// type MyMatrix<EltTy0, R0, C0>::matrix_t.
// NOTE(review): MyMatrix is declared outside this excerpt -- confirm that
// `value` is its matrix payload.
14 template <typename EltTy0
, unsigned R0
, unsigned C0
>
15 typename MyMatrix
<EltTy0
, R0
, C0
>::matrix_t
add(MyMatrix
<EltTy0
, R0
, C0
> &A
, MyMatrix
<EltTy0
, R0
, C0
> &B
) {
16 return A
.value
+ B
.value
;
// Calls add() on two local MyMatrix<float, 2, 5> objects and stores the result
// back into Mat1.value. The FileCheck expectations below require the caller to
// pass both locals by reference and the instantiated callee to load both
// operands, emit one fadd on <10 x float>, and return it.
19 void test_add_template() {
20 // CHECK-LABEL: define{{.*}} void @_Z17test_add_templatev()
21 // CHECK: %call = call noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(%struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat1, %struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat2)
23 // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
24 // CHECK: [[MAT1:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
25 // CHECK: [[MAT2:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
26 // CHECK-NEXT: [[RES:%.*]] = fadd <10 x float> [[MAT1]], [[MAT2]]
27 // CHECK-NEXT: ret <10 x float> [[RES]]
29 MyMatrix
<float, 2, 5> Mat1
;
30 MyMatrix
<float, 2, 5> Mat2
;
31 Mat1
.value
= add(Mat1
, Mat2
);
// Returns A.value - B.value.
// Both operands are MyMatrix<EltTy0, R0, C0> and the result has the nested
// type MyMatrix<EltTy0, R0, C0>::matrix_t.
// NOTE(review): MyMatrix is declared outside this excerpt -- confirm that
// `value` is its matrix payload.
34 template <typename EltTy0
, unsigned R0
, unsigned C0
>
35 typename MyMatrix
<EltTy0
, R0
, C0
>::matrix_t
subtract(MyMatrix
<EltTy0
, R0
, C0
> &A
, MyMatrix
<EltTy0
, R0
, C0
> &B
) {
36 return A
.value
- B
.value
;
// Calls subtract() on two local MyMatrix<float, 2, 5> objects and stores the
// result back into Mat1.value. The FileCheck expectations below require the
// instantiated callee to load both operands, emit one fsub on <10 x float>
// with the operands in source order, and return it.
39 void test_subtract_template() {
40 // CHECK-LABEL: define{{.*}} void @_Z22test_subtract_templatev()
41 // CHECK: %call = call noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(%struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat1, %struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat2)
43 // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
44 // CHECK: [[MAT1:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
45 // CHECK: [[MAT2:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
46 // CHECK-NEXT: [[RES:%.*]] = fsub <10 x float> [[MAT1]], [[MAT2]]
47 // CHECK-NEXT: ret <10 x float> [[RES]]
49 MyMatrix
<float, 2, 5> Mat1
;
50 MyMatrix
<float, 2, 5> Mat2
;
51 Mat1
.value
= subtract(Mat1
, Mat2
);
54 struct DoubleWrapper1
{
// Matrix-minus-scalar where the scalar is a DoubleWrapper1 object (w1).
// Expected IR: the matrix is loaded first, then w1's conversion operator to
// double is called, the scalar is splatted across <90 x double>, and the fsub
// takes the matrix as its left operand.
// NOTE(review): the declaration of w1 is not visible in this excerpt.
61 void test_DoubleWrapper1_Sub1(MyMatrix
<double, 10, 9> &m
) {
62 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub1R8MyMatrixIdLj10ELj9EE(
63 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
64 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(%struct.DoubleWrapper1* {{[^,]*}} %w1)
65 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
66 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
67 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
68 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
72 m
.value
= m
.value
- w1
;
// Scalar-minus-matrix where the scalar is a DoubleWrapper1 object (w1).
// Expected IR: the conversion operator is called before the matrix is loaded,
// the scalar is splatted across <90 x double>, and the fsub takes the splat as
// its left operand.
// NOTE(review): the declaration of w1 is not visible in this excerpt.
75 void test_DoubleWrapper1_Sub2(MyMatrix
<double, 10, 9> &m
) {
76 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub2R8MyMatrixIdLj10ELj9EE(
77 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(%struct.DoubleWrapper1* {{[^,]*}} %w1)
78 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
79 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
80 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
81 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
82 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
86 m
.value
= w1
- m
.value
;
89 struct DoubleWrapper2
{
// Matrix-plus-scalar where the scalar is a DoubleWrapper2 object (w2).
// Expected IR: the matrix is loaded first, then w2's conversion operator to
// double is called, the scalar is splatted across <90 x double>, and the fadd
// takes the matrix as its left operand.
// NOTE(review): the declaration of w2 is not visible in this excerpt.
96 void test_DoubleWrapper2_Add1(MyMatrix
<double, 10, 9> &m
) {
97 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add1R8MyMatrixIdLj10ELj9EE(
98 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.+}}, align 8
99 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(%struct.DoubleWrapper2* {{[^,]*}} %w2)
100 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
101 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
102 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
103 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
107 m
.value
= m
.value
+ w2
;
// Scalar-plus-matrix where the scalar is a DoubleWrapper2 object (w2).
// Expected IR: the conversion operator is called before the matrix is loaded,
// the scalar is splatted across <90 x double>, and the fadd takes the splat as
// its left operand.
// NOTE(review): the declaration of w2 is not visible in this excerpt.
110 void test_DoubleWrapper2_Add2(MyMatrix
<double, 10, 9> &m
) {
111 // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add2R8MyMatrixIdLj10ELj9EE(
112 // CHECK: [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(%struct.DoubleWrapper2* {{[^,]*}} %w2)
113 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
114 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
115 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
116 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
117 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
121 m
.value
= w2
+ m
.value
;
// Matrix-plus-scalar where the scalar is an IntWrapper object (w3).
// Expected IR: the matrix is loaded, w3's conversion operator to int is called,
// the i32 result is converted to double with sitofp, splatted across
// <90 x double>, and added with the matrix as the left operand.
// The sitofp expectation consumes the captured call result rather than a
// hard-coded SSA name, so it is tied to the conversion call matched just
// before it.
// NOTE(review): the declaration of w3 is not visible in this excerpt.
131 void test_IntWrapper_Add(MyMatrix
<double, 10, 9> &m
) {
132 // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_AddR8MyMatrixIdLj10ELj9EE(
133 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
134 // CHECK: [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %w3)
135 // CHECK: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
136 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
137 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
138 // CHECK-NEXT: [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
139 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
143 m
.value
= m
.value
+ w3
;
// Scalar-minus-matrix where the scalar is an IntWrapper object (w3).
// Expected IR: w3's conversion operator to int is called and its i32 result is
// converted to double with sitofp before the matrix is loaded; the scalar is
// then splatted across <90 x double> and used as the left operand of the fsub.
// The sitofp expectation consumes the captured call result rather than a
// hard-coded SSA name, so it is tied to the conversion call matched just
// before it.
// NOTE(review): the declaration of w3 is not visible in this excerpt.
146 void test_IntWrapper_Sub(MyMatrix
<double, 10, 9> &m
) {
147 // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_SubR8MyMatrixIdLj10ELj9EE(
148 // CHECK: [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %w3)
149 // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
150 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
151 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
152 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
153 // CHECK-NEXT: [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
154 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
158 m
.value
= w3
- m
.value
;
// Returns A.value * B.value.
// A is MyMatrix<EltTy0, R0, C0>, B is MyMatrix<EltTy0, C0, C1>, and the result
// has the nested type MyMatrix<EltTy0, R0, C1>::matrix_t, so A's column count
// is shared with B's row count through the single parameter C0.
// NOTE(review): MyMatrix is declared outside this excerpt -- confirm that
// `value` is its matrix payload.
161 template <typename EltTy0
, unsigned R0
, unsigned C0
, unsigned C1
>
162 typename MyMatrix
<EltTy0
, R0
, C1
>::matrix_t
multiply(MyMatrix
<EltTy0
, R0
, C0
> &A
, MyMatrix
<EltTy0
, C0
, C1
> &B
) {
163 return A
.value
* B
.value
;
// Multiplies a 2x5 float matrix by a 5x2 float matrix through multiply() and
// stores the product into the `value` member of a local MyMatrix<float, 2, 2>.
// Expected IR: the caller stores the <4 x float> call result through
// %agg.result and returns void; the instantiated callee loads both operands
// and calls llvm.matrix.multiply with dimensions (2, 5, 2).
// NOTE(review): the statement returning Res is not visible in this excerpt.
166 MyMatrix
<float, 2, 2> test_multiply_template(MyMatrix
<float, 2, 5> Mat1
,
167 MyMatrix
<float, 5, 2> Mat2
) {
168 // CHECK-LABEL: define{{.*}} void @_Z22test_multiply_template8MyMatrixIfLj2ELj5EES_IfLj5ELj2EE(
169 // CHECK-NEXT: entry:
170 // CHECK-NEXT: [[RES:%.*]] = call noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(%struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat1, %struct.MyMatrix.2* noundef nonnull align 4 dereferenceable(40) %Mat2)
171 // CHECK-NEXT: %value = getelementptr inbounds %struct.MyMatrix.1, %struct.MyMatrix.1* %agg.result, i32 0, i32 0
172 // CHECK-NEXT: [[VALUE_ADDR:%.*]] = bitcast [4 x float]* %value to <4 x float>*
173 // CHECK-NEXT: store <4 x float> [[RES]], <4 x float>* [[VALUE_ADDR]], align 4
174 // CHECK-NEXT: ret void
176 // CHECK-LABEL: define linkonce_odr noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(
177 // CHECK: [[MAT1:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
178 // CHECK: [[MAT2:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
179 // CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.matrix.multiply.v4f32.v10f32.v10f32(<10 x float> [[MAT1]], <10 x float> [[MAT2]], i32 2, i32 5, i32 2)
180 // CHECK-NEXT: ret <4 x float> [[RES]]
182 MyMatrix
<float, 2, 2> Res
;
183 Res
.value
= multiply(Mat1
, Mat2
);
// Scalar-times-matrix where the scalar is an IntWrapper passed by reference.
// Expected IR: w3's conversion operator to int is called and its i32 result is
// converted to double with sitofp before the matrix is loaded; the scalar is
// then splatted across <90 x double> and used as the left operand of the fmul.
// The sitofp expectation consumes the captured call result rather than a
// hard-coded SSA name, so it is tied to the conversion call matched just
// before it.
187 void test_IntWrapper_Multiply(MyMatrix
<double, 10, 9> &m
, IntWrapper
&w3
) {
188 // CHECK-LABEL: define{{.*}} void @_Z24test_IntWrapper_MultiplyR8MyMatrixIdLj10ELj9EER10IntWrapper(
189 // CHECK: [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* noundef {{.*}})
190 // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
191 // CHECK: [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
192 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
193 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
194 // CHECK-NEXT: [[RES:%.*]] = fmul <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
195 // CHECK: store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
197 m
.value
= w3
* m
.value
;
// Helper template taking a MyMatrix<EltTy, Rows, Columns> by reference, an
// element e of type EltTy, and two unsigned indices i and j.
// NOTE(review): the function body is not visible in this excerpt. The
// expectations in the callers (insertelement of e at flat index j * Rows + i)
// suggest it stores e at row i, column j of Mat.value -- confirm against the
// full file.
200 template <typename EltTy
, unsigned Rows
, unsigned Columns
>
201 void insert(MyMatrix
<EltTy
, Rows
, Columns
> &Mat
, EltTy e
, unsigned i
, unsigned j
) {
// Forwards Mat, e, i and j to insert() for a 2x2 unsigned matrix.
// Expected IR for the instantiated callee: both indices are zero-extended to
// i64, the flat index is computed as j * 2 + i, and e is written with
// insertelement into the loaded <4 x i32> before it is stored back. The
// second (-O1) invocation additionally expects an llvm.assume that the flat
// index is below 4.
205 void test_insert_template1(MyMatrix
<unsigned, 2, 2> &Mat
, unsigned e
, unsigned i
, unsigned j
) {
206 // CHECK-LABEL: @_Z21test_insert_template1R8MyMatrixIjLj2ELj2EEjjj(
207 // CHECK: [[MAT_ADDR:%.*]] = load %struct.MyMatrix.3*, %struct.MyMatrix.3** %Mat.addr, align 8
208 // CHECK-NEXT: [[E:%.*]] = load i32, i32* %e.addr, align 4
209 // CHECK-NEXT: [[I:%.*]] = load i32, i32* %i.addr, align 4
210 // CHECK-NEXT: [[J:%.*]] = load i32, i32* %j.addr, align 4
211 // CHECK-NEXT: call void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(%struct.MyMatrix.3* noundef nonnull align 4 dereferenceable(16) [[MAT_ADDR]], i32 noundef [[E]], i32 noundef [[I]], i32 noundef [[J]])
212 // CHECK-NEXT: ret void
214 // CHECK-LABEL: define linkonce_odr void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
215 // CHECK: [[E:%.*]] = load i32, i32* %e.addr, align 4
216 // CHECK: [[I:%.*]] = load i32, i32* %i.addr, align 4
217 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
218 // CHECK-NEXT: [[J:%.*]] = load i32, i32* %j.addr, align 4
219 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
220 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
221 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
222 // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [4 x i32]* {{.*}} to <4 x i32>*
223 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 4
224 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
225 // CHECK-NEXT: [[MAT:%.*]] = load <4 x i32>, <4 x i32>* [[MAT_ADDR]], align 4
226 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <4 x i32> [[MAT]], i32 [[E]], i64 [[IDX2]]
227 // CHECK-NEXT: store <4 x i32> [[MATINS]], <4 x i32>* [[MAT_ADDR]], align 4
228 // CHECK-NEXT: ret void
230 insert(Mat
, e
, i
, j
);
// Calls insert() on a 3x8 float matrix with the constant indices 2 and 5.
// Expected IR for the instantiated callee: the flat index is computed at run
// time as j * 3 + i and e is written with insertelement into the loaded
// <24 x float> before it is stored back. The second (-O1) invocation
// additionally expects an llvm.assume that the flat index is below 24.
233 void test_insert_template2(MyMatrix
<float, 3, 8> &Mat
, float e
) {
234 // CHECK-LABEL: @_Z21test_insert_template2R8MyMatrixIfLj3ELj8EEf(
235 // CHECK: [[MAT_ADDR:%.*]] = load %struct.MyMatrix.4*, %struct.MyMatrix.4** %Mat.addr, align 8
236 // CHECK-NEXT: [[E:%.*]] = load float, float* %e.addr, align 4
237 // CHECK-NEXT: call void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(%struct.MyMatrix.4* noundef nonnull align 4 dereferenceable(96) [[MAT_ADDR]], float noundef [[E]], i32 noundef 2, i32 noundef 5)
238 // CHECK-NEXT: ret void
240 // CHECK-LABEL: define linkonce_odr void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
241 // CHECK: [[E:%.*]] = load float, float* %e.addr, align 4
242 // CHECK: [[I:%.*]] = load i32, i32* %i.addr, align 4
243 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
244 // CHECK-NEXT: [[J:%.*]] = load i32, i32* %j.addr, align 4
245 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
246 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 3
247 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
248 // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [24 x float]* {{.*}} to <24 x float>*
249 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 24
250 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
251 // CHECK-NEXT: [[MAT:%.*]] = load <24 x float>, <24 x float>* [[MAT_ADDR]], align 4
252 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <24 x float> [[MAT]], float [[E]], i64 [[IDX2]]
253 // CHECK-NEXT: store <24 x float> [[MATINS]], <24 x float>* [[MAT_ADDR]], align 4
254 // CHECK-NEXT: ret void
256 insert(Mat
, e
, 2, 5);
// Returns the element of Mat.value subscripted with the constant indices
// [1u][0u], by value as EltTy.
// NOTE(review): MyMatrix is declared outside this excerpt -- confirm that
// `value` is its matrix payload.
259 template <typename EltTy
, unsigned Rows
, unsigned Columns
>
260 EltTy
extract(MyMatrix
<EltTy
, Rows
, Columns
> &Mat
) {
261 return Mat
.value
[1u][0u];
// Returns extract(Mat1) for a 2x2 int matrix passed by value.
// Expected IR: the caller consists of the call and the return only; the
// instantiated callee loads <4 x i32> and returns the element at constant
// flat index 1 via extractelement.
264 int test_extract_template(MyMatrix
<int, 2, 2> Mat1
) {
265 // CHECK-LABEL: @_Z21test_extract_template8MyMatrixIiLj2ELj2EE(
266 // CHECK-NEXT: entry:
267 // CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(%struct.MyMatrix.5* noundef nonnull align 4 dereferenceable(16) [[MAT1:%.*]])
268 // CHECK-NEXT: ret i32 [[CALL]]
270 // CHECK-LABEL: define linkonce_odr noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(
271 // CHECK: [[MAT:%.*]] = load <4 x i32>, <4 x i32>* {{.*}}, align 4
272 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <4 x i32> [[MAT]], i64 1
273 // CHECK-NEXT: ret i32 [[MATEXT]]
275 return extract(Mat1
);
// 4x4 matrix of double, used by the subscript tests that follow.
278 using double4x4
= double __attribute__((matrix_type(4, 4)));
// Function template whose trailing return type is decltype(m[r][c]), so a
// matrix subscript expression with dependent index types appears in the
// signature. The body is empty; the caller's expectations show the return
// type lowered to a double* and the subscript encoded in the mangled name.
280 template <class R
, class C
>
281 auto matrix_subscript(double4x4 m
, R r
, C c
) -> decltype(m
[r
][c
]) {}
// Calls matrix_subscript(m, 1, 2) and returns the result as a double.
// Expected IR: the matrix is loaded and passed by value, the call yields a
// double* (noundef nonnull align 8 dereferenceable(8)), and the returned
// double is loaded through that pointer.
283 double test_matrix_subscript(double4x4 m
) {
284 // CHECK-LABEL: @_Z21test_matrix_subscriptu11matrix_typeILm4ELm4EdE(
285 // CHECK: [[MAT:%.*]] = load <16 x double>, <16 x double>* {{.*}}, align 8
286 // CHECK-NEXT: [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) double* @_Z16matrix_subscriptIiiEDTixixfp_fp0_fp1_Eu11matrix_typeILm4ELm4EdET_T0_(<16 x double> noundef [[MAT]], i32 noundef 1, i32 noundef 2)
287 // CHECK-NEXT: [[RES:%.*]] = load double, double* [[CALL]], align 8
288 // CHECK-NEXT: ret double [[RES]]
290 return matrix_subscript(m
, 1, 2);
// Takes a const double4x4 by value and returns a const double&.
// Expected IR: the parameter is spilled to a [16 x double] alloca, reloaded,
// the element at flat index 4 is extracted into a separate double temporary,
// and the address of that temporary is what gets returned.
// NOTE(review): the return statement is not visible in this excerpt.
293 const double &test_matrix_subscript_reference(const double4x4 m
) {
294 // CHECK-LABEL: @_Z31test_matrix_subscript_referenceu11matrix_typeILm4ELm4EdE(
295 // CHECK-NEXT: entry:
296 // CHECK-NEXT: [[M_ADDR:%.*]] = alloca [16 x double], align 8
297 // CHECK-NEXT: [[REF_TMP:%.*]] = alloca double, align 8
298 // CHECK-NEXT: [[NAMELESS0:%.*]] = bitcast [16 x double]* [[M_ADDR]] to <16 x double>*
299 // CHECK-NEXT: store <16 x double> [[M:%.*]], <16 x double>* [[NAMELESS0]], align 8
300 // CHECK: [[NAMELESS1:%.*]] = load <16 x double>, <16 x double>* [[NAMELESS0]], align 8
301 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <16 x double> [[NAMELESS1]], i64 4
302 // CHECK-NEXT: store double [[MATEXT]], double* [[REF_TMP]], align 8
303 // CHECK: ret double* [[REF_TMP]]
308 struct UnsignedWrapper
{
310 operator unsigned() {
// Returns m[i + 1][j - 1] where both indices come from wrapper objects.
// Expected IR: i is converted through IntWrapper's operator int, incremented
// with `add nsw` and sign-extended; j is converted through UnsignedWrapper's
// operator unsigned, decremented and zero-extended. The flat index is
// (j - 1) * 4 + (i + 1), and the element is read with extractelement from the
// loaded <16 x double>. The second (-O1) invocation additionally expects an
// llvm.assume that the flat index is below 16.
315 double extract_IntWrapper_idx(double4x4
&m
, IntWrapper i
, UnsignedWrapper j
) {
316 // CHECK-LABEL: define{{.*}} double @_Z22extract_IntWrapper_idxRu11matrix_typeILm4ELm4EdE10IntWrapper15UnsignedWrapper(
317 // CHECK: [[I:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %i)
318 // CHECK-NEXT: [[I_ADD:%.*]] = add nsw i32 [[I]], 1
319 // CHECK-NEXT: [[I_ADD_EXT:%.*]] = sext i32 [[I_ADD]] to i64
320 // CHECK-NEXT: [[J:%.*]] = call noundef i32 @_ZN15UnsignedWrappercvjEv(%struct.UnsignedWrapper* {{[^,]*}} %j)
321 // CHECK-NEXT: [[J_SUB:%.*]] = sub i32 [[J]], 1
322 // CHECK-NEXT: [[J_SUB_EXT:%.*]] = zext i32 [[J_SUB]] to i64
323 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_SUB_EXT]], 4
324 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_ADD_EXT]]
325 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
326 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
327 // CHECK-NEXT: [[MAT_ADDR:%.*]] = load [16 x double]*, [16 x double]** %m.addr, align 8
328 // CHECK-NEXT: [[MAT_ADDR2:%.*]] = bitcast [16 x double]* [[MAT_ADDR]] to <16 x double>*
329 // CHECK-NEXT: [[MAT:%.*]] = load <16 x double>, <16 x double>* [[MAT_ADDR2]], align 8
330 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <16 x double> [[MAT]], i64 [[IDX2]]
331 // CHECK-NEXT: ret double [[MATEXT]]
332 return m
[i
+ 1][j
- 1];
// Alias template naming an R x C matrix with element type T. The alias shares
// its name with the matrix_type attribute it wraps.
335 template <class T
, unsigned R
, unsigned C
>
336 using matrix_type
= T
__attribute__((matrix_type(R
, C
)));
// Tag type with a templated const conversion operator to any square
// matrix_type<T, N, N>. The operator declares a local `result` matrix and
// loops i over [0, N).
// NOTE(review): the loop body and the return are not visible in this excerpt.
// The expectations in the tests that use this type (insertelement of 1 at
// flat index i * N + i) suggest the loop writes 1 on the diagonal -- confirm
// against the full file.
337 struct identmatrix_t
{
338 template <class T
, unsigned N
>
339 operator matrix_type
<T
, N
, N
>() const {
340 matrix_type
<T
, N
, N
> result
;
341 for (unsigned i
= 0; i
!= N
; ++i
)
// constexpr instance of identmatrix_t; the tests reference it in IR through
// the internal-linkage symbol @_ZL11identmatrix.
347 constexpr identmatrix_t identmatrix
;
// Adds the constexpr identmatrix object to a 4x4 float matrix passed by
// reference.
// Expected IR for the caller: the matrix is loaded, the conversion operator
// instantiated for <float, 4> is called on @_ZL11identmatrix, the two values
// are combined with fadd (matrix on the left), and the sum is stored back
// through m before the function returns void.
// Expected IR for the conversion operator's loop body: the flat index is
// computed as i * 4 + i and the constant 1.0 is written at that index with
// insertelement. The second (-O1) invocation additionally expects an
// llvm.assume that the flat index is below 16.
// NOTE(review): the statement performing the addition is not visible in this
// excerpt.
349 void test_constexpr1(matrix_type
<float, 4, 4> &m
) {
350 // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr1Ru11matrix_typeILm4ELm4EfE(
351 // CHECK: [[MAT:%.*]] = load <16 x float>, <16 x float>* {{.*}}, align 4
352 // CHECK-NEXT: [[IM:%.*]] = call noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(%struct.identmatrix_t* {{[^,]*}} @_ZL11identmatrix)
353 // CHECK-NEXT: [[ADD:%.*]] = fadd <16 x float> [[MAT]], [[IM]]
354 // CHECK-NEXT: [[MAT_ADDR:%.*]] = load [16 x float]*, [16 x float]** %m.addr, align 8
355 // CHECK-NEXT: [[MAT_ADDR2:%.*]] = bitcast [16 x float]* [[MAT_ADDR]] to <16 x float>*
356 // CHECK-NEXT: store <16 x float> [[ADD]], <16 x float>* [[MAT_ADDR2]], align 4
357 // CHECK-NEXT: ret void
359 // CHECK-LABEL: define linkonce_odr noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(
360 // CHECK-LABEL: for.body: ; preds = %for.cond
361 // CHECK-NEXT: [[I:%.*]] = load i32, i32* %i, align 4
362 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
363 // CHECK-NEXT: [[I2:%.*]] = load i32, i32* %i, align 4
364 // CHECK-NEXT: [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
365 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[I2_EXT]], 4
366 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
367 // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [16 x float]* %result to <16 x float>*
368 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
369 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
370 // CHECK-NEXT: [[MAT:%.*]] = load <16 x float>, <16 x float>* [[MAT_ADDR]], align 4
371 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <16 x float> [[MAT]], float 1.000000e+00, i64 [[IDX2]]
372 // CHECK-NEXT: store <16 x float> [[MATINS]], <16 x float>* [[MAT_ADDR]], align 4
373 // CHECK-NEXT: br label %for.inc
377 void test_constexpr2(matrix_type
<int, 5, 5> &m
) {
378 // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr2Ru11matrix_typeILm5ELm5EiE(
379 // CHECK: [[IM:%.*]] = call noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(%struct.identmatrix_t* {{[^,]*}} @_ZL11identmatrix)
380 // CHECK: [[MAT:%.*]] = load <25 x i32>, <25 x i32>* {{.*}}, align 4
381 // CHECK-NEXT: [[SUB:%.*]] = sub <25 x i32> [[IM]], [[MAT]]
382 // CHECK-NEXT: [[SUB2:%.*]] = add <25 x i32> [[SUB]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
383 // CHECK-NEXT: [[MAT_ADDR:%.*]] = load [25 x i32]*, [25 x i32]** %m.addr, align 8
384 // CHECK-NEXT: [[MAT_ADDR2:%.*]] = bitcast [25 x i32]* [[MAT_ADDR]] to <25 x i32>*
385 // CHECK-NEXT: store <25 x i32> [[SUB2]], <25 x i32>* [[MAT_ADDR2]], align 4
386 // CHECK-NEXT: ret void
389 // CHECK-LABEL: define linkonce_odr noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(
390 // CHECK-LABEL: for.body: ; preds = %for.cond
391 // CHECK-NEXT: [[I:%.*]] = load i32, i32* %i, align 4
392 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
393 // CHECK-NEXT: [[I2:%.*]] = load i32, i32* %i, align 4
394 // CHECK-NEXT: [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
395 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[I2_EXT]], 5
396 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
397 // CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [25 x i32]* %result to <25 x i32>*
398 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 25
399 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
400 // CHECK-NEXT: [[MAT:%.*]] = load <25 x i32>, <25 x i32>* [[MAT_ADDR]], align 4
401 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x i32> [[MAT]], i32 1, i64 [[IDX2]]
402 // CHECK-NEXT: store <25 x i32> [[MATINS]], <25 x i32>* [[MAT_ADDR]], align 4
403 // CHECK-NEXT: br label %for.inc
405 m
= identmatrix
- m
+ 1;