clang/test/CodeGenCXX/matrix-type-operators.cpp

   1 // RUN: %clang_cc1 -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck %s
   2 // RUN: %clang_cc1 -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck --check-prefixes=CHECK,OPT %s
   3
   4 typedef double dx5x5_t __attribute__((matrix_type(5, 5))); // 5x5 matrix of double, spelled with typedef syntax.
   5 using fx2x3_t = float __attribute__((matrix_type(2, 3)));  // 2x3 matrix of float, spelled with alias syntax.
   6
   7 template <typename EltTy, unsigned Rows, unsigned Columns>
   8 struct MyMatrix {
       // Wrapper whose matrix element type and both dimensions come from
       // template parameters, so the matrix type is dependent.
   9   using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns)));
  10
       // The wrapped matrix; the tests in this file access it directly.
  11   matrix_t value;
  12 };
  13
  14 template <typename EltTy0, unsigned R0, unsigned C0>
  15 typename MyMatrix<EltTy0, R0, C0>::matrix_t add(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) {
       // Returns A.value + B.value; both operands have the same element type
       // and the same R0 x C0 shape.
  16   return A.value + B.value;
  17 }
  18
  19 void test_add_template() {
       // Instantiates add<float, 2, 5>. The patterns verify the call from this
       // function (both wrappers passed by reference, <10 x float> returned) and
       // that the instantiation loads both matrices and emits a single fadd.
  20   // CHECK-LABEL: define{{.*}} void @_Z17test_add_templatev()
  21   // CHECK:       %call = call noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(%struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat1, %struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat2)
  22
  23   // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
  24   // CHECK:       [[MAT1:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
  25   // CHECK:       [[MAT2:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
  26   // CHECK-NEXT:  [[RES:%.*]] = fadd <10 x float> [[MAT1]], [[MAT2]]
  27   // CHECK-NEXT:  ret <10 x float> [[RES]]
  28
       // The local names Mat1 and Mat2 are referenced literally by the patterns above.
  29   MyMatrix<float, 2, 5> Mat1;
  30   MyMatrix<float, 2, 5> Mat2;
  31   Mat1.value = add(Mat1, Mat2);
  32 }
  33
  34 template <typename EltTy0, unsigned R0, unsigned C0>
  35 typename MyMatrix<EltTy0, R0, C0>::matrix_t subtract(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) {
       // Returns A.value - B.value; both operands have the same element type
       // and the same R0 x C0 shape.
  36   return A.value - B.value;
  37 }
  38
  39 void test_subtract_template() {
       // Instantiates subtract<float, 2, 5>. The patterns verify the call from
       // this function and that the instantiation loads both matrices and emits
       // a single fsub on <10 x float>.
  40   // CHECK-LABEL: define{{.*}} void @_Z22test_subtract_templatev()
  41   // CHECK:       %call = call noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(%struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat1, %struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat2)
  42
  43   // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
  44   // CHECK:       [[MAT1:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
  45   // CHECK:       [[MAT2:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
  46   // CHECK-NEXT:  [[RES:%.*]] = fsub <10 x float> [[MAT1]], [[MAT2]]
  47   // CHECK-NEXT:  ret <10 x float> [[RES]]
  48
       // The local names Mat1 and Mat2 are referenced literally by the patterns above.
  49   MyMatrix<float, 2, 5> Mat1;
  50   MyMatrix<float, 2, 5> Mat2;
  51   Mat1.value = subtract(Mat1, Mat2);
  52 }
  53
  54 struct DoubleWrapper1 {
       // Class type with a user-defined conversion to double; the subtraction
       // tests below use it as the scalar operand next to a double matrix.
  55   int x;
  56   operator double() {
  57     return x;
  58   }
  59 };
  60
  61 void test_DoubleWrapper1_Sub1(MyMatrix<double, 10, 9> &m) {
       // matrix - wrapper: the matrix is loaded first, then the conversion
       // operator is called, its result is splatted to <90 x double>
       // (insertelement + shufflevector) and used as the right operand of fsub.
  62   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub1R8MyMatrixIdLj10ELj9EE(
  63   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
  64   // CHECK:       [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(%struct.DoubleWrapper1* {{[^,]*}} %w1)
  65   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
  66   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
  67   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  68   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
  69
       // The local name w1 is referenced literally by the call pattern above.
  70   DoubleWrapper1 w1;
  71   w1.x = 10;
  72   m.value = m.value - w1;
  73 }
  74
  75 void test_DoubleWrapper1_Sub2(MyMatrix<double, 10, 9> &m) {
       // wrapper - matrix: the conversion call comes before the matrix load and
       // the splatted scalar is the left operand of fsub.
  76   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub2R8MyMatrixIdLj10ELj9EE(
  77   // CHECK:       [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(%struct.DoubleWrapper1* {{[^,]*}} %w1)
  78   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
  79   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
  80   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
  81   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
  82   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
  83
       // The local name w1 is referenced literally by the call pattern above.
  84   DoubleWrapper1 w1;
  85   w1.x = 10;
  86   m.value = w1 - m.value;
  87 }
  88
  89 struct DoubleWrapper2 {
       // Same shape as DoubleWrapper1 (int member, conversion to double); the
       // addition tests below use it as the scalar operand.
  90   int x;
  91   operator double() {
  92     return x;
  93   }
  94 };
  95
  96 void test_DoubleWrapper2_Add1(MyMatrix<double, 10, 9> &m) {
       // matrix + wrapper: the matrix is loaded first, then the conversion
       // operator is called, its result is splatted to <90 x double> and used
       // as the right operand of fadd.
  97   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add1R8MyMatrixIdLj10ELj9EE(
  98   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.+}}, align 8
  99   // CHECK:       [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(%struct.DoubleWrapper2* {{[^,]*}} %w2)
 100   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
 101   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
 102   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
 103   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
 104
       // The local name w2 is referenced literally by the call pattern above.
 105   DoubleWrapper2 w2;
 106   w2.x = 20;
 107   m.value = m.value + w2;
 108 }
 109
 110 void test_DoubleWrapper2_Add2(MyMatrix<double, 10, 9> &m) {
       // wrapper + matrix: the conversion call comes before the matrix load and
       // the splatted scalar is the left operand of fadd.
 111   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add2R8MyMatrixIdLj10ELj9EE(
 112   // CHECK:       [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(%struct.DoubleWrapper2* {{[^,]*}} %w2)
 113   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
 114   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
 115   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
 116   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
 117   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
 118
       // The local name w2 is referenced literally by the call pattern above.
 119   DoubleWrapper2 w2;
 120   w2.x = 20;
 121   m.value = w2 + m.value;
 122 }
 123
 124 struct IntWrapper {
       // Class type with a user-defined conversion to int. The tests use it both
       // as a scalar operand next to a double matrix (where the patterns expect
       // an extra sitofp) and as a matrix subscript.
 125   char x;
 126   operator int() {
 127     return x;
 128   }
 129 };
 130
 131 void test_IntWrapper_Add(MyMatrix<double, 10, 9> &m) {
       // matrix + wrapper where the wrapper converts to int: the matrix is
       // loaded first, the conversion operator is called, its i32 result is
       // converted to double with sitofp, splatted to <90 x double> and used as
       // the right operand of fadd.
       // The sitofp pattern consumes the captured call result instead of a
       // hard-coded value name, so the capture is actually verified.
 132   // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_AddR8MyMatrixIdLj10ELj9EE(
 133   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
 134   // CHECK:       [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %w3)
 135   // CHECK:       [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
 136   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
 137   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
 138   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
 139   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
 140
       // The local name w3 is referenced literally by the call pattern above.
 141   IntWrapper w3;
 142   w3.x = 'c';
 143   m.value = m.value + w3;
 144 }
 145
 146 void test_IntWrapper_Sub(MyMatrix<double, 10, 9> &m) {
       // wrapper - matrix where the wrapper converts to int: the conversion call
       // and its sitofp to double come before the matrix load, and the splatted
       // scalar is the left operand of fsub.
       // The sitofp pattern consumes the captured call result instead of a
       // hard-coded value name, so the capture is actually verified.
 147   // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_SubR8MyMatrixIdLj10ELj9EE(
 148   // CHECK:       [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %w3)
 149   // CHECK-NEXT:  [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
 150   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
 151   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
 152   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
 153   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
 154   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
 155
       // The local name w3 is referenced literally by the call pattern above.
 156   IntWrapper w3;
 157   w3.x = 'c';
 158   m.value = w3 - m.value;
 159 }
 160
 161 template <typename EltTy0, unsigned R0, unsigned C0, unsigned C1>
 162 typename MyMatrix<EltTy0, R0, C1>::matrix_t multiply(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, C0, C1> &B) {
       // Product of an R0 x C0 matrix and a C0 x C1 matrix; the declared result
       // type is the R0 x C1 matrix.
 163   return A.value * B.value;
 164 }
 165
 166 MyMatrix<float, 2, 2> test_multiply_template(MyMatrix<float, 2, 5> Mat1,
 167                                              MyMatrix<float, 5, 2> Mat2) {
       // Instantiates multiply<float, 2, 5, 2>. The patterns verify that this
       // function stores the <4 x float> product into the returned aggregate
       // (%agg.result) and that the instantiation lowers the product to the
       // llvm.matrix.multiply intrinsic with dimensions 2, 5, 2.
 168   // CHECK-LABEL: define{{.*}} void @_Z22test_multiply_template8MyMatrixIfLj2ELj5EES_IfLj5ELj2EE(
 169   // CHECK-NEXT:  entry:
 170   // CHECK-NEXT:    [[RES:%.*]] = call noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(%struct.MyMatrix* noundef nonnull align 4 dereferenceable(40) %Mat1, %struct.MyMatrix.2* noundef nonnull align 4 dereferenceable(40) %Mat2)
 171   // CHECK-NEXT:    %value = getelementptr inbounds %struct.MyMatrix.1, %struct.MyMatrix.1* %agg.result, i32 0, i32 0
 172   // CHECK-NEXT:    [[VALUE_ADDR:%.*]] = bitcast [4 x float]* %value to <4 x float>*
 173   // CHECK-NEXT:    store <4 x float> [[RES]], <4 x float>* [[VALUE_ADDR]], align 4
 174   // CHECK-NEXT:    ret void
 175   //
 176   // CHECK-LABEL:  define linkonce_odr noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(
 177   // CHECK:         [[MAT1:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
 178   // CHECK:         [[MAT2:%.*]] = load <10 x float>, <10 x float>* {{.*}}, align 4
 179   // CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.matrix.multiply.v4f32.v10f32.v10f32(<10 x float> [[MAT1]], <10 x float> [[MAT2]], i32 2, i32 5, i32 2)
 180   // CHECK-NEXT:    ret <4 x float> [[RES]]
 181
 182   MyMatrix<float, 2, 2> Res;
 183   Res.value = multiply(Mat1, Mat2);
 184   return Res;
 185 }
 186
 187 void test_IntWrapper_Multiply(MyMatrix<double, 10, 9> &m, IntWrapper &w3) {
       // wrapper * matrix where the wrapper converts to int: the conversion call
       // and its sitofp to double come before the matrix load, and the splatted
       // scalar is the left operand of fmul.
       // The sitofp pattern consumes the captured call result instead of a
       // hard-coded value name, so the capture is actually verified.
 188   // CHECK-LABEL: define{{.*}} void @_Z24test_IntWrapper_MultiplyR8MyMatrixIdLj10ELj9EER10IntWrapper(
 189   // CHECK:       [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* noundef {{.*}})
 190   // CHECK-NEXT:  [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
 191   // CHECK:       [[MATRIX:%.*]] = load <90 x double>, <90 x double>* {{.*}}, align 8
 192   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
 193   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
 194   // CHECK-NEXT:  [[RES:%.*]] = fmul <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
 195   // CHECK:       store <90 x double> [[RES]], <90 x double>* {{.*}}, align 8
 196   // CHECK:       ret void
 197   m.value = w3 * m.value;
 198 }
 199
 200 template <typename EltTy, unsigned Rows, unsigned Columns>
 201 void insert(MyMatrix<EltTy, Rows, Columns> &Mat, EltTy e, unsigned i, unsigned j) {
       // Writes e into the element selected by the subscripts [i][j].
 202   Mat.value[i][j] = e;
 203 }
 204
 205 void test_insert_template1(MyMatrix<unsigned, 2, 2> &Mat, unsigned e, unsigned i, unsigned j) {
       // Instantiates insert<unsigned, 2, 2> with runtime indices. The patterns
       // verify the forwarding call and, inside the instantiation, the flattened
       // index j * 2 + i, the load / insertelement / store sequence on
       // <4 x i32>, and (in the -O1 run only) an llvm.assume that the index is
       // below 4.
 206   // CHECK-LABEL: @_Z21test_insert_template1R8MyMatrixIjLj2ELj2EEjjj(
 207   // CHECK:         [[MAT_ADDR:%.*]] = load %struct.MyMatrix.3*, %struct.MyMatrix.3** %Mat.addr, align 8
 208   // CHECK-NEXT:    [[E:%.*]] = load i32, i32* %e.addr, align 4
 209   // CHECK-NEXT:    [[I:%.*]] = load i32, i32* %i.addr, align 4
 210   // CHECK-NEXT:    [[J:%.*]] = load i32, i32* %j.addr, align 4
 211   // CHECK-NEXT:    call void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(%struct.MyMatrix.3* noundef nonnull align 4 dereferenceable(16) [[MAT_ADDR]], i32 noundef [[E]], i32 noundef [[I]], i32 noundef [[J]])
 212   // CHECK-NEXT:    ret void
 213   //
 214   // CHECK-LABEL: define linkonce_odr void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
 215   // CHECK:         [[E:%.*]] = load i32, i32* %e.addr, align 4
 216   // CHECK:         [[I:%.*]] = load i32, i32* %i.addr, align 4
 217   // CHECK-NEXT:    [[I_EXT:%.*]] = zext i32 [[I]] to i64
 218   // CHECK-NEXT:    [[J:%.*]] = load i32, i32* %j.addr, align 4
 219   // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
 220   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
 221   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
 222   // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [4 x i32]* {{.*}} to <4 x i32>*
 223   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 4
 224   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
 225   // CHECK-NEXT:    [[MAT:%.*]] = load <4 x i32>, <4 x i32>* [[MAT_ADDR]], align 4
 226   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <4 x i32> [[MAT]], i32 [[E]], i64 [[IDX2]]
 227   // CHECK-NEXT:    store <4 x i32> [[MATINS]], <4 x i32>* [[MAT_ADDR]], align 4
 228   // CHECK-NEXT:    ret void
 229
 230   insert(Mat, e, i, j);
 231 }
 232
 233 void test_insert_template2(MyMatrix<float, 3, 8> &Mat, float e) {
       // Instantiates insert<float, 3, 8> with the constant indices 2 and 5. The
       // patterns verify that the constants are passed as call arguments and,
       // inside the instantiation, the flattened index j * 3 + i, the load /
       // insertelement / store sequence on <24 x float>, and (in the -O1 run
       // only) an llvm.assume that the index is below 24.
 234   // CHECK-LABEL: @_Z21test_insert_template2R8MyMatrixIfLj3ELj8EEf(
 235   // CHECK:         [[MAT_ADDR:%.*]] = load %struct.MyMatrix.4*, %struct.MyMatrix.4** %Mat.addr, align 8
 236   // CHECK-NEXT:    [[E:%.*]] = load float, float* %e.addr, align 4
 237   // CHECK-NEXT:    call void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(%struct.MyMatrix.4* noundef nonnull align 4 dereferenceable(96) [[MAT_ADDR]], float noundef [[E]], i32 noundef 2, i32 noundef 5)
 238   // CHECK-NEXT:    ret void
 239   //
 240   // CHECK-LABEL: define linkonce_odr void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
 241   // CHECK:         [[E:%.*]] = load float, float* %e.addr, align 4
 242   // CHECK:         [[I:%.*]] = load i32, i32* %i.addr, align 4
 243   // CHECK-NEXT:    [[I_EXT:%.*]] = zext i32 [[I]] to i64
 244   // CHECK-NEXT:    [[J:%.*]] = load i32, i32* %j.addr, align 4
 245   // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
 246   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 3
 247   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
 248   // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [24 x float]* {{.*}} to <24 x float>*
 249   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 24
 250   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
 251   // CHECK-NEXT:    [[MAT:%.*]] = load <24 x float>, <24 x float>* [[MAT_ADDR]], align 4
 252   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <24 x float> [[MAT]], float [[E]], i64 [[IDX2]]
 253   // CHECK-NEXT:    store <24 x float> [[MATINS]], <24 x float>* [[MAT_ADDR]], align 4
 254   // CHECK-NEXT:    ret void
 255
 256   insert(Mat, e, 2, 5);
 257 }
 258
 259 template <typename EltTy, unsigned Rows, unsigned Columns>
 260 EltTy extract(MyMatrix<EltTy, Rows, Columns> &Mat) {
       // Reads the element selected by the constant subscripts [1u][0u].
 261   return Mat.value[1u][0u];
 262 }
 263
 264 int test_extract_template(MyMatrix<int, 2, 2> Mat1) {
       // Instantiates extract<int, 2, 2>. The patterns verify the forwarding
       // call and that the instantiation reads the element with a single
       // extractelement at the constant index 1.
 265   // CHECK-LABEL: @_Z21test_extract_template8MyMatrixIiLj2ELj2EE(
 266   // CHECK-NEXT:  entry:
 267   // CHECK-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(%struct.MyMatrix.5* noundef nonnull align 4 dereferenceable(16) [[MAT1:%.*]])
 268   // CHECK-NEXT:    ret i32 [[CALL]]
 269   //
 270   // CHECK-LABEL: define linkonce_odr noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(
 271   // CHECK:         [[MAT:%.*]] = load <4 x i32>, <4 x i32>* {{.*}}, align 4
 272   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <4 x i32> [[MAT]], i64 1
 273   // CHECK-NEXT:    ret i32 [[MATEXT]]
 274
 275   return extract(Mat1);
 276 }
 277
 278 using double4x4 = double __attribute__((matrix_type(4, 4))); // 4x4 matrix of double used by the subscript tests below.
 279
 280 template <class R, class C>
     // The return type is spelled as decltype of a matrix subscript expression
     // on the parameters, so that expression becomes part of the mangled name.
     // NOTE(review): the body is empty and returns nothing; presumably only the
     // signature matters to the test that calls this - confirm before changing.
 281 auto matrix_subscript(double4x4 m, R r, C c) -> decltype(m[r][c]) {}
 282
 283 double test_matrix_subscript(double4x4 m) {
       // Calls matrix_subscript<int, int>. The patterns verify the mangled
       // callee name, that the matrix is passed by value as <16 x double>, and
       // that the returned double* is loaded to produce the result.
 284   // CHECK-LABEL: @_Z21test_matrix_subscriptu11matrix_typeILm4ELm4EdE(
 285   // CHECK:         [[MAT:%.*]] = load <16 x double>, <16 x double>* {{.*}}, align 8
 286   // CHECK-NEXT:    [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) double* @_Z16matrix_subscriptIiiEDTixixfp_fp0_fp1_Eu11matrix_typeILm4ELm4EdET_T0_(<16 x double> noundef [[MAT]], i32 noundef 1, i32 noundef 2)
 287   // CHECK-NEXT:    [[RES:%.*]] = load double, double* [[CALL]], align 8
 288   // CHECK-NEXT:    ret double [[RES]]
 289
 290   return matrix_subscript(m, 1, 2);
 291 }
 292
 293 const double &test_matrix_subscript_reference(const double4x4 m) {
       // Binds a const reference to a matrix element. The patterns verify that
       // the element (flattened index 4 for m[0][1]) is extracted into a
       // temporary alloca and that the address of that temporary is returned.
 294   // CHECK-LABEL: @_Z31test_matrix_subscript_referenceu11matrix_typeILm4ELm4EdE(
 295   // CHECK-NEXT:  entry:
 296   // CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [16 x double], align 8
 297   // CHECK-NEXT:    [[REF_TMP:%.*]] = alloca double, align 8
 298   // CHECK-NEXT:    [[NAMELESS0:%.*]] = bitcast [16 x double]* [[M_ADDR]] to <16 x double>*
 299   // CHECK-NEXT:    store <16 x double> [[M:%.*]], <16 x double>* [[NAMELESS0]], align 8
 300   // CHECK:         [[NAMELESS1:%.*]] = load <16 x double>, <16 x double>* [[NAMELESS0]], align 8
 301   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <16 x double> [[NAMELESS1]], i64 4
 302   // CHECK-NEXT:    store double [[MATEXT]], double* [[REF_TMP]], align 8
 303   // CHECK:         ret double* [[REF_TMP]]
 304
 305   return m[0][1];
 306 }
 307
 308 struct UnsignedWrapper {
       // Class type with a user-defined conversion to unsigned; used as a
       // matrix subscript operand in extract_IntWrapper_idx.
 309   char x;
 310   operator unsigned() {
 311     return x;
 312   }
 313 };
 314
 315 double extract_IntWrapper_idx(double4x4 &m, IntWrapper i, UnsignedWrapper j) {
       // Subscripts a matrix with expressions built from class types that
       // convert to int and unsigned. The patterns verify that the signed row
       // index (i + 1) is sign-extended, the unsigned column index (j - 1) is
       // zero-extended, the flattened index is (j - 1) * 4 + (i + 1), and (in
       // the -O1 run only) an llvm.assume that the index is below 16.
 316   // CHECK-LABEL: define{{.*}} double @_Z22extract_IntWrapper_idxRu11matrix_typeILm4ELm4EdE10IntWrapper15UnsignedWrapper(
 317   // CHECK:         [[I:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(%struct.IntWrapper* {{[^,]*}} %i)
 318   // CHECK-NEXT:    [[I_ADD:%.*]] = add nsw i32 [[I]], 1
 319   // CHECK-NEXT:    [[I_ADD_EXT:%.*]] = sext i32 [[I_ADD]] to i64
 320   // CHECK-NEXT:    [[J:%.*]] = call noundef i32 @_ZN15UnsignedWrappercvjEv(%struct.UnsignedWrapper* {{[^,]*}} %j)
 321   // CHECK-NEXT:    [[J_SUB:%.*]] = sub i32 [[J]], 1
 322   // CHECK-NEXT:    [[J_SUB_EXT:%.*]] = zext i32 [[J_SUB]] to i64
 323   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_SUB_EXT]], 4
 324   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_ADD_EXT]]
 325   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
 326   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
 327   // CHECK-NEXT:    [[MAT_ADDR:%.*]] = load [16 x double]*, [16 x double]** %m.addr, align 8
 328   // CHECK-NEXT:    [[MAT_ADDR2:%.*]] = bitcast [16 x double]* [[MAT_ADDR]] to <16 x double>*
 329   // CHECK-NEXT:    [[MAT:%.*]] = load <16 x double>, <16 x double>* [[MAT_ADDR2]], align 8
 330   // CHECK-NEXT:    [[MATEXT:%.*]]  = extractelement <16 x double> [[MAT]], i64 [[IDX2]]
 331   // CHECK-NEXT:    ret double [[MATEXT]]
 332   return m[i + 1][j - 1];
 333 }
 334
 335 template <class T, unsigned R, unsigned C>
     // Alias template naming the R x C matrix type with element type T.
 336 using matrix_type = T __attribute__((matrix_type(R, C)));
 337 struct identmatrix_t {
       // Converts to any square N x N matrix type: the conversion operator
       // template deduces T and N from the target type and assigns 1 to every
       // diagonal element. `result` has no initializer, so the remaining
       // elements are never written here.
 338   template <class T, unsigned N>
 339   operator matrix_type<T, N, N>() const {
 340     matrix_type<T, N, N> result;
 341     for (unsigned i = 0; i != N; ++i)
 342       result[i][i] = 1;
 343     return result;
 344   }
 345 };
 346
 347 constexpr identmatrix_t identmatrix; // Shared constant operand for the test_constexpr tests; appears in the IR as @_ZL11identmatrix.
 348
 349 void test_constexpr1(matrix_type<float, 4, 4> &m) {
       // matrix + identmatrix: the matrix is loaded first, then identmatrix is
       // converted through the conversion operator template instantiated for
       // <float, 4>, and the two <16 x float> values are combined with fadd.
       // The second group of patterns verifies the loop body of that
       // instantiation: flattened index i * 4 + i, an insertelement of 1.0, and
       // (in the -O1 run only) an llvm.assume that the index is below 16.
 350   // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr1Ru11matrix_typeILm4ELm4EfE(
 351   // CHECK:         [[MAT:%.*]] = load <16 x float>, <16 x float>* {{.*}}, align 4
 352   // CHECK-NEXT:    [[IM:%.*]] = call noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(%struct.identmatrix_t* {{[^,]*}} @_ZL11identmatrix)
 353   // CHECK-NEXT:    [[ADD:%.*]] = fadd <16 x float> [[MAT]], [[IM]]
 354   // CHECK-NEXT:    [[MAT_ADDR:%.*]] = load [16 x float]*, [16 x float]** %m.addr, align 8
 355   // CHECK-NEXT:    [[MAT_ADDR2:%.*]] = bitcast [16 x float]* [[MAT_ADDR]] to <16 x float>*
 356   // CHECK-NEXT:    store <16 x float> [[ADD]], <16 x float>* [[MAT_ADDR2]], align 4
 357   // CHECK-NEXT:    ret void
 358
 359   // CHECK-LABEL: define linkonce_odr noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(
 360   // CHECK-LABEL: for.body:                                         ; preds = %for.cond
 361   // CHECK-NEXT:   [[I:%.*]] = load i32, i32* %i, align 4
 362   // CHECK-NEXT:   [[I_EXT:%.*]] = zext i32 [[I]] to i64
 363   // CHECK-NEXT:   [[I2:%.*]] = load i32, i32* %i, align 4
 364   // CHECK-NEXT:   [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
 365   // CHECK-NEXT:   [[IDX1:%.*]] = mul i64 [[I2_EXT]], 4
 366   // CHECK-NEXT:   [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
 367   // CHECK-NEXT:   [[MAT_ADDR:%.*]] = bitcast [16 x float]* %result to <16 x float>*
 368   // OPT-NEXT:     [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
 369   // OPT-NEXT:     call void @llvm.assume(i1 [[CMP]])
 370   // CHECK-NEXT:   [[MAT:%.*]] = load <16 x float>, <16 x float>* [[MAT_ADDR]], align 4
 371   // CHECK-NEXT:   [[MATINS:%.*]] = insertelement <16 x float> [[MAT]], float 1.000000e+00, i64 [[IDX2]]
 372   // CHECK-NEXT:   store <16 x float> [[MATINS]], <16 x float>* [[MAT_ADDR]], align 4
 373   // CHECK-NEXT:   br label %for.inc
 374   m = m + identmatrix;
 375 }
 376
 377 void test_constexpr2(matrix_type<int, 5, 5> &m) {
       // identmatrix - m + 1: the conversion of identmatrix (instantiated for
       // <int, 5>) is called before the matrix load, the two <25 x i32> values
       // are combined with sub, and the scalar 1 is added as a constant vector
       // of 25 ones. The second group of patterns verifies the loop body of the
       // conversion instantiation: flattened index i * 5 + i, an insertelement
       // of 1, and (in the -O1 run only) an llvm.assume that the index is below 25.
 378   // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr2Ru11matrix_typeILm5ELm5EiE(
 379   // CHECK:         [[IM:%.*]] = call noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(%struct.identmatrix_t* {{[^,]*}} @_ZL11identmatrix)
 380   // CHECK:         [[MAT:%.*]] = load <25 x i32>, <25 x i32>* {{.*}}, align 4
 381   // CHECK-NEXT:    [[SUB:%.*]] = sub <25 x i32> [[IM]], [[MAT]]
 382   // CHECK-NEXT:    [[SUB2:%.*]] = add <25 x i32> [[SUB]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 383   // CHECK-NEXT:    [[MAT_ADDR:%.*]] = load [25 x i32]*, [25 x i32]** %m.addr, align 8
 384   // CHECK-NEXT:    [[MAT_ADDR2:%.*]] = bitcast [25 x i32]* [[MAT_ADDR]] to <25 x i32>*
 385   // CHECK-NEXT:    store <25 x i32> [[SUB2]], <25 x i32>* [[MAT_ADDR2]], align 4
 386   // CHECK-NEXT:    ret void
 387   //
 388
 389   // CHECK-LABEL: define linkonce_odr noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(
 390   // CHECK-LABEL: for.body:                                         ; preds = %for.cond
 391   // CHECK-NEXT:   [[I:%.*]] = load i32, i32* %i, align 4
 392   // CHECK-NEXT:   [[I_EXT:%.*]] = zext i32 [[I]] to i64
 393   // CHECK-NEXT:   [[I2:%.*]] = load i32, i32* %i, align 4
 394   // CHECK-NEXT:   [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
 395   // CHECK-NEXT:   [[IDX1:%.*]] = mul i64 [[I2_EXT]], 5
 396   // CHECK-NEXT:   [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
 397   // CHECK-NEXT:   [[MAT_ADDR:%.*]] = bitcast [25 x i32]* %result to <25 x i32>*
 398   // OPT-NEXT:     [[CMP:%.*]] = icmp ult i64 [[IDX2]], 25
 399   // OPT-NEXT:     call void @llvm.assume(i1 [[CMP]])
 400   // CHECK-NEXT:   [[MAT:%.*]] = load <25 x i32>, <25 x i32>* [[MAT_ADDR]], align 4
 401   // CHECK-NEXT:   [[MATINS:%.*]] = insertelement <25 x i32> [[MAT]], i32 1, i64 [[IDX2]]
 402   // CHECK-NEXT:   store <25 x i32> [[MATINS]], <25 x i32>* [[MAT_ADDR]], align 4
 403   // CHECK-NEXT:   br label %for.inc
 404
 405   m = identmatrix - m + 1;
 406 }