clang/test/CodeGenCXX/matrix-type-operators.cpp

   1 // RUN: %clang_cc1 -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck --check-prefixes=CHECK,NOOPT %s
   2 // RUN: %clang_cc1 -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck --check-prefixes=CHECK,OPT %s
   3 typedef double dx5x5_t __attribute__((matrix_type(5, 5))); // 5x5 matrix of double (typedef spelling)
   4 using fx2x3_t = float __attribute__((matrix_type(2, 3)));  // 2x3 matrix of float (alias spelling)
   5
   6 template <typename EltTy, unsigned Rows, unsigned Columns>
   7 struct MyMatrix {
       // Wraps a Rows x Columns matrix of EltTy. The matrix type is a dependent
       // member alias, so the tests below apply matrix operators to operands
       // whose element type and dimensions come from template parameters.
   8   using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns)));
   9
  10   matrix_t value;
  11 };
  12
  13 template <typename EltTy0, unsigned R0, unsigned C0>
  14 typename MyMatrix<EltTy0, R0, C0>::matrix_t add(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) {
       // Returns the matrix sum of the two wrapped values; both operands share
       // the same element type and dimensions.
  15   return A.value + B.value;
  16 }
  17
  18 void test_add_template() {
       // Instantiates add<float, 2, 5>. The directives below expect the caller
       // to pass both wrappers by pointer and the instantiated callee to load
       // two <10 x float> values and combine them with a single fadd. Only the
       // optimized configuration expects !tbaa metadata on the loads.
  19   // CHECK-LABEL: define{{.*}} void @_Z17test_add_templatev()
  20   // CHECK:       %call = call noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2)
  21
  22   // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
  23   // NOOPT:       [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
  24   // NOOPT:       [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
  25   // OPT:         [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  26   // OPT:         [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  27   // CHECK-NEXT:  [[RES:%.*]] = fadd <10 x float> [[MAT1]], [[MAT2]]
  28   // CHECK-NEXT:  ret <10 x float> [[RES]]
  29
       // The local names Mat1 and Mat2 are matched literally in the call
       // directive above, so they must not be renamed.
  30   MyMatrix<float, 2, 5> Mat1;
  31   MyMatrix<float, 2, 5> Mat2;
  32   Mat1.value = add(Mat1, Mat2);
  33 }
  34
  35 template <typename EltTy0, unsigned R0, unsigned C0>
  36 typename MyMatrix<EltTy0, R0, C0>::matrix_t subtract(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) {
       // Returns the matrix difference A.value - B.value; both operands share
       // the same element type and dimensions.
  37   return A.value - B.value;
  38 }
  39
  40 void test_subtract_template() {
       // Instantiates subtract<float, 2, 5>. The directives below expect the
       // instantiated callee to load two <10 x float> values and combine them
       // with a single fsub. Only the optimized configuration expects !tbaa
       // metadata on the loads.
  41   // CHECK-LABEL: define{{.*}} void @_Z22test_subtract_templatev()
  42   // CHECK:       %call = call noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2)
  43
  44   // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
  45   // NOOPT:       [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
  46   // NOOPT:       [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
  47   // OPT:         [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  48   // OPT:         [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  49   // CHECK-NEXT:  [[RES:%.*]] = fsub <10 x float> [[MAT1]], [[MAT2]]
  50   // CHECK-NEXT:  ret <10 x float> [[RES]]
  51
       // The local names Mat1 and Mat2 are matched literally in the call
       // directive above, so they must not be renamed.
  52   MyMatrix<float, 2, 5> Mat1;
  53   MyMatrix<float, 2, 5> Mat2;
  54   Mat1.value = subtract(Mat1, Mat2);
  55 }
  56
  57 struct DoubleWrapper1 {
       // Class type with a non-explicit conversion to double. The subtraction
       // tests use it as a scalar operand, so the operand has to go through a
       // user-defined conversion first.
  58   int x;
  59   operator double() {
  60     return x;
  61   }
  62 };
  63
  64 void test_DoubleWrapper1_Sub1(MyMatrix<double, 10, 9> &m) {
       // matrix - wrapper: the directives expect the matrix load first, then
       // the conversion call on w1, a splat of the resulting double
       // (insertelement + shufflevector), and an fsub with the matrix as the
       // left operand.
  65   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub1R8MyMatrixIdLj10ELj9EE(
  66   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
  67   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  68   // CHECK-NEXT:  [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(ptr {{[^,]*}} %w1)
  69   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
  70   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
  71   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  72   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
  73
       // The local name w1 is matched literally in the call directive above.
  74   DoubleWrapper1 w1;
  75   w1.x = 10;
  76   m.value = m.value - w1;
  77 }
  78
  79 void test_DoubleWrapper1_Sub2(MyMatrix<double, 10, 9> &m) {
       // wrapper - matrix: the operand order is reversed relative to Sub1, so
       // the directives expect the conversion call before the matrix load and
       // the splatted scalar as the left operand of the fsub.
  80   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub2R8MyMatrixIdLj10ELj9EE(
  81   // CHECK:       [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(ptr {{[^,]*}} %w1)
  82   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
  83   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  84   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
  85   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
  86   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
  87   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
  88
       // The local name w1 is matched literally in the call directive above.
  89   DoubleWrapper1 w1;
  90   w1.x = 10;
  91   m.value = w1 - m.value;
  92 }
  93
  94 struct DoubleWrapper2 {
       // Class type with a non-explicit conversion to double, used as the
       // scalar operand in the addition tests. It has the same shape as
       // DoubleWrapper1 but is a distinct type with its own mangled name.
  95   int x;
  96   operator double() {
  97     return x;
  98   }
  99 };
 100
 101 void test_DoubleWrapper2_Add1(MyMatrix<double, 10, 9> &m) {
       // matrix + wrapper: the directives expect the matrix load first, then
       // the conversion call on w2, a splat of the resulting double
       // (insertelement + shufflevector), and an fadd with the matrix as the
       // left operand. The conversion call is pinned directly after the matrix
       // load, matching the equivalent subtraction test
       // test_DoubleWrapper1_Sub1.
 102   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add1R8MyMatrixIdLj10ELj9EE(
 103   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.+}}, align 8{{$}}
 104   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.+}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 105   // CHECK-NEXT:  [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(ptr {{[^,]*}} %w2)
 106   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
 107   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
 108   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
 109   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
 110
       // The local name w2 is matched literally in the call directive above.
 111   DoubleWrapper2 w2;
 112   w2.x = 20;
 113   m.value = m.value + w2;
 114 }
 115
 116 void test_DoubleWrapper2_Add2(MyMatrix<double, 10, 9> &m) {
       // wrapper + matrix: the directives expect the conversion call before
       // the matrix load and the splatted scalar as the left operand of the
       // fadd.
 117   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add2R8MyMatrixIdLj10ELj9EE(
 118   // CHECK:       [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(ptr {{[^,]*}} %w2)
 119   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
 120   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 121   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
 122   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
 123   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
 124   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
 125
       // The local name w2 is matched literally in the call directive above.
 126   DoubleWrapper2 w2;
 127   w2.x = 20;
 128   m.value = w2 + m.value;
 129 }
 130
 131 struct IntWrapper {
       // Class type with a non-explicit conversion to int. When used as a
       // scalar operand of a double matrix, the tests expect the converted int
       // to be turned into a double with sitofp. It is also used as a
       // subscript index further down.
 132   char x;
 133   operator int() {
 134     return x;
 135   }
 136 };
 137
 138 void test_IntWrapper_Add(MyMatrix<double, 10, 9> &m) {
       // matrix + IntWrapper: the directives expect the matrix load, the call
       // to operator int() on w3, a sitofp of that call result to double, a
       // splat of the double, and an fadd with the matrix as the left operand.
       // The sitofp operand is matched through the captured call result
       // instead of a hard-coded value name.
 139   // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_AddR8MyMatrixIdLj10ELj9EE(
 140   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
 141   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 142   // CHECK-NEXT:  [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %w3)
 143   // CHECK-NEXT:  [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
 144   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
 145   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
 146   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
 147   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
 148
       // The local name w3 is matched literally in the call directive above.
 149   IntWrapper w3;
 150   w3.x = 'c';
 151   m.value = m.value + w3;
 152 }
 153
 154 void test_IntWrapper_Sub(MyMatrix<double, 10, 9> &m) {
       // IntWrapper - matrix: the directives expect the call to operator int()
       // on w3 and its sitofp conversion before the matrix load, then a splat
       // of the double and an fsub with the splat as the left operand. The
       // sitofp operand is matched through the captured call result instead of
       // a hard-coded value name.
 155   // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_SubR8MyMatrixIdLj10ELj9EE(
 156   // CHECK:       [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %w3)
 157   // CHECK-NEXT:  [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
 158   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
 159   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 160   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
 161   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
 162   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
 163   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
 164
       // The local name w3 is matched literally in the call directive above.
 165   IntWrapper w3;
 166   w3.x = 'c';
 167   m.value = w3 - m.value;
 168 }
 169
 170 template <typename EltTy0, unsigned R0, unsigned C0, unsigned C1>
 171 typename MyMatrix<EltTy0, R0, C1>::matrix_t multiply(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, C0, C1> &B) {
       // Matrix product of an R0 x C0 value and a C0 x C1 value; the result
       // type is the R0 x C1 matrix of the same element type.
 172   return A.value * B.value;
 173 }
 174
 175 MyMatrix<float, 2, 2> test_multiply_template(MyMatrix<float, 2, 5> Mat1,
 176                                              MyMatrix<float, 5, 2> Mat2) {
       // Instantiates multiply<float, 2, 5, 2>. The directives expect the
       // caller to store the <4 x float> result into the value member of the
       // indirectly returned object, and the callee to lower to one call of
       // llvm.matrix.multiply with the dimensions 2, 5, 2 passed as i32
       // arguments. The struct type name and the parameter names Mat1 and Mat2
       // are matched literally.
 177   // CHECK-LABEL: define{{.*}} void @_Z22test_multiply_template8MyMatrixIfLj2ELj5EES_IfLj5ELj2EE(
 178   // CHECK-NEXT:  entry:
 179   // CHECK-NEXT:    [[RES:%.*]] = call noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2)
 180   // CHECK-NEXT:    %value = getelementptr inbounds %struct.MyMatrix.1, ptr %agg.result, i32 0, i32 0
 181   // CHECK-NEXT:    store <4 x float> [[RES]], ptr %value, align 4
 182   // CHECK-NEXT:    ret void
 183   //
 184   // CHECK-LABEL:  define linkonce_odr noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(
 185   // NOOPT:         [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
 186   // NOOPT:         [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
 187   // OPT:           [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 188   // OPT:           [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 189   // CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.matrix.multiply.v4f32.v10f32.v10f32(<10 x float> [[MAT1]], <10 x float> [[MAT2]], i32 2, i32 5, i32 2)
 190   // CHECK-NEXT:    ret <4 x float> [[RES]]
 191
 192   MyMatrix<float, 2, 2> Res;
 193   Res.value = multiply(Mat1, Mat2);
 194   return Res;
 195 }
 196
 197 void test_IntWrapper_Multiply(MyMatrix<double, 10, 9> &m, IntWrapper &w3) {
       // IntWrapper * matrix: the directives expect the call to operator int()
       // and its sitofp conversion before the matrix load, then a splat of the
       // double and an element-wise fmul with the splat as the left operand
       // (scalar scaling, not a matrix product). The sitofp operand is matched
       // through the captured call result instead of a hard-coded value name.
 198   // CHECK-LABEL: define{{.*}} void @_Z24test_IntWrapper_MultiplyR8MyMatrixIdLj10ELj9EER10IntWrapper(
 199   // CHECK:       [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr noundef {{.*}})
 200   // CHECK-NEXT:  [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
 201   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
 202   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 203   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
 204   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
 205   // CHECK-NEXT:  [[RES:%.*]] = fmul <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
 206   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
 207   // CHECK-NEXT:  ret void
 208   m.value = w3 * m.value;
 209 }
 210
 211 template <typename EltTy, unsigned Rows, unsigned Columns>
 212 void insert(MyMatrix<EltTy, Rows, Columns> &Mat, EltTy e, unsigned i, unsigned j) {
       // Stores e into the element of the wrapped matrix selected by
       // Mat.value[i][j]. No bounds check is performed here.
 213   Mat.value[i][j] = e;
 214 }
 215
 216 void test_insert_template1(MyMatrix<unsigned, 2, 2> &Mat, unsigned e, unsigned i, unsigned j) {
       // Element insertion with runtime indices on a 2x2 unsigned matrix. The
       // callee directives expect the flat index j * 2 + i (both indices
       // zero-extended to i64), followed by load, insertelement and store of
       // the whole <4 x i32> value. Only the optimized configuration expects
       // an llvm.assume that the flat index is below 4.
 217   // CHECK-LABEL: @_Z21test_insert_template1R8MyMatrixIjLj2ELj2EEjjj(
 218   // NOOPT:         [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8{{$}}
 219   // NOOPT-NEXT:    [[E:%.*]] = load i32, ptr %e.addr, align 4{{$}}
 220   // NOOPT-NEXT:    [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
 221   // NOOPT-NEXT:    [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
 222   // OPT:           [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 223   // OPT-NEXT:      [[E:%.*]] = load i32, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 224   // OPT-NEXT:      [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 225   // OPT-NEXT:      [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 226   // CHECK-NEXT:    call void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(ptr noundef nonnull align 4 dereferenceable(16) [[MAT_ADDR]], i32 noundef [[E]], i32 noundef [[I]], i32 noundef [[J]])
 227   // CHECK-NEXT:    ret void
 228   //
 229   // CHECK-LABEL: define linkonce_odr void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
 230   // NOOPT:         [[E:%.*]] = load i32, ptr %e.addr, align 4{{$}}
 231   // NOOPT:         [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
 232   // OPT:           [[E:%.*]] = load i32, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 233   // OPT:           [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 234   // CHECK-NEXT:    [[I_EXT:%.*]] = zext i32 [[I]] to i64
 235   // NOOPT-NEXT:    [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
 236   // OPT-NEXT:      [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 237   // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
 238   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
 239   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
 240   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 4
 241   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
 242   // CHECK-NEXT:    [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4{{$}}
 243   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <4 x i32> [[MAT]], i32 [[E]], i64 [[IDX2]]
 244   // CHECK-NEXT:    store <4 x i32> [[MATINS]], ptr {{.*}}, align 4
 245   // CHECK-NEXT:    ret void
 246
 247   insert(Mat, e, i, j);
 248 }
 249
 250 void test_insert_template2(MyMatrix<float, 3, 8> &Mat, float e) {
       // Element insertion with constant indices (row 2, column 5) on a 3x8
       // float matrix. The caller directives expect the constants to be passed
       // as i32 arguments. The callee directives expect the flat index
       // j * 3 + i and an insertelement on <24 x float>. Only the optimized
       // configuration expects an llvm.assume that the flat index is below 24.
 251   // CHECK-LABEL: @_Z21test_insert_template2R8MyMatrixIfLj3ELj8EEf(
 252   // NOOPT:         [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8{{$}}
 253   // NOOPT-NEXT:    [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
 254   // OPT:           [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 255   // OPT-NEXT:      [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 256   // CHECK-NEXT:    call void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(ptr noundef nonnull align 4 dereferenceable(96) [[MAT_ADDR]], float noundef [[E]], i32 noundef 2, i32 noundef 5)
 257   // CHECK-NEXT:    ret void
 258   //
 259   // CHECK-LABEL: define linkonce_odr void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
 260   // NOOPT:         [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
 261   // NOOPT:         [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
 262   // OPT:           [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 263   // OPT:           [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 264   // CHECK-NEXT:    [[I_EXT:%.*]] = zext i32 [[I]] to i64
 265   // NOOPT-NEXT:    [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
 266   // OPT-NEXT:      [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 267   // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
 268   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 3
 269   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
 270   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 24
 271   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
 272   // CHECK-NEXT:    [[MAT:%.*]] = load <24 x float>, ptr {{.*}}, align 4{{$}}
 273   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <24 x float> [[MAT]], float [[E]], i64 [[IDX2]]
 274   // CHECK-NEXT:    store <24 x float> [[MATINS]], ptr {{.*}}, align 4
 275   // CHECK-NEXT:    ret void
 276
 277   insert(Mat, e, 2, 5);
 278 }
 279
 280 template <typename EltTy, unsigned Rows, unsigned Columns>
 281 EltTy extract(MyMatrix<EltTy, Rows, Columns> &Mat) {
       // Reads the element at row 1, column 0 of the wrapped matrix using
       // unsigned constant subscripts.
 282   return Mat.value[1u][0u];
 283 }
 284
 285 int test_extract_template(MyMatrix<int, 2, 2> Mat1) {
       // Instantiates extract<int, 2, 2>. With constant subscripts [1][0] the
       // callee directives expect a direct extractelement at flat index 1 on
       // the loaded <4 x i32> value, with no runtime index arithmetic.
 286   // CHECK-LABEL: @_Z21test_extract_template8MyMatrixIiLj2ELj2EE(
 287   // CHECK-NEXT:  entry:
 288   // CHECK-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(ptr noundef nonnull align 4 dereferenceable(16) [[MAT1:%.*]])
 289   // CHECK-NEXT:    ret i32 [[CALL]]
 290   //
 291   // CHECK-LABEL: define linkonce_odr noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(
 292   // NOOPT:         [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4{{$}}
 293   // OPT:           [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 294   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <4 x i32> [[MAT]], i64 1
 295   // CHECK-NEXT:    ret i32 [[MATEXT]]
 296
 297   return extract(Mat1);
 298 }
 299
 300 using double4x4 = double __attribute__((matrix_type(4, 4))); // 4x4 matrix of double used by the subscript tests below
 301
 302 template <class R, class C>
     // The return type is spelled as decltype of a matrix subscript expression
     // on the parameters, so that expression becomes part of the mangled name
     // matched in test_matrix_subscript.
     // NOTE(review): the body is empty although the return type is not void;
     // presumably only the signature and mangling matter for this test. Confirm
     // before adding a return statement, since that would change the emitted IR.
 303 auto matrix_subscript(double4x4 m, R r, C c) -> decltype(m[r][c]) {}
 304
 305 double test_matrix_subscript(double4x4 m) {
       // Calls matrix_subscript<int, int>. The directives expect the callee to
       // return a pointer (non-null, 8-byte aligned, dereferenceable(8)) and
       // the caller to load the returned double through that pointer. The
       // mangled callee name encodes the subscript expression from the
       // trailing return type.
 306   // CHECK-LABEL: @_Z21test_matrix_subscriptu11matrix_typeILm4ELm4EdE(
 307   // NOOPT:         [[MAT:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}}
 308   // OPT:           [[MAT:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 309   // CHECK-NEXT:    [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) ptr @_Z16matrix_subscriptIiiEDTixixfp_fp0_fp1_Eu11matrix_typeILm4ELm4EdET_T0_(<16 x double> noundef [[MAT]], i32 noundef 1, i32 noundef 2)
 310   // NOOPT-NEXT:    [[RES:%.*]] = load double, ptr [[CALL]], align 8{{$}}
 311   // OPT-NEXT:      [[RES:%.*]] = load double, ptr [[CALL]], align 8, !tbaa !{{[0-9]+}}{{$}}
 312   // CHECK-NEXT:    ret double [[RES]]
 313
 314   return matrix_subscript(m, 1, 2);
 315 }
 316
 317 const double &test_matrix_subscript_reference(const double4x4 m) {
       // Binds a const reference to a matrix element. The directives expect
       // element m[0][1] (flat index 1 * 4 + 0 = 4) to be extracted and stored
       // into a separate double temporary, and the address of that temporary
       // to be returned rather than an address inside the matrix storage.
 318   // CHECK-LABEL: @_Z31test_matrix_subscript_referenceu11matrix_typeILm4ELm4EdE(
 319   // CHECK-NEXT:  entry:
 320   // CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [16 x double], align 8
 321   // CHECK-NEXT:    [[REF_TMP:%.*]] = alloca double, align 8
 322   // CHECK-NEXT:    store <16 x double> [[M:%.*]], ptr [[M_ADDR]], align 8
 323   // NOOPT:         [[NAMELESS1:%.*]] = load <16 x double>, ptr [[M_ADDR]], align 8{{$}}
 324   // OPT:           [[NAMELESS1:%.*]] = load <16 x double>, ptr [[M_ADDR]], align 8, !tbaa !{{[0-9]+}}{{$}}
 325   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <16 x double> [[NAMELESS1]], i64 4
 326   // CHECK-NEXT:    store double [[MATEXT]], ptr [[REF_TMP]], align 8
 327   // CHECK:         ret ptr [[REF_TMP]]
 328
 329   return m[0][1];
 330 }
 331
 332 struct UnsignedWrapper {
       // Class type with a non-explicit conversion to unsigned, used as a
       // column subscript in extract_IntWrapper_idx so that the index goes
       // through a user-defined conversion and is zero-extended.
 333   char x;
 334   operator unsigned() {
 335     return x;
 336   }
 337 };
 338
 339 double extract_IntWrapper_idx(double4x4 &m, IntWrapper i, UnsignedWrapper j) {
       // Subscripts a 4x4 matrix with class-type indices. The directives
       // expect the row index i + 1 as a signed add (nsw) that is
       // sign-extended, the column index j - 1 as an unsigned sub that is
       // zero-extended, and the flat index (j - 1) * 4 + (i + 1). Only the
       // optimized configuration expects an llvm.assume that the flat index is
       // below 16. The parameter names i, j and m are matched literally.
 340   // CHECK-LABEL: define{{.*}} double @_Z22extract_IntWrapper_idxRu11matrix_typeILm4ELm4EdE10IntWrapper15UnsignedWrapper(
 341   // CHECK:         [[I:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %i)
 342   // CHECK-NEXT:    [[I_ADD:%.*]] = add nsw i32 [[I]], 1
 343   // CHECK-NEXT:    [[I_ADD_EXT:%.*]] = sext i32 [[I_ADD]] to i64
 344   // CHECK-NEXT:    [[J:%.*]] = call noundef i32 @_ZN15UnsignedWrappercvjEv(ptr {{[^,]*}} %j)
 345   // CHECK-NEXT:    [[J_SUB:%.*]] = sub i32 [[J]], 1
 346   // CHECK-NEXT:    [[J_SUB_EXT:%.*]] = zext i32 [[J_SUB]] to i64
 347   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_SUB_EXT]], 4
 348   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_ADD_EXT]]
 349   // NOOPT-NEXT:    [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
 350   // NOOPT-NEXT:    [[MAT:%.*]] = load <16 x double>, ptr [[MAT_ADDR]], align 8{{$}}
 351   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
 352   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
 353   // OPT-NEXT:      [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 354   // OPT-NEXT:      [[MAT:%.*]] = load <16 x double>, ptr [[MAT_ADDR]], align 8, !tbaa !{{[0-9]+}}{{$}}
 355   // CHECK-NEXT:    [[MATEXT:%.*]]  = extractelement <16 x double> [[MAT]], i64 [[IDX2]]
 356   // CHECK-NEXT:    ret double [[MATEXT]]
 357   return m[i + 1][j - 1];
 358 }
 359
 360 template <class T, unsigned R, unsigned C>
     // Alias template that names an R x C matrix of T, so matrix types can be
     // written with ordinary template-argument syntax.
 361 using matrix_type = T __attribute__((matrix_type(R, C)));
 362 struct identmatrix_t {
       // Tag type that converts to any square N x N matrix type through a
       // conversion function template. The conversion writes 1 to each
       // diagonal element; no other element of the local result is assigned
       // before it is returned.
 363   template <class T, unsigned N>
 364   operator matrix_type<T, N, N>() const {
 365     matrix_type<T, N, N> result;
 366     for (unsigned i = 0; i != N; ++i)
 367       result[i][i] = 1;
 368     return result;
 369   }
 370 };
 371
 372 constexpr identmatrix_t identmatrix; // constant tag object; converted to a concrete matrix type in the test_constexpr tests below
 373
 374 void test_constexpr1(matrix_type<float, 4, 4> &m) {
       // m + identmatrix: the right operand is converted to a 4x4 float matrix
       // through the conversion function template of identmatrix_t. The
       // directives expect the matrix load, the conversion call on the
       // identmatrix global, an fadd, a store through the reloaded reference,
       // and the return. The second group of directives covers the loop body
       // of the instantiated conversion function: flat index i * 4 + i and an
       // insertelement of the float constant 1.0. Only the optimized
       // configuration expects an llvm.assume that the flat index is below 16.
 375   // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr1Ru11matrix_typeILm4ELm4EfE(
 376   // NOOPT:         [[MAT:%.*]] = load <16 x float>, ptr {{.*}}, align 4{{$}}
 377   // OPT:           [[MAT:%.*]] = load <16 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 378   // CHECK-NEXT:    [[IM:%.*]] = call noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(ptr {{[^,]*}} @_ZL11identmatrix)
 379   // CHECK-NEXT:    [[ADD:%.*]] = fadd <16 x float> [[MAT]], [[IM]]
 380   // NOOPT-NEXT:    [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
 381   // OPT-NEXT:      [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 382   // CHECK-NEXT:    store <16 x float> [[ADD]], ptr [[MAT_ADDR]], align 4
 383   // CHECK-NEXT:    ret void
 384
 385   // CHECK-LABEL: define linkonce_odr noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(
 386   // CHECK-LABEL: for.body:                                         ; preds = %for.cond
 387   // NOOPT-NEXT:   [[I:%.*]] = load i32, ptr %i, align 4{{$}}
 388   // OPT-NEXT:     [[I:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
 389   // CHECK-NEXT:   [[I_EXT:%.*]] = zext i32 [[I]] to i64
 390   // NOOPT-NEXT:   [[I2:%.*]] = load i32, ptr %i, align 4{{$}}
 391   // OPT-NEXT:     [[I2:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
 392   // CHECK-NEXT:   [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
 393   // CHECK-NEXT:   [[IDX1:%.*]] = mul i64 [[I2_EXT]], 4
 394   // CHECK-NEXT:   [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
 395   // OPT-NEXT:     [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
 396   // OPT-NEXT:     call void @llvm.assume(i1 [[CMP]])
 397   // CHECK-NEXT:   [[MAT:%.*]] = load <16 x float>, ptr %result, align 4{{$}}
 398   // CHECK-NEXT:   [[MATINS:%.*]] = insertelement <16 x float> [[MAT]], float 1.000000e+00, i64 [[IDX2]]
 399   // CHECK-NEXT:   store <16 x float> [[MATINS]], ptr %result, align 4
 400   // CHECK-NEXT:   br label %for.inc
 401   m = m + identmatrix;
 402 }
 403
 404 void test_constexpr2(matrix_type<int, 5, 5> &m) {
       // identmatrix - m + 1: the left operand is converted to a 5x5 int
       // matrix first, so the directives expect the conversion call before the
       // matrix load, then an integer sub, then an add of a constant vector of
       // twenty-five 1s for the scalar operand. The second group of directives
       // covers the loop body of the instantiated conversion function: flat
       // index i * 5 + i and an insertelement of the i32 constant 1. Only the
       // optimized configuration expects an llvm.assume that the flat index is
       // below 25.
 405   // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr2Ru11matrix_typeILm5ELm5EiE(
 406   // CHECK:         [[IM:%.*]] = call noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(ptr {{[^,]*}} @_ZL11identmatrix)
 407   // NOOPT:         [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4{{$}}
 408   // OPT:           [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 409   // CHECK-NEXT:    [[SUB:%.*]] = sub <25 x i32> [[IM]], [[MAT]]
 410   // CHECK-NEXT:    [[SUB2:%.*]] = add <25 x i32> [[SUB]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 411   // NOOPT-NEXT:    [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
 412   // OPT-NEXT:      [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 413   // CHECK-NEXT:    store <25 x i32> [[SUB2]], ptr [[MAT_ADDR]], align 4
 414   // CHECK-NEXT:    ret void
 415   //
 416
 417   // CHECK-LABEL: define linkonce_odr noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(
 418   // CHECK-LABEL: for.body:                                         ; preds = %for.cond
 419   // NOOPT-NEXT:   [[I:%.*]] = load i32, ptr %i, align 4{{$}}
 420   // OPT-NEXT:     [[I:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
 421   // CHECK-NEXT:   [[I_EXT:%.*]] = zext i32 [[I]] to i64
 422   // NOOPT-NEXT:   [[I2:%.*]] = load i32, ptr %i, align 4{{$}}
 423   // OPT-NEXT:     [[I2:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
 424   // CHECK-NEXT:   [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
 425   // CHECK-NEXT:   [[IDX1:%.*]] = mul i64 [[I2_EXT]], 5
 426   // CHECK-NEXT:   [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
 427   // OPT-NEXT:     [[CMP:%.*]] = icmp ult i64 [[IDX2]], 25
 428   // OPT-NEXT:     call void @llvm.assume(i1 [[CMP]])
 429   // CHECK-NEXT:   [[MAT:%.*]] = load <25 x i32>, ptr %result, align 4{{$}}
 430   // CHECK-NEXT:   [[MATINS:%.*]] = insertelement <25 x i32> [[MAT]], i32 1, i64 [[IDX2]]
 431   // CHECK-NEXT:   store <25 x i32> [[MATINS]], ptr %result, align 4
 432   // CHECK-NEXT:   br label %for.inc
 433
 434   m = identmatrix - m + 1;
 435 }