clang/test/CodeGenCXX/matrix-type-operators.cpp

   1 // RUN: %clang_cc1 -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck --check-prefixes=CHECK,NOOPT %s
   2 // RUN: %clang_cc1 -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck --check-prefixes=CHECK,OPT %s
   3 typedef double dx5x5_t __attribute__((matrix_type(5, 5))); // double matrix with dimensions (5, 5), spelled with typedef syntax; not referenced in the visible part of this file.
   4 using fx2x3_t = float __attribute__((matrix_type(2, 3))); // float matrix with dimensions (2, 3), spelled with alias-declaration syntax; not referenced in the visible part of this file.
   5
   6 template <typename EltTy, unsigned Rows, unsigned Columns>
   7 struct MyMatrix {
       // Holds one matrix value whose element type and dimensions are taken from
       // the template parameters, so the tests below reach matrix types through
       // dependent names (MyMatrix<...>::matrix_t).
   8   using matrix_t = EltTy __attribute__((matrix_type(Rows, Columns)));
   9
  10   matrix_t value;
  11 };
  12
  13 template <typename EltTy0, unsigned R0, unsigned C0>
  14 typename MyMatrix<EltTy0, R0, C0>::matrix_t add(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) {
       // Returns A.value + B.value. Both operands have the same element type and
       // dimensions, so this is the matrix + matrix form of the operator.
  15   return A.value + B.value;
  16 }
  17
  18 void test_add_template() {
       // Instantiates add<float, 2, 5> and stores its result into Mat1.value.
       // The expected IR covers the call site and the instantiated body: two
       // <10 x float> loads (carrying !tbaa only in the -O1 run) feeding one fadd.
       // The locals are named in the expected IR (%Mat1, %Mat2), so renaming them
       // requires updating the directives as well.
  19   // CHECK-LABEL: define{{.*}} void @_Z17test_add_templatev()
  20   // CHECK:       %call = call noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2)
  21
  22   // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z3addIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
  23   // NOOPT:       [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
  24   // NOOPT:       [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
  25   // OPT:         [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  26   // OPT:         [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  27   // CHECK-NEXT:  [[RES:%.*]] = fadd <10 x float> [[MAT1]], [[MAT2]]
  28   // CHECK-NEXT:  ret <10 x float> [[RES]]
  29
       // Mat1 and Mat2 are declared without initializers; only the emitted IR is
       // inspected by this test.
  30   MyMatrix<float, 2, 5> Mat1;
  31   MyMatrix<float, 2, 5> Mat2;
  32   Mat1.value = add(Mat1, Mat2);
  33 }
  34
  35 template <typename EltTy0, unsigned R0, unsigned C0>
  36 typename MyMatrix<EltTy0, R0, C0>::matrix_t subtract(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, R0, C0> &B) {
       // Returns A.value - B.value. Both operands have the same element type and
       // dimensions, so this is the matrix - matrix form of the operator.
  37   return A.value - B.value;
  38 }
  39
  40 void test_subtract_template() {
       // Instantiates subtract<float, 2, 5> and stores its result into Mat1.value.
       // The expected IR mirrors the add test, with fsub in place of fadd; the
       // !tbaa metadata on the loads is expected only in the -O1 run.
       // The locals are named in the expected IR (%Mat1, %Mat2).
  41   // CHECK-LABEL: define{{.*}} void @_Z22test_subtract_templatev()
  42   // CHECK:       %call = call noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2)
  43
  44   // CHECK-LABEL: define linkonce_odr noundef <10 x float> @_Z8subtractIfLj2ELj5EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tERS2_S4_(
  45   // NOOPT:       [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
  46   // NOOPT:       [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
  47   // OPT:         [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  48   // OPT:         [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  49   // CHECK-NEXT:  [[RES:%.*]] = fsub <10 x float> [[MAT1]], [[MAT2]]
  50   // CHECK-NEXT:  ret <10 x float> [[RES]]
  51
       // Mat1 and Mat2 are declared without initializers; only the emitted IR is
       // inspected by this test.
  52   MyMatrix<float, 2, 5> Mat1;
  53   MyMatrix<float, 2, 5> Mat2;
  54   Mat1.value = subtract(Mat1, Mat2);
  55 }
  56
  57 struct DoubleWrapper1 {
       // Class type with a user-defined, non-explicit conversion to double, used
       // as the scalar operand in the matrix/scalar subtraction tests below.
  58   int x;
       // Converts the stored int to double.
  59   operator double() {
  60     return x;
  61   }
  62 };
  63
  64 void test_DoubleWrapper1_Sub1(MyMatrix<double, 10, 9> &m) {
       // matrix - scalar, where the scalar comes from DoubleWrapper1's conversion
       // to double. Expected order in the IR: load m.value, call the conversion
       // operator, splat the scalar (insertelement + shufflevector with a zero
       // mask), then fsub with the matrix as the left operand.
       // The local w1 is named in the expected IR (%w1).
  65   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub1R8MyMatrixIdLj10ELj9EE(
  66   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
  67   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  68   // CHECK-NEXT:  [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(ptr {{[^,]*}} %w1)
  69   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
  70   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
  71   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  72   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
  73
  74   DoubleWrapper1 w1;
  75   w1.x = 10;
  76   m.value = m.value - w1;
  77 }
  78
  79 void test_DoubleWrapper1_Sub2(MyMatrix<double, 10, 9> &m) {
       // scalar - matrix, with the operands swapped relative to the Sub1 test.
       // The conversion call is expected before the matrix load, and the splatted
       // scalar is the left operand of fsub.
       // The local w1 is named in the expected IR (%w1).
  80   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper1_Sub2R8MyMatrixIdLj10ELj9EE(
  81   // CHECK:       [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(ptr {{[^,]*}} %w1)
  82   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
  83   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  84   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
  85   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
  86   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
  87   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
  88
  89   DoubleWrapper1 w1;
  90   w1.x = 10;
  91   m.value = w1 - m.value;
  92 }
  93
  94 struct DoubleWrapper2 {
       // Same shape as DoubleWrapper1 (an int member plus a non-explicit
       // conversion to double); used as the scalar operand in the addition tests
       // below.
  95   int x;
       // Converts the stored int to double.
  96   operator double() {
  97     return x;
  98   }
  99 };
 100
 101 void test_DoubleWrapper2_Add1(MyMatrix<double, 10, 9> &m) {
       // matrix + scalar, where the scalar comes from DoubleWrapper2's conversion
       // to double. Expected IR: matrix load, conversion call, splat of the
       // scalar, then fadd with the matrix as the left operand. Unlike the Sub1
       // test above, the conversion call is not required to sit on the line
       // directly after the load.
       // The local w2 is named in the expected IR (%w2).
 102   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add1R8MyMatrixIdLj10ELj9EE(
 103   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.+}}, align 8{{$}}
 104   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.+}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 105   // CHECK:       [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(ptr {{[^,]*}} %w2)
 106   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
 107   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
 108   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
 109   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
 110
 111   DoubleWrapper2 w2;
 112   w2.x = 20;
 113   m.value = m.value + w2;
 114 }
 115
 116 void test_DoubleWrapper2_Add2(MyMatrix<double, 10, 9> &m) {
       // scalar + matrix, with the operands swapped relative to the Add1 test.
       // The conversion call is expected before the matrix load, and the splatted
       // scalar is the left operand of fadd.
       // The local w2 is named in the expected IR (%w2).
 117   // CHECK-LABEL: define{{.*}} void @_Z24test_DoubleWrapper2_Add2R8MyMatrixIdLj10ELj9EE(
 118   // CHECK:       [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(ptr {{[^,]*}} %w2)
 119   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
 120   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 121   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
 122   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
 123   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
 124   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
 125
 126   DoubleWrapper2 w2;
 127   w2.x = 20;
 128   m.value = w2 + m.value;
 129 }
 130
 131 struct IntWrapper {
       // Class type with a non-explicit conversion to int. The tests below use it
       // both as a scalar operand of double matrices (which adds an int-to-double
       // conversion in the expected IR) and as a matrix subscript.
 132   char x;
       // Converts the stored char to int.
 133   operator int() {
 134     return x;
 135   }
 136 };
 137
 138 void test_IntWrapper_Add(MyMatrix<double, 10, 9> &m) {
       // matrix + scalar, where the scalar is an IntWrapper. Expected IR: matrix
       // load, call to the int conversion operator, sitofp of that result to
       // double, splat, then fadd with the matrix as the left operand.
       // The sitofp directive refers to the captured call result rather than a
       // hard-coded SSA name, so it does not depend on how the call is numbered.
       // The local w3 is named in the expected IR (%w3).
 139   // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_AddR8MyMatrixIdLj10ELj9EE(
 140   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
 141   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 142   // CHECK-NEXT:  [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %w3)
 143   // CHECK-NEXT:  [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
 144   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
 145   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
 146   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
 147   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
 148
 149   IntWrapper w3;
 150   w3.x = 'c';
 151   m.value = m.value + w3;
 152 }
 153
 154 void test_IntWrapper_Sub(MyMatrix<double, 10, 9> &m) {
       // scalar - matrix, where the scalar is an IntWrapper. Expected IR: call to
       // the int conversion operator, sitofp of that result to double, matrix
       // load, splat, then fsub with the splatted scalar as the left operand.
       // The sitofp directive refers to the captured call result rather than a
       // hard-coded SSA name, so it does not depend on how the call is numbered.
       // The local w3 is named in the expected IR (%w3).
 155   // CHECK-LABEL: define{{.*}} void @_Z19test_IntWrapper_SubR8MyMatrixIdLj10ELj9EE(
 156   // CHECK:       [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %w3)
 157   // CHECK-NEXT:  [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
 158   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
 159   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 160   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
 161   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
 162   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
 163   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
 164
 165   IntWrapper w3;
 166   w3.x = 'c';
 167   m.value = w3 - m.value;
 168 }
 169
 170 template <typename EltTy0, unsigned R0, unsigned C0, unsigned C1>
 171 typename MyMatrix<EltTy0, R0, C1>::matrix_t multiply(MyMatrix<EltTy0, R0, C0> &A, MyMatrix<EltTy0, C0, C1> &B) {
       // Returns A.value * B.value. A has dimensions (R0, C0), B has (C0, C1), and
       // the result has (R0, C1), so this is the matrix * matrix form of the
       // operator.
 172   return A.value * B.value;
 173 }
 174
 175 MyMatrix<float, 2, 2> test_multiply_template(MyMatrix<float, 2, 5> Mat1,
 176                                              MyMatrix<float, 5, 2> Mat2) {
       // Multiplies a (2, 5) matrix by a (5, 2) matrix through multiply<> and
       // returns the (2, 2) result wrapped in MyMatrix. The expected IR shows the
       // function emitted as returning void, with the result stored through
       // %agg.result, and the instantiated multiply body lowering to a call to
       // llvm.matrix.multiply with dimension arguments 2, 5, 2.
       // The names %Mat1, %Mat2, %value and %struct.MyMatrix.1 are spelled out in
       // the expected IR.
 177   // CHECK-LABEL: define{{.*}} void @_Z22test_multiply_template8MyMatrixIfLj2ELj5EES_IfLj5ELj2EE(
 178   // CHECK-NEXT:  entry:
 179   // CHECK-NEXT:    [[RES:%.*]] = call noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(ptr noundef nonnull align 4 dereferenceable(40) %Mat1, ptr noundef nonnull align 4 dereferenceable(40) %Mat2)
 180   // CHECK-NEXT:    %value = getelementptr inbounds nuw %struct.MyMatrix.1, ptr %agg.result, i32 0, i32 0
 181   // CHECK-NEXT:    store <4 x float> [[RES]], ptr %value, align 4
 182   // CHECK-NEXT:    ret void
 183   //
 184   // CHECK-LABEL:  define linkonce_odr noundef <4 x float> @_Z8multiplyIfLj2ELj5ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tERS0_IS1_XT0_EXT1_EERS0_IS1_XT1_EXT2_EE(
 185   // NOOPT:         [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
 186   // NOOPT:         [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4{{$}}
 187   // OPT:           [[MAT1:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 188   // OPT:           [[MAT2:%.*]] = load <10 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 189   // CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.matrix.multiply.v4f32.v10f32.v10f32(<10 x float> [[MAT1]], <10 x float> [[MAT2]], i32 2, i32 5, i32 2)
 190   // CHECK-NEXT:    ret <4 x float> [[RES]]
 191
 192   MyMatrix<float, 2, 2> Res;
 193   Res.value = multiply(Mat1, Mat2);
 194   return Res;
 195 }
 196
 197 void test_IntWrapper_Multiply(MyMatrix<double, 10, 9> &m, IntWrapper &w3) {
       // scalar * matrix, where the scalar is an IntWrapper passed by reference.
       // Expected IR: call to the int conversion operator, sitofp of that result
       // to double, matrix load, splat, then fmul with the splatted scalar as the
       // left operand, followed by the store and ret void.
       // The sitofp directive refers to the captured call result rather than a
       // hard-coded SSA name, so it does not depend on how the call is numbered.
 198   // CHECK-LABEL: define{{.*}} void @_Z24test_IntWrapper_MultiplyR8MyMatrixIdLj10ELj9EER10IntWrapper(
 199   // CHECK:       [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr noundef {{.*}})
 200   // CHECK-NEXT:  [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double
 201   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
 202   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 203   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
 204   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
 205   // CHECK-NEXT:  [[RES:%.*]] = fmul <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
 206   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
 207   // CHECK-NEXT:  ret void
 208   m.value = w3 * m.value;
 209 }
 210
 211 template <typename EltTy, unsigned Rows, unsigned Columns>
 212 void insert(MyMatrix<EltTy, Rows, Columns> &Mat, EltTy e, unsigned i, unsigned j) {
       // Writes e into Mat.value at subscript [i][j]. The expected IR in the
       // callers below computes the flattened element index as j * Rows + i.
 213   Mat.value[i][j] = e;
 214 }
 215
 216 void test_insert_template1(MyMatrix<unsigned, 2, 2> &Mat, unsigned e, unsigned i, unsigned j) {
       // Forwards its arguments to insert<unsigned, 2, 2>. The first group of
       // directives covers the call site (argument loads, then the call). The
       // second covers the instantiated body: i and j are zero-extended, the
       // index is computed as j * 2 + i, and the element is written with
       // load / insertelement / store on <4 x i32>. Only the -O1 run expects the
       // icmp ult against 4 plus llvm.assume, and !tbaa on the scalar loads.
 217   // CHECK-LABEL: @_Z21test_insert_template1R8MyMatrixIjLj2ELj2EEjjj(
 218   // NOOPT:         [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8{{$}}
 219   // NOOPT-NEXT:    [[E:%.*]] = load i32, ptr %e.addr, align 4{{$}}
 220   // NOOPT-NEXT:    [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
 221   // NOOPT-NEXT:    [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
 222   // OPT:           [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 223   // OPT-NEXT:      [[E:%.*]] = load i32, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 224   // OPT-NEXT:      [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 225   // OPT-NEXT:      [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 226   // CHECK-NEXT:    call void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(ptr noundef nonnull align 4 dereferenceable(16) [[MAT_ADDR]], i32 noundef [[E]], i32 noundef [[I]], i32 noundef [[J]])
 227   // CHECK-NEXT:    ret void
 228   //
 229   // CHECK-LABEL: define linkonce_odr void @_Z6insertIjLj2ELj2EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
 230   // NOOPT:         [[E:%.*]] = load i32, ptr %e.addr, align 4{{$}}
 231   // NOOPT:         [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
 232   // OPT:           [[E:%.*]] = load i32, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 233   // OPT:           [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 234   // CHECK-NEXT:    [[I_EXT:%.*]] = zext i32 [[I]] to i64
 235   // NOOPT-NEXT:    [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
 236   // OPT-NEXT:      [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 237   // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
 238   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
 239   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
 240   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 4
 241   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
 242   // CHECK-NEXT:    [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4{{$}}
 243   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <4 x i32> [[MAT]], i32 [[E]], i64 [[IDX2]]
 244   // CHECK-NEXT:    store <4 x i32> [[MATINS]], ptr {{.*}}, align 4
 245   // CHECK-NEXT:    ret void
 246
 247   insert(Mat, e, i, j);
 248 }
 249
 250 void test_insert_template2(MyMatrix<float, 3, 8> &Mat, float e) {
       // Calls insert<float, 3, 8> with the constant subscripts 2 and 5. The call
       // site is expected to pass them as immediate i32 arguments. The
       // instantiated body computes the index as j * 3 + i and writes through
       // load / insertelement / store on <24 x float>. Only the -O1 run expects
       // the icmp ult against 24 plus llvm.assume.
 251   // CHECK-LABEL: @_Z21test_insert_template2R8MyMatrixIfLj3ELj8EEf(
 252   // NOOPT:         [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8{{$}}
 253   // NOOPT-NEXT:    [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
 254   // OPT:           [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 255   // OPT-NEXT:      [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 256   // CHECK-NEXT:    call void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(ptr noundef nonnull align 4 dereferenceable(96) [[MAT_ADDR]], float noundef [[E]], i32 noundef 2, i32 noundef 5)
 257   // CHECK-NEXT:    ret void
 258   //
 259   // CHECK-LABEL: define linkonce_odr void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(
 260   // NOOPT:         [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
 261   // NOOPT:         [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
 262   // OPT:           [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 263   // OPT:           [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 264   // CHECK-NEXT:    [[I_EXT:%.*]] = zext i32 [[I]] to i64
 265   // NOOPT-NEXT:    [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
 266   // OPT-NEXT:      [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 267   // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
 268   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 3
 269   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
 270   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 24
 271   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
 272   // CHECK-NEXT:    [[MAT:%.*]] = load <24 x float>, ptr {{.*}}, align 4{{$}}
 273   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <24 x float> [[MAT]], float [[E]], i64 [[IDX2]]
 274   // CHECK-NEXT:    store <24 x float> [[MATINS]], ptr {{.*}}, align 4
 275   // CHECK-NEXT:    ret void
 276
 277   insert(Mat, e, 2, 5);
 278 }
 279
 280 template <typename EltTy, unsigned Rows, unsigned Columns>
 281 EltTy extract(MyMatrix<EltTy, Rows, Columns> &Mat) {
       // Returns the element at the constant subscript [1u][0u]. The expected IR
       // in the caller below reads it with extractelement at index 1.
 282   return Mat.value[1u][0u];
 283 }
 284
 285 int test_extract_template(MyMatrix<int, 2, 2> Mat1) {
       // Takes Mat1 by value and passes it by reference to extract<int, 2, 2>.
       // Expected IR: the call site returns the call result directly, and the
       // instantiated body loads <4 x i32> (with !tbaa only in the -O1 run) and
       // extracts element 1.
 286   // CHECK-LABEL: @_Z21test_extract_template8MyMatrixIiLj2ELj2EE(
 287   // CHECK-NEXT:  entry:
 288   // CHECK-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(ptr noundef nonnull align 4 dereferenceable(16) [[MAT1:%.*]])
 289   // CHECK-NEXT:    ret i32 [[CALL]]
 290   //
 291   // CHECK-LABEL: define linkonce_odr noundef i32 @_Z7extractIiLj2ELj2EET_R8MyMatrixIS0_XT0_EXT1_EE(
 292   // NOOPT:         [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4{{$}}
 293   // OPT:           [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 294   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <4 x i32> [[MAT]], i64 1
 295   // CHECK-NEXT:    ret i32 [[MATEXT]]
 296
 297   return extract(Mat1);
 298 }
 299
 300 using double4x4 = double __attribute__((matrix_type(4, 4))); // double matrix with dimensions (4, 4), used by the subscript tests below.
 301
 302 template <class R, class C>
 303 auto matrix_subscript(double4x4 m, R r, C c) -> decltype(m[r][c]) {
       // The return type is spelled as decltype of a matrix subscript expression,
       // which also appears in the mangled name the caller below expects. Per the
       // FIXME, the body cannot return m[r][c] itself, so it returns a
       // function-local static instead; the arguments m, r and c are not used in
       // the body.
 304   // FIXME: We can't actually do 'return m[r][c]' here currently.
 305   static double d;
 306   return d;
 307 }
 308
 309 double test_matrix_subscript(double4x4 m) {
       // Calls matrix_subscript(m, 1, 2). Expected IR: load the <16 x double>
       // argument, call the instantiation (which returns a pointer that is
       // nonnull, align 8, dereferenceable(8)), then load a double through that
       // pointer and return it. !tbaa on the loads is expected only in the -O1
       // run.
 310   // CHECK-LABEL: @_Z21test_matrix_subscriptu11matrix_typeILm4ELm4EdE(
 311   // NOOPT:         [[MAT:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}}
 312   // OPT:           [[MAT:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 313   // CHECK-NEXT:    [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) ptr @_Z16matrix_subscriptIiiEDTixixfp_fp0_fp1_Eu11matrix_typeILm4ELm4EdET_T0_(<16 x double> noundef [[MAT]], i32 noundef 1, i32 noundef 2)
 314   // NOOPT-NEXT:    [[RES:%.*]] = load double, ptr [[CALL]], align 8{{$}}
 315   // OPT-NEXT:      [[RES:%.*]] = load double, ptr [[CALL]], align 8, !tbaa !{{[0-9]+}}{{$}}
 316   // CHECK-NEXT:    ret double [[RES]]
 317
 318   return matrix_subscript(m, 1, 2);
 319 }
 320
 321 const double &test_matrix_subscript_reference(const double4x4 m) {
       // Binds the const reference return value to the element m[0][1]. Expected
       // IR: the argument is stored to a [16 x double] alloca, reloaded as
       // <16 x double>, element 4 (0 + 1 * 4) is extracted and stored to a
       // separate double alloca, and a pointer to that alloca is returned.
       // NOTE(review): the returned reference refers to storage local to this
       // function; this looks intentional for checking codegen only - confirm
       // before reusing the pattern.
 322   // CHECK-LABEL: @_Z31test_matrix_subscript_referenceu11matrix_typeILm4ELm4EdE(
 323   // CHECK-NEXT:  entry:
 324   // CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [16 x double], align 8
 325   // CHECK-NEXT:    [[REF_TMP:%.*]] = alloca double, align 8
 326   // CHECK-NEXT:    store <16 x double> [[M:%.*]], ptr [[M_ADDR]], align 8
 327   // NOOPT:         [[NAMELESS1:%.*]] = load <16 x double>, ptr [[M_ADDR]], align 8{{$}}
 328   // OPT:           [[NAMELESS1:%.*]] = load <16 x double>, ptr [[M_ADDR]], align 8, !tbaa !{{[0-9]+}}{{$}}
 329   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <16 x double> [[NAMELESS1]], i64 4
 330   // CHECK-NEXT:    store double [[MATEXT]], ptr [[REF_TMP]], align 8
 331   // CHECK:         ret ptr [[REF_TMP]]
 332
 333   return m[0][1];
 334 }
 335
 336 struct UnsignedWrapper {
       // Class type with a non-explicit conversion to unsigned; used as the second
       // matrix subscript in extract_IntWrapper_idx below.
 337   char x;
       // Converts the stored char to unsigned.
 338   operator unsigned() {
 339     return x;
 340   }
 341 };
 342
 343 double extract_IntWrapper_idx(double4x4 &m, IntWrapper i, UnsignedWrapper j) {
       // Subscripts a matrix with expressions built from class types that convert
       // to int and unsigned. Expected IR: i converts to i32, i + 1 is an
       // add nsw followed by sext; j converts to i32, j - 1 is a plain sub
       // followed by zext; the flattened index is (j - 1) * 4 + (i + 1). Only
       // the -O1 run expects the icmp ult against 16 plus llvm.assume, placed
       // before the matrix load.
       // The parameters are named in the expected IR (%i, %j, %m.addr).
 344   // CHECK-LABEL: define{{.*}} double @_Z22extract_IntWrapper_idxRu11matrix_typeILm4ELm4EdE10IntWrapper15UnsignedWrapper(
 345   // CHECK:         [[I:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %i)
 346   // CHECK-NEXT:    [[I_ADD:%.*]] = add nsw i32 [[I]], 1
 347   // CHECK-NEXT:    [[I_ADD_EXT:%.*]] = sext i32 [[I_ADD]] to i64
 348   // CHECK-NEXT:    [[J:%.*]] = call noundef i32 @_ZN15UnsignedWrappercvjEv(ptr {{[^,]*}} %j)
 349   // CHECK-NEXT:    [[J_SUB:%.*]] = sub i32 [[J]], 1
 350   // CHECK-NEXT:    [[J_SUB_EXT:%.*]] = zext i32 [[J_SUB]] to i64
 351   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_SUB_EXT]], 4
 352   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_ADD_EXT]]
 353   // NOOPT-NEXT:    [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
 354   // NOOPT-NEXT:    [[MAT:%.*]] = load <16 x double>, ptr [[MAT_ADDR]], align 8{{$}}
 355   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
 356   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
 357   // OPT-NEXT:      [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 358   // OPT-NEXT:      [[MAT:%.*]] = load <16 x double>, ptr [[MAT_ADDR]], align 8, !tbaa !{{[0-9]+}}{{$}}
 359   // CHECK-NEXT:    [[MATEXT:%.*]]  = extractelement <16 x double> [[MAT]], i64 [[IDX2]]
 360   // CHECK-NEXT:    ret double [[MATEXT]]
 361   return m[i + 1][j - 1];
 362 }
 363
 364 template <class T, unsigned R, unsigned C>
     // Alias template over the matrix_type attribute: element type T with
     // dimensions (R, C). Lets a matrix type be named with deducible template
     // parameters, as identmatrix_t's conversion operator does below.
 365 using matrix_type = T __attribute__((matrix_type(R, C)));
 366 struct identmatrix_t {
       // Tag type that converts implicitly to any square matrix_type<T, N, N>.
       // The conversion writes 1 to each diagonal element [i][i]; `result` is
       // declared without an initializer and no other element is assigned here.
       // The local name `result` appears in the expected IR of the tests below
       // (%result), as does the loop variable (%i).
 367   template <class T, unsigned N>
 368   operator matrix_type<T, N, N>() const {
 369     matrix_type<T, N, N> result;
 370     for (unsigned i = 0; i != N; ++i)
 371       result[i][i] = 1;
 372     return result;
 373   }
 374 };
 375
 376 constexpr identmatrix_t identmatrix; // Shared constant instance; the expected IR in the tests below refers to it as @_ZL11identmatrix.
 377
 378 void test_constexpr1(matrix_type<float, 4, 4> &m) {
       // matrix + identmatrix, where the right operand is converted to a
       // <16 x float> matrix by identmatrix_t's conversion operator template.
       // Expected IR for this function: load m, call the conversion operator on
       // @_ZL11identmatrix, fadd, reload the address of m, store, ret void.
       // The second group of directives covers the for.body block of the
       // instantiated conversion operator: the loop variable is loaded twice
       // (once per subscript), the index is computed as i * 4 + i, and 1.0 is
       // inserted into %result. Only the -O1 run expects the icmp ult against 16
       // plus llvm.assume.
 379   // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr1Ru11matrix_typeILm4ELm4EfE(
 380   // NOOPT:         [[MAT:%.*]] = load <16 x float>, ptr {{.*}}, align 4{{$}}
 381   // OPT:           [[MAT:%.*]] = load <16 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 382   // CHECK-NEXT:    [[IM:%.*]] = call noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(ptr {{[^,]*}} @_ZL11identmatrix)
 383   // CHECK-NEXT:    [[ADD:%.*]] = fadd <16 x float> [[MAT]], [[IM]]
 384   // NOOPT-NEXT:    [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
 385   // OPT-NEXT:      [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 386   // CHECK-NEXT:    store <16 x float> [[ADD]], ptr [[MAT_ADDR]], align 4
 387   // CHECK-NEXT:    ret void
 388
 389   // CHECK-LABEL: define linkonce_odr noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(
 390   // CHECK-LABEL: for.body:                                         ; preds = %for.cond
 391   // NOOPT-NEXT:   [[I:%.*]] = load i32, ptr %i, align 4{{$}}
 392   // OPT-NEXT:     [[I:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
 393   // CHECK-NEXT:   [[I_EXT:%.*]] = zext i32 [[I]] to i64
 394   // NOOPT-NEXT:   [[I2:%.*]] = load i32, ptr %i, align 4{{$}}
 395   // OPT-NEXT:     [[I2:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
 396   // CHECK-NEXT:   [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
 397   // CHECK-NEXT:   [[IDX1:%.*]] = mul i64 [[I2_EXT]], 4
 398   // CHECK-NEXT:   [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
 399   // OPT-NEXT:     [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
 400   // OPT-NEXT:     call void @llvm.assume(i1 [[CMP]])
 401   // CHECK-NEXT:   [[MAT:%.*]] = load <16 x float>, ptr %result, align 4{{$}}
 402   // CHECK-NEXT:   [[MATINS:%.*]] = insertelement <16 x float> [[MAT]], float 1.000000e+00, i64 [[IDX2]]
 403   // CHECK-NEXT:   store <16 x float> [[MATINS]], ptr %result, align 4
 404   // CHECK-NEXT:   br label %for.inc
 405   m = m + identmatrix;
 406 }
 407
 408 void test_constexpr2(matrix_type<int, 5, 5> &m) {
       // identmatrix - m + 1 on a <25 x i32> matrix. Expected IR for this
       // function: the conversion operator is called first, then m is loaded,
       // followed by a vector sub (converted matrix minus m), an add of
       // splat (i32 1), a reload of the address of m, the store, and ret void.
       // The second group of directives covers the for.body block of the
       // instantiated conversion operator: the index is computed as i * 5 + i
       // and the integer 1 is inserted into %result. Only the -O1 run expects
       // the icmp ult against 25 plus llvm.assume.
 409   // CHECK-LABEL: define{{.*}} void @_Z15test_constexpr2Ru11matrix_typeILm5ELm5EiE(
 410   // CHECK:         [[IM:%.*]] = call noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(ptr {{[^,]*}} @_ZL11identmatrix)
 411   // NOOPT:         [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4{{$}}
 412   // OPT:           [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 413   // CHECK-NEXT:    [[SUB:%.*]] = sub <25 x i32> [[IM]], [[MAT]]
 414   // CHECK-NEXT:    [[SUB2:%.*]] = add <25 x i32> [[SUB]], splat (i32 1)
 415   // NOOPT-NEXT:    [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
 416   // OPT-NEXT:      [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 417   // CHECK-NEXT:    store <25 x i32> [[SUB2]], ptr [[MAT_ADDR]], align 4
 418   // CHECK-NEXT:    ret void
 419   //
 420
 421   // CHECK-LABEL: define linkonce_odr noundef <25 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj5EEEv(
 422   // CHECK-LABEL: for.body:                                         ; preds = %for.cond
 423   // NOOPT-NEXT:   [[I:%.*]] = load i32, ptr %i, align 4{{$}}
 424   // OPT-NEXT:     [[I:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
 425   // CHECK-NEXT:   [[I_EXT:%.*]] = zext i32 [[I]] to i64
 426   // NOOPT-NEXT:   [[I2:%.*]] = load i32, ptr %i, align 4{{$}}
 427   // OPT-NEXT:     [[I2:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}}
 428   // CHECK-NEXT:   [[I2_EXT:%.*]] = zext i32 [[I2]] to i64
 429   // CHECK-NEXT:   [[IDX1:%.*]] = mul i64 [[I2_EXT]], 5
 430   // CHECK-NEXT:   [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
 431   // OPT-NEXT:     [[CMP:%.*]] = icmp ult i64 [[IDX2]], 25
 432   // OPT-NEXT:     call void @llvm.assume(i1 [[CMP]])
 433   // CHECK-NEXT:   [[MAT:%.*]] = load <25 x i32>, ptr %result, align 4{{$}}
 434   // CHECK-NEXT:   [[MATINS:%.*]] = insertelement <25 x i32> [[MAT]], i32 1, i64 [[IDX2]]
 435   // CHECK-NEXT:   store <25 x i32> [[MATINS]], ptr %result, align 4
 436   // CHECK-NEXT:   br label %for.inc
 437
 438   m = identmatrix - m + 1;
 439 }