// RUN: %clang_cc1 -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,NOOPT %s
// RUN: %clang_cc1 -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,OPT %s
// Matrix typedefs for the element types exercised by the tests below.
typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
typedef unsigned long long ullx4x2_t __attribute__((matrix_type(4, 2)));
// Floating point matrix/scalar additions.
12 void add_matrix_matrix_double(dx5x5_t a
, dx5x5_t b
, dx5x5_t c
) {
13 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b, <25 x double> noundef %c)
14 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
15 // NOOPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
16 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
17 // OPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
18 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[B]], [[C]]
19 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
24 void add_compound_assign_matrix_double(dx5x5_t a
, dx5x5_t b
) {
25 // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b)
26 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
27 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
28 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
29 // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
30 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[A]], [[B]]
31 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
36 void subtract_compound_assign_matrix_double(dx5x5_t a
, dx5x5_t b
) {
37 // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b)
38 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
39 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
40 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
41 // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
42 // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[A]], [[B]]
43 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
48 void add_matrix_matrix_float(fx2x3_t a
, fx2x3_t b
, fx2x3_t c
) {
49 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b, <6 x float> noundef %c)
50 // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
51 // NOOPT-NEXT: [[C:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
52 // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
53 // OPT-NEXT: [[C:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
54 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[B]], [[C]]
55 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
60 void add_compound_assign_matrix_float(fx2x3_t a
, fx2x3_t b
) {
61 // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b)
62 // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
63 // NOOPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
64 // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
65 // OPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
66 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[A]], [[B]]
67 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
72 void subtract_compound_assign_matrix_float(fx2x3_t a
, fx2x3_t b
) {
73 // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b)
74 // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
75 // NOOPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
76 // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
77 // OPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
78 // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[A]], [[B]]
79 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
84 void add_matrix_scalar_double_float(dx5x5_t a
, float vf
) {
85 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
86 // NOOPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
87 // NOOPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
88 // OPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
89 // OPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
90 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
91 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
92 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
93 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
94 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
99 void add_compound_matrix_scalar_double_float(dx5x5_t a
, float vf
) {
100 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
101 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
102 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
103 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
104 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
105 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
106 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
107 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
108 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
109 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
114 void subtract_compound_matrix_scalar_double_float(dx5x5_t a
, float vf
) {
115 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
116 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
117 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
118 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
119 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
120 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
121 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
122 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
123 // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
124 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
129 void add_matrix_scalar_double_double(dx5x5_t a
, double vd
) {
130 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
131 // NOOPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
132 // NOOPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
133 // OPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
134 // OPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
135 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
136 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
137 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
138 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
143 void add_compound_matrix_scalar_double_double(dx5x5_t a
, double vd
) {
144 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
145 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
146 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
147 // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
148 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
149 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
150 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
151 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
152 // store <25 x double> [[RES]], ptr {{.*}}, align 8
156 void subtract_compound_matrix_scalar_double_double(dx5x5_t a
, double vd
) {
157 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
158 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
159 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
160 // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
161 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
162 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
163 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
164 // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
165 // store <25 x double> [[RES]], ptr {{.*}}, align 8
169 void add_matrix_scalar_float_float(fx2x3_t b
, float vf
) {
170 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
171 // NOOPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
172 // NOOPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
173 // OPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
174 // OPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
175 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
176 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
177 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
178 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
183 void add_compound_matrix_scalar_float_float(fx2x3_t b
, float vf
) {
184 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
185 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
186 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4{{$}}
187 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
188 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
189 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
190 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
191 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
192 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
196 void subtract_compound_matrix_scalar_float_float(fx2x3_t b
, float vf
) {
197 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
198 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
199 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4{{$}}
200 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
201 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
202 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
203 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
204 // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
205 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
209 void add_matrix_scalar_float_double(fx2x3_t b
, double vd
) {
210 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
211 // NOOPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
212 // NOOPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
213 // OPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
214 // OPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
215 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
216 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
217 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
218 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
219 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
224 void add_compound_matrix_scalar_float_double(fx2x3_t b
, double vd
) {
225 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
226 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
227 // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
228 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
229 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
230 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
231 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
232 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
233 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
234 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
238 void subtract_compound_matrix_scalar_float_double(fx2x3_t b
, double vd
) {
239 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
240 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
241 // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
242 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
243 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
244 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
245 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
246 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
247 // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
248 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
// Integer matrix/scalar additions
254 void add_matrix_matrix_int(ix9x3_t a
, ix9x3_t b
, ix9x3_t c
) {
255 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b, <27 x i32> noundef %c)
256 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
257 // NOOPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
258 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
259 // OPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
260 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[B]], [[C]]
261 // CHECK-NEXT: store <27 x i32> [[RES]], ptr {{.*}}, align 4
265 void add_compound_matrix_matrix_int(ix9x3_t a
, ix9x3_t b
) {
266 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b)
267 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
268 // NOOPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
269 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
270 // OPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
271 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[A]], [[B]]
272 // CHECK-NEXT: store <27 x i32> [[RES]], ptr {{.*}}, align 4
276 void subtract_compound_matrix_matrix_int(ix9x3_t a
, ix9x3_t b
) {
277 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b)
278 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
279 // NOOPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
280 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
281 // OPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
282 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[A]], [[B]]
283 // CHECK-NEXT: store <27 x i32> [[RES]], ptr {{.*}}, align 4
287 void add_matrix_matrix_unsigned_long_long(ullx4x2_t a
, ullx4x2_t b
, ullx4x2_t c
) {
288 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b, <8 x i64> noundef %c)
289 // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
290 // NOOPT-NEXT: [[C:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
291 // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
292 // OPT-NEXT: [[C:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
293 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[B]], [[C]]
294 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
299 void add_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a
, ullx4x2_t b
) {
300 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b)
301 // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
302 // NOOPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
303 // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
304 // OPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
305 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[A]], [[B]]
306 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
311 void subtract_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a
, ullx4x2_t b
) {
312 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b)
313 // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
314 // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
315 // NOOPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
316 // OPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
317 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[A]], [[B]]
318 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
323 void add_matrix_scalar_int_short(ix9x3_t a
, short vs
) {
324 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
325 // NOOPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
326 // NOOPT-NEXT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
327 // OPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
328 // OPT-NEXT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
329 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
330 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i64 0
331 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
332 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
333 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
338 void add_compound_matrix_scalar_int_short(ix9x3_t a
, short vs
) {
339 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
340 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
341 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
342 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
343 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
344 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
345 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i64 0
346 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
347 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
348 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
353 void subtract_compound_matrix_scalar_int_short(ix9x3_t a
, short vs
) {
354 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
355 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
356 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
357 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
358 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
359 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
360 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i64 0
361 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
362 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
363 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
368 void add_matrix_scalar_int_long_int(ix9x3_t a
, long int vli
) {
369 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
370 // NOOPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
371 // NOOPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
372 // OPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
373 // OPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
374 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
375 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
376 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
377 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
378 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
383 void add_compound_matrix_scalar_int_long_int(ix9x3_t a
, long int vli
) {
384 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
385 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
386 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
387 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
388 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
389 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
390 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
391 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
392 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
393 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
398 void subtract_compound_matrix_scalar_int_long_int(ix9x3_t a
, long int vli
) {
399 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
400 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
401 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
402 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
403 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
404 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
405 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
406 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
407 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
408 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
413 void add_matrix_scalar_int_unsigned_long_long(ix9x3_t a
, unsigned long long int vulli
) {
414 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
415 // NOOPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
416 // NOOPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
417 // OPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
418 // OPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
419 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
420 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
421 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
422 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
423 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
428 void add_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a
, unsigned long long int vulli
) {
429 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
430 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
431 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
432 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
433 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4{{$}}
434 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
435 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
436 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
437 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
438 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
443 void subtract_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a
, unsigned long long int vulli
) {
444 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
445 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
446 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
447 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
448 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4{{$}}
449 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
450 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
451 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
452 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
453 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
458 void add_matrix_scalar_long_long_int_short(ullx4x2_t b
, short vs
) {
459 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
460 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
461 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
462 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
463 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
464 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
465 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
466 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
467 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
468 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
473 void add_compound_matrix_scalar_long_long_int_short(ullx4x2_t b
, short vs
) {
474 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
475 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
476 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
477 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
478 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
479 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
480 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
481 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
482 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
483 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
488 void subtract_compound_matrix_scalar_long_long_int_short(ullx4x2_t b
, short vs
) {
489 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
490 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
491 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
492 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
493 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
494 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
495 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
496 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
497 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
498 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
503 void add_matrix_scalar_long_long_int_int(ullx4x2_t b
, long int vli
) {
504 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
505 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
506 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
507 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
508 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
509 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
510 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
511 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
512 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
517 void add_compound_matrix_scalar_long_long_int_int(ullx4x2_t b
, long int vli
) {
518 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
519 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
520 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
521 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
522 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
523 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
524 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
525 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
526 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
531 void subtract_compound_matrix_scalar_long_long_int_int(ullx4x2_t b
, long int vli
) {
532 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
533 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
534 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
535 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
536 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
537 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
538 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
539 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
540 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
545 void add_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b
, unsigned long long int vulli
) {
546 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_unsigned_long_long
547 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
548 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
549 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
550 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
551 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
552 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
553 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
554 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
558 void add_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b
, unsigned long long int vulli
) {
559 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_unsigned_long_long
560 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
561 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
562 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
563 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
564 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
565 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
566 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
567 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
572 void subtract_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b
, unsigned long long int vulli
) {
573 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_unsigned_long_long
574 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
575 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
576 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
577 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
578 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
579 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
580 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
581 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
586 // Tests for matrix multiplication.
588 void multiply_matrix_matrix_double(dx5x5_t b
, dx5x5_t c
) {
589 // CHECK-LABEL: @multiply_matrix_matrix_double(
590 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
591 // NOOPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
592 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
593 // OPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
594 // CHECK-NEXT: [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
595 // CHECK-NEXT: store <25 x double> [[RES]], ptr %a, align 8
603 void multiply_compound_matrix_matrix_double(dx5x5_t b
, dx5x5_t c
) {
604 // CHECK-LABEL: @multiply_compound_matrix_matrix_double(
605 // NOOPT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
606 // NOOPT-NEXT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
607 // OPT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
608 // OPT-NEXT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
609 // CHECK-NEXT: [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
610 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
611 // CHECK-NEXT: ret void
615 typedef int ix3x9_t
__attribute__((matrix_type(3, 9)));
616 typedef int ix9x9_t
__attribute__((matrix_type(9, 9)));
617 // CHECK-LABEL: @multiply_matrix_matrix_int(
618 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
619 // NOOPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
620 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
621 // OPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
622 // CHECK-NEXT: [[RES:%.*]] = call <81 x i32> @llvm.matrix.multiply.v81i32.v27i32.v27i32(<27 x i32> [[B]], <27 x i32> [[C]], i32 9, i32 3, i32 9)
623 // CHECK-NEXT: store <81 x i32> [[RES]], ptr %a, align 4
626 void multiply_matrix_matrix_int(ix9x3_t b
, ix3x9_t c
) {
631 // CHECK-LABEL: @multiply_double_matrix_scalar_float(
632 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
633 // NOOPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}}
634 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
635 // OPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
636 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
637 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
638 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
639 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
640 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
641 // CHECK-NEXT: ret void
643 void multiply_double_matrix_scalar_float(dx5x5_t a
, float s
) {
647 // CHECK-LABEL: @multiply_compound_double_matrix_scalar_float
648 // NOOPT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}}
649 // OPT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
650 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
651 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
652 // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
653 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
654 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
655 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
656 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
657 // CHECK-NEXT: ret void
659 void multiply_compound_double_matrix_scalar_float(dx5x5_t a
, float s
) {
663 // CHECK-LABEL: @multiply_double_matrix_scalar_double(
664 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
665 // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
666 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
667 // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
668 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
669 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
670 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
671 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
672 // CHECK-NEXT: ret void
674 void multiply_double_matrix_scalar_double(dx5x5_t a
, double s
) {
678 // CHECK-LABEL: @multiply_compound_double_matrix_scalar_double(
679 // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
680 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
681 // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
682 // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
683 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
684 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
685 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
686 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
687 // CHECK-NEXT: ret void
688 void multiply_compound_double_matrix_scalar_double(dx5x5_t a
, double s
) {
692 // CHECK-LABEL: @multiply_float_matrix_scalar_double(
693 // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
694 // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
695 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
696 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
697 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
698 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
699 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
700 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[VECSPLAT]], [[MAT]]
701 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4
702 // CHECK-NEXT: ret void
704 void multiply_float_matrix_scalar_double(fx2x3_t b
, double s
) {
708 // CHECK-LABEL: @multiply_compound_float_matrix_scalar_double(
709 // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
710 // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
711 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
712 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
713 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
714 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
715 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
716 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], [[VECSPLAT]]
717 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4
718 // CHECK-NEXT: ret void
719 void multiply_compound_float_matrix_scalar_double(fx2x3_t b
, double s
) {
723 // CHECK-LABEL: @multiply_int_matrix_scalar_short(
724 // NOOPT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}}
725 // OPT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
726 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
727 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
728 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
729 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
730 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
731 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[VECSPLAT]], [[MAT]]
732 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
733 // CHECK-NEXT: ret void
735 void multiply_int_matrix_scalar_short(ix9x3_t b
, short s
) {
739 // CHECK-LABEL: @multiply_compound_int_matrix_scalar_short(
740 // NOOPT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}}
741 // OPT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
742 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
743 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
744 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
745 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
746 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
747 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
748 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
749 // CHECK-NEXT: ret void
751 void multiply_compound_int_matrix_scalar_short(ix9x3_t b
, short s
) {
755 // CHECK-LABEL: @multiply_int_matrix_scalar_ull(
756 // NOOPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
757 // OPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
758 // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
759 // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
760 // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
761 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
762 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
763 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
764 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
765 // CHECK-NEXT: ret void
767 void multiply_int_matrix_scalar_ull(ix9x3_t b
, unsigned long long s
) {
771 void multiply_compound_int_matrix_scalar_ull(ix9x3_t b
, unsigned long long s
) {
772 // CHECK-LABEL: @multiply_compound_int_matrix_scalar_ull(
773 // NOOPT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
774 // OPT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
775 // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
776 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
777 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
778 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
779 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
780 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
781 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
782 // CHECK-NEXT: ret void
787 // CHECK-LABEL: @multiply_float_matrix_constant(
788 // CHECK-NEXT: entry:
789 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
790 // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
791 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
792 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
793 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00)
794 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
795 // CHECK-NEXT: ret void
797 void multiply_float_matrix_constant(fx2x3_t a
) {
801 // CHECK-LABEL: @multiply_compound_float_matrix_constant(
802 // CHECK-NEXT: entry:
803 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
804 // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
805 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
806 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
807 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00)
808 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
809 // CHECK-NEXT: ret void
810 void multiply_compound_float_matrix_constant(fx2x3_t a
) {
814 // CHECK-LABEL: @multiply_int_matrix_constant(
815 // CHECK-NEXT: entry:
816 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [27 x i32], align 4
817 // CHECK-NEXT: store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4
818 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}}
819 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
820 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> splat (i32 5), [[MAT]]
821 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4
822 // CHECK-NEXT: ret void
824 void multiply_int_matrix_constant(ix9x3_t a
) {
828 // CHECK-LABEL: @multiply_compound_int_matrix_constant(
829 // CHECK-NEXT: entry:
830 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [27 x i32], align 4
831 // CHECK-NEXT: store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4
832 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}}
833 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
834 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], splat (i32 5)
835 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4
836 // CHECK-NEXT: ret void
838 void multiply_compound_int_matrix_constant(ix9x3_t a
) {
842 // CHECK-LABEL: @divide_double_matrix_scalar_float(
843 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
844 // NOOPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}}
845 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
846 // OPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
847 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
848 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
849 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
850 // CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
851 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
852 // CHECK-NEXT: ret void
854 void divide_double_matrix_scalar_float(dx5x5_t a
, float s
) {
858 // CHECK-LABEL: @divide_double_matrix_scalar_double(
859 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
860 // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
861 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
862 // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
863 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
864 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
865 // CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
866 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
867 // CHECK-NEXT: ret void
869 void divide_double_matrix_scalar_double(dx5x5_t a
, double s
) {
873 // CHECK-LABEL: @divide_float_matrix_scalar_double(
874 // NOOPT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
875 // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
876 // OPT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
877 // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
878 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
879 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
880 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
881 // CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], [[VECSPLAT]]
882 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4
883 // CHECK-NEXT: ret void
885 void divide_float_matrix_scalar_double(fx2x3_t b
, double s
) {
889 // CHECK-LABEL: @divide_int_matrix_scalar_short(
890 // NOOPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
891 // NOOPT-NEXT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}}
892 // OPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
893 // OPT-NEXT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
894 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
895 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
896 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
897 // CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
898 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
899 // CHECK-NEXT: ret void
901 void divide_int_matrix_scalar_short(ix9x3_t b
, short s
) {
905 // CHECK-LABEL: @divide_int_matrix_scalar_ull(
906 // NOOPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
907 // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
908 // OPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
909 // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
910 // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
911 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
912 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
913 // CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
914 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
915 // CHECK-NEXT: ret void
917 void divide_int_matrix_scalar_ull(ix9x3_t b
, unsigned long long s
) {
921 // CHECK-LABEL: @divide_ull_matrix_scalar_ull(
922 // NOOPT: [[MAT:%.*]] = load <8 x i64>, ptr [[MAT_ADDR:%.*]], align 8{{$}}
923 // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
924 // OPT: [[MAT:%.*]] = load <8 x i64>, ptr [[MAT_ADDR:%.*]], align 8, !tbaa !{{[0-9]+}}{{$}}
925 // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
926 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[S]], i64 0
927 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <8 x i64> [[VECINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
928 // CHECK-NEXT: [[RES:%.*]] = udiv <8 x i64> [[MAT]], [[VECSPLAT]]
929 // CHECK-NEXT: store <8 x i64> [[RES]], ptr [[MAT_ADDR]], align 8
930 // CHECK-NEXT: ret void
932 void divide_ull_matrix_scalar_ull(ullx4x2_t b
, unsigned long long s
) {
936 // CHECK-LABEL: @divide_float_matrix_constant(
937 // CHECK-NEXT: entry:
938 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
939 // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
940 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
941 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
942 // CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], splat (float 2.500000e+00)
943 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
944 // CHECK-NEXT: ret void
946 void divide_float_matrix_constant(fx2x3_t a
) {
950 // Tests for the matrix type operators.
952 typedef double dx5x5_t
__attribute__((matrix_type(5, 5)));
953 typedef float fx2x3_t
__attribute__((matrix_type(2, 3)));
955 // Check that we can use matrix index expressions on different floating point
956 // matrixes and indices.
957 void insert_double_matrix_const_idx_ll_u_double(dx5x5_t a
, double d
, fx2x3_t b
, float e
, int j
, unsigned k
) {
958 // CHECK-LABEL: @insert_double_matrix_const_idx_ll_u_double(
959 // NOOPT: [[D:%.*]] = load double, ptr %d.addr, align 8{{$}}
960 // OPT: [[D:%.*]] = load double, ptr %d.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
961 // CHECK-NEXT: [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
962 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 5
963 // CHECK-NEXT: store <25 x double> [[MATINS]], ptr {{.*}}, align 8
964 // CHECK-NEXT: ret void
969 void insert_double_matrix_const_idx_i_u_double(dx5x5_t a
, double d
) {
970 // CHECK-LABEL: @insert_double_matrix_const_idx_i_u_double(
971 // NOOPT: [[D:%.*]] = load double, ptr %d.addr, align 8{{$}}
972 // OPT: [[D:%.*]] = load double, ptr %d.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
973 // CHECK-NEXT: [[MAT:%.*]] = load <25 x double>, ptr [[MAT_ADDR:%.*]], align 8{{$}}
974 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 21
975 // CHECK-NEXT: store <25 x double> [[MATINS]], ptr [[MAT_ADDR]], align 8
976 // CHECK-NEXT: ret void
981 void insert_float_matrix_const_idx_ull_i_float(fx2x3_t b
, float e
) {
982 // CHECK-LABEL: @insert_float_matrix_const_idx_ull_i_float(
983 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
984 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
985 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
986 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 3
987 // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
988 // CHECK-NEXT: ret void
993 void insert_float_matrix_idx_i_u_float(fx2x3_t b
, float e
, int j
, unsigned k
) {
994 // CHECK-LABEL: @insert_float_matrix_idx_i_u_float(
995 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
996 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
997 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
998 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
999 // CHECK-NEXT: [[J_EXT:%.*]] = sext i32 [[J]] to i64
1000 // NOOPT-NEXT: [[K:%.*]] = load i32, ptr %k.addr, align 4{{$}}
1001 // OPT-NEXT: [[K:%.*]] = load i32, ptr %k.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1002 // CHECK-NEXT: [[K_EXT:%.*]] = zext i32 [[K]] to i64
1003 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K_EXT]], 2
1004 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
1005 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1006 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1007 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1008 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
1009 // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1010 // CHECK-NEXT: ret void
1015 void insert_float_matrix_idx_s_ull_float(fx2x3_t b
, float e
, short j
, unsigned long long k
) {
1016 // CHECK-LABEL: @insert_float_matrix_idx_s_ull_float(
1017 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
1018 // NOOPT-NEXT: [[J:%.*]] = load i16, ptr %j.addr, align 2{{$}}
1019 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1020 // OPT-NEXT: [[J:%.*]] = load i16, ptr %j.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
1021 // CHECK-NEXT: [[J_EXT:%.*]] = sext i16 [[J]] to i64
1022 // NOOPT-NEXT: [[K:%.*]] = load i64, ptr %k.addr, align 8{{$}}
1023 // OPT-NEXT: [[K:%.*]] = load i64, ptr %k.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1024 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K]], 2
1025 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
1026 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1027 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1028 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1029 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
1030 // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1031 // CHECK-NEXT: ret void
1036 // Check that we can use matrix index expressions on integer matrixes.
1037 typedef int ix9x3_t
__attribute__((matrix_type(9, 3)));
1038 void insert_int_idx_expr(ix9x3_t a
, int i
) {
1039 // CHECK-LABEL: @insert_int_idx_expr(
1040 // NOOPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1041 // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1042 // OPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1043 // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1044 // CHECK-NEXT: [[I2_ADD:%.*]] = add nsw i32 4, [[I2]]
1045 // CHECK-NEXT: [[ADD_EXT:%.*]] = sext i32 [[I2_ADD]] to i64
1046 // CHECK-NEXT: [[IDX2:%.*]] = add i64 18, [[ADD_EXT]]
1047 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
1048 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1049 // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1050 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I1]], i64 [[IDX2]]
1051 // CHECK-NEXT: store <27 x i32> [[MATINS]], ptr [[MAT_ADDR]], align 4
1052 // CHECK-NEXT: ret void
1054 a
[4 + i
][1 + 1u] = i
;
1057 // Check that we can use matrix index expressions on FP and integer
1059 typedef int ix9x3_t
__attribute__((matrix_type(9, 3)));
1060 void insert_float_into_int_matrix(ix9x3_t
*a
, int i
) {
1061 // CHECK-LABEL: @insert_float_into_int_matrix(
1062 // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1063 // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1064 // NOOPT-NEXT: [[MAT_ADDR1:%.*]] = load ptr, ptr %a.addr, align 8{{$}}
1065 // OPT-NEXT: [[MAT_ADDR1:%.*]] = load ptr, ptr %a.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1066 // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR1]], align 4{{$}}
1067 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I]], i64 13
1068 // CHECK-NEXT: store <27 x i32> [[MATINS]], ptr [[MAT_ADDR1]], align 4
1069 // CHECK-NEXT: ret void
1074 // Check that we can use overloaded matrix index expressions on matrixes with
1075 // matching dimensions, but different element types.
1076 typedef double dx3x3_t
__attribute__((matrix_type(3, 3)));
1077 typedef float fx3x3_t
__attribute__((matrix_type(3, 3)));
1078 void insert_matching_dimensions1(dx3x3_t a
, double i
) {
1079 // CHECK-LABEL: @insert_matching_dimensions1(
1080 // NOOPT: [[I:%.*]] = load double, ptr %i.addr, align 8{{$}}
1081 // OPT: [[I:%.*]] = load double, ptr %i.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1082 // CHECK-NEXT: [[MAT:%.*]] = load <9 x double>, ptr [[MAT_ADDR:%.*]], align 8{{$}}
1083 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x double> [[MAT]], double [[I]], i64 5
1084 // CHECK-NEXT: store <9 x double> [[MATINS]], ptr [[MAT_ADDR]], align 8
1085 // CHECK-NEXT: ret void
1090 void insert_matching_dimensions(fx3x3_t b
, float e
) {
1091 // CHECK-LABEL: @insert_matching_dimensions(
1092 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
1093 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1094 // CHECK-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1095 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x float> [[MAT]], float [[E]], i64 7
1096 // CHECK-NEXT: store <9 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1097 // CHECK-NEXT: ret void
1102 double extract_double(dx5x5_t a
) {
1103 // CHECK-LABEL: @extract_double(
1104 // NOOPT: [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
1105 // OPT: [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
1106 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <25 x double> [[MAT]], i64 12
1107 // CHECK-NEXT: ret double [[MATEXT]]
1109 return a
[2][3 - 1u];
1112 double extract_float(fx3x3_t b
) {
1113 // CHECK-LABEL: @extract_float(
1114 // NOOPT: [[MAT:%.*]] = load <9 x float>, ptr {{.*}}, align 4{{$}}
1115 // OPT: [[MAT:%.*]] = load <9 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
1116 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 5
1117 // CHECK-NEXT: [[TO_DOUBLE:%.*]] = fpext float [[MATEXT]] to double
1118 // CHECK-NEXT: ret double [[TO_DOUBLE]]
1123 int extract_int(ix9x3_t c
, unsigned long j
) {
1124 // CHECK-LABEL: @extract_int(
1125 // NOOPT: [[J1:%.*]] = load i64, ptr %j.addr, align 8{{$}}
1126 // NOOPT-NEXT: [[J2:%.*]] = load i64, ptr %j.addr, align 8{{$}}
1127 // OPT: [[J1:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1128 // OPT-NEXT: [[J2:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1129 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J2]], 9
1130 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J1]]
1131 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
1132 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
1133 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1134 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
1135 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <27 x i32> [[MAT]], i64 [[IDX2]]
1136 // CHECK-NEXT: ret i32 [[MATEXT]]
1141 typedef double dx3x2_t
__attribute__((matrix_type(3, 2)));
1143 double test_extract_matrix_pointer1(dx3x2_t
**ptr
, unsigned j
) {
1144 // CHECK-LABEL: @test_extract_matrix_pointer1(
1145 // NOOPT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1146 // OPT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1147 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
1148 // CHECK-NEXT: [[IDX:%.*]] = add i64 3, [[J_EXT]]
1149 // NOOPT-NEXT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8{{$}}
1150 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 6
1151 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1152 // OPT-NEXT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1153 // CHECK-NEXT: [[PTR_IDX:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 1
1154 // NOOPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8{{$}}
1155 // OPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1156 // CHECK-NEXT: [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], ptr [[PTR2]], i64 2
1157 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8{{$}}
1158 // OPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1159 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 [[IDX]]
1160 // CHECK-NEXT: ret double [[MATEXT]]
1162 return ptr
[1][2][j
][1];
1165 double test_extract_matrix_pointer2(dx3x2_t
**ptr
) {
1166 // CHECK-LABEL: @test_extract_matrix_pointer2(
1167 // CHECK-NEXT: entry:
1168 // NOOPT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8{{$}}
1169 // OPT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1170 // CHECK-NEXT: [[PTR_IDX:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 4
1171 // NOOPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8{{$}}
1172 // OPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1173 // CHECK-NEXT: [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], ptr [[PTR2]], i64 6
1174 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8{{$}}
1175 // OPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1176 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 5
1177 // CHECK-NEXT: ret double [[MATEXT]]
1179 return (*(*(ptr
+ 4) + 6))[2][1 * 3 - 2];
1182 void insert_extract(dx5x5_t a
, fx3x3_t b
, unsigned long j
, short k
) {
1183 // CHECK-LABEL: @insert_extract(
1184 // NOOPT: [[K:%.*]] = load i16, ptr %k.addr, align 2{{$}}
1185 // OPT: [[K:%.*]] = load i16, ptr %k.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
1186 // CHECK-NEXT: [[K_EXT:%.*]] = sext i16 [[K]] to i64
1187 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K_EXT]], 3
1188 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], 0
1189 // NOOPT-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1190 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 9
1191 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1192 // OPT-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
1193 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 [[IDX2]]
1194 // NOOPT-NEXT: [[J:%.*]] = load i64, ptr %j.addr, align 8{{$}}
1195 // OPT-NEXT: [[J:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1196 // CHECK-NEXT: [[IDX3:%.*]] = mul i64 [[J]], 3
1197 // CHECK-NEXT: [[IDX4:%.*]] = add i64 [[IDX3]], 2
1198 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX4]], 9
1199 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1200 // CHECK-NEXT: [[MAT2:%.*]] = load <9 x float>, ptr [[MAT_ADDR]], align 4{{$}}
1201 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x float> [[MAT2]], float [[MATEXT]], i64 [[IDX4]]
1202 // CHECK-NEXT: store <9 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1203 // CHECK-NEXT: ret void
1208 void insert_compound_stmt(dx5x5_t a
) {
1209 // CHECK-LABEL: define{{.*}} void @insert_compound_stmt(<25 x double> noundef %a)
1210 // CHECK: [[A:%.*]] = load <25 x double>, ptr [[A_PTR:%.*]], align 8{{$}}
1211 // CHECK-NEXT: [[EXT:%.*]] = extractelement <25 x double> [[A]], i64 17
1212 // CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT]], 1.000000e+00
1213 // CHECK-NEXT: [[A2:%.*]] = load <25 x double>, ptr [[A_PTR]], align 8{{$}}
1214 // CHECK-NEXT: [[INS:%.*]] = insertelement <25 x double> [[A2]], double [[SUB]], i64 17
1215 // CHECK-NEXT: store <25 x double> [[INS]], ptr [[A_PTR]], align 8
1216 // CHECK-NEXT: ret void
1225 void insert_compound_stmt_field(struct Foo
*a
, float f
, unsigned i
, unsigned j
) {
1226 // CHECK-LABEL: define{{.*}} void @insert_compound_stmt_field(ptr noundef %a, float noundef %f, i32 noundef %i, i32 noundef %j)
1227 // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1228 // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1229 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
1230 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1231 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1232 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
1233 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
1234 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
1235 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1236 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1237 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr %mat, align 4{{$}}
1238 // CHECK-NEXT: [[EXT:%.*]] = extractelement <6 x float> [[MAT]], i64 [[IDX2]]
1239 // CHECK-NEXT: [[SUM:%.*]] = fadd float [[EXT]], {{.*}}
1240 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1241 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1242 // CHECK-NEXT: [[MAT2:%.*]] = load <6 x float>, ptr %mat, align 4{{$}}
1243 // CHECK-NEXT: [[INS:%.*]] = insertelement <6 x float> [[MAT2]], float [[SUM]], i64 [[IDX2]]
1244 // CHECK-NEXT: store <6 x float> [[INS]], ptr %mat, align 4
1245 // CHECK-NEXT: ret void
1250 void matrix_as_idx(ix9x3_t a
, int i
, int j
, dx5x5_t b
) {
1251 // CHECK-LABEL: define{{.*}} void @matrix_as_idx(<27 x i32> noundef %a, i32 noundef %i, i32 noundef %j, <25 x double> noundef %b)
1252 // NOOPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1253 // OPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1254 // CHECK-NEXT: [[I1_EXT:%.*]] = sext i32 [[I1]] to i64
1255 // NOOPT-NEXT: [[J1:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1256 // OPT-NEXT: [[J1:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1257 // CHECK-NEXT: [[J1_EXT:%.*]] = sext i32 [[J1]] to i64
1258 // CHECK-NEXT: [[IDX1_1:%.*]] = mul i64 [[J1_EXT]], 9
1259 // CHECK-NEXT: [[IDX1_2:%.*]] = add i64 [[IDX1_1]], [[I1_EXT]]
1260 // NOOPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
1261 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX1_2]], 27
1262 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1263 // OPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1264 // CHECK-NEXT: [[MI1:%.*]] = extractelement <27 x i32> [[A]], i64 [[IDX1_2]]
1265 // CHECK-NEXT: [[MI1_EXT:%.*]] = sext i32 [[MI1]] to i64
1266 // NOOPT-NEXT: [[J2:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1267 // OPT-NEXT: [[J2:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1268 // CHECK-NEXT: [[J2_EXT:%.*]] = sext i32 [[J2]] to i64
1269 // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1270 // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1271 // CHECK-NEXT: [[I2_EXT:%.*]] = sext i32 [[I2]] to i64
1272 // CHECK-NEXT: [[IDX2_1:%.*]] = mul i64 [[I2_EXT]], 9
1273 // CHECK-NEXT: [[IDX2_2:%.*]] = add i64 [[IDX2_1]], [[J2_EXT]]
1274 // NOOPT-NEXT: [[A2:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
1275 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2_2]], 27
1276 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1277 // OPT-NEXT: [[A2:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
1278 // CHECK-NEXT: [[MI2:%.*]] = extractelement <27 x i32> [[A2]], i64 [[IDX2_2]]
1279 // CHECK-NEXT: [[MI3:%.*]] = add nsw i32 [[MI2]], 2
1280 // CHECK-NEXT: [[MI3_EXT:%.*]] = sext i32 [[MI3]] to i64
1281 // CHECK-NEXT: [[IDX3_1:%.*]] = mul i64 [[MI3_EXT]], 5
1282 // CHECK-NEXT: [[IDX3_2:%.*]] = add i64 [[IDX3_1]], [[MI1_EXT]]
1283 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX3_2]], 25
1284 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1285 // CHECK-NEXT: [[B:%.*]] = load <25 x double>, ptr [[B_PTR:%.*]], align 8{{$}}
1286 // CHECK-NEXT: [[INS:%.*]] = insertelement <25 x double> [[B]], double 1.500000e+00, i64 [[IDX3_2]]
1287 // CHECK-NEXT: store <25 x double> [[INS]], ptr [[B_PTR]], align 8
1288 b
[a
[i
][j
]][a
[j
][i
] + 2] = 1.5;