clang/test/CodeGen/matrix-type-operators.c

   1 // RUN: %clang_cc1 -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,NOOPT %s
   2 // RUN: %clang_cc1 -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,OPT %s
   3
   4
   5 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
   6 typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
   7 typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
   8 typedef unsigned long long ullx4x2_t __attribute__((matrix_type(4, 2)));
   9
  10 // Floating point matrix/scalar additions.
  11
  12 void add_matrix_matrix_double(dx5x5_t a, dx5x5_t b, dx5x5_t c) {
  13   // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b, <25 x double> noundef %c)
  14   // NOOPT:       [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
  15   // NOOPT-NEXT:  [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
  16   // OPT:         [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  17   // OPT-NEXT:    [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  18   // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[B]], [[C]]
  19   // CHECK-NEXT:  store <25 x double> [[RES]], ptr {{.*}}, align 8
  20
  21   a = b + c;
  22 }
  23
  24 void add_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
  25   // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b)
  26   // NOOPT:       [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
  27   // NOOPT-NEXT:  [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
  28   // OPT:         [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  29   // OPT-NEXT:    [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  30   // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[A]], [[B]]
  31   // CHECK-NEXT:  store <25 x double> [[RES]], ptr {{.*}}, align 8
  32
  33   a += b;
  34 }
  35
  36 void subtract_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
  37   // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b)
  38   // NOOPT:       [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
  39   // NOOPT-NEXT:  [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
  40   // OPT:         [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  41   // OPT-NEXT:    [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  42   // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[A]], [[B]]
  43   // CHECK-NEXT:  store <25 x double> [[RES]], ptr {{.*}}, align 8
  44
  45   a -= b;
  46 }
  47
  48 void add_matrix_matrix_float(fx2x3_t a, fx2x3_t b, fx2x3_t c) {
  49   // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b, <6 x float> noundef %c)
  50   // NOOPT:       [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
  51   // NOOPT-NEXT:  [[C:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
  52   // OPT:         [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  53   // OPT-NEXT:    [[C:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  54   // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[B]], [[C]]
  55   // CHECK-NEXT:  store <6 x float> [[RES]], ptr {{.*}}, align 4
  56
  57   a = b + c;
  58 }
  59
  60 void add_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) {
  61   // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b)
  62   // NOOPT:       [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
  63   // NOOPT-NEXT:  [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
  64   // OPT:         [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  65   // OPT-NEXT:    [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  66   // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[A]], [[B]]
  67   // CHECK-NEXT:  store <6 x float> [[RES]], ptr {{.*}}, align 4
  68
  69   a += b;
  70 }
  71
  72 void subtract_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) {
  73   // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b)
  74   // NOOPT:       [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
  75   // NOOPT-NEXT:  [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
  76   // OPT:         [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  77   // OPT-NEXT:    [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  78   // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[A]], [[B]]
  79   // CHECK-NEXT:  store <6 x float> [[RES]], ptr {{.*}}, align 4
  80
  81   a -= b;
  82 }
  83
  84 void add_matrix_scalar_double_float(dx5x5_t a, float vf) {
  85   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
  86   // NOOPT:       [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
  87   // NOOPT-NEXT:  [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
  88   // OPT:         [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
  89   // OPT-NEXT:    [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
  90   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
  91   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
  92   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
  93   // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
  94   // CHECK-NEXT:  store <25 x double> [[RES]], ptr {{.*}}, align 8
  95
  96   a = a + vf;
  97 }
  98
  99 void add_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
 100   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
 101   // NOOPT:  [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
 102   // OPT:    [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 103   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
 104   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
 105   // OPT-NEXT:    [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 106   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
 107   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
 108   // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
 109   // CHECK-NEXT:  store <25 x double> [[RES]], ptr {{.*}}, align 8
 110
 111   a += vf;
 112 }
 113
 114 void subtract_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
 115   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
 116   // NOOPT:  [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
 117   // OPT:    [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 118   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
 119   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
 120   // OPT-NEXT:    [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 121   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
 122   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
 123   // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
 124   // CHECK-NEXT:  store <25 x double> [[RES]], ptr {{.*}}, align 8
 125
 126   a -= vf;
 127 }
 128
 129 void add_matrix_scalar_double_double(dx5x5_t a, double vd) {
 130   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
 131   // NOOPT:       [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
 132   // NOOPT-NEXT:  [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
 133   // OPT:         [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 134   // OPT-NEXT:    [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 135   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
 136   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
 137   // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
 138   // CHECK-NEXT:  store <25 x double> [[RES]], ptr {{.*}}, align 8
 139
 140   a = a + vd;
 141 }
 142
 143 void add_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
 144   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
 145   // NOOPT:       [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
 146   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
 147   // OPT:         [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 148   // OPT-NEXT:    [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 149   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
 150   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
 151   // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
 152   // store <25 x double> [[RES]], ptr {{.*}}, align 8
 153   a += vd;
 154 }
 155
 156 void subtract_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
 157   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
 158   // NOOPT:       [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
 159   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
 160   // OPT:         [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 161   // OPT-NEXT:    [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 162   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
 163   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
 164   // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
 165   // store <25 x double> [[RES]], ptr {{.*}}, align 8
 166   a -= vd;
 167 }
 168
 169 void add_matrix_scalar_float_float(fx2x3_t b, float vf) {
 170   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
 171   // NOOPT:       [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
 172   // NOOPT-NEXT:  [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
 173   // OPT:         [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 174   // OPT-NEXT:    [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 175   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
 176   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
 177   // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
 178   // CHECK-NEXT:  store <6 x float> [[RES]], ptr {{.*}}, align 4
 179
 180   b = b + vf;
 181 }
 182
 183 void add_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
 184   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
 185   // NOOPT:       [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
 186   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4{{$}}
 187   // OPT:         [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 188   // OPT-NEXT:    [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 189   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
 190   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
 191   // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
 192   // CHECK-NEXT:  store <6 x float> [[RES]], ptr {{.*}}, align 4
 193   b += vf;
 194 }
 195
 196 void subtract_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
 197   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
 198   // NOOPT:       [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
 199   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4{{$}}
 200   // OPT:         [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 201   // OPT-NEXT:    [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 202   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
 203   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
 204   // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
 205   // CHECK-NEXT:  store <6 x float> [[RES]], ptr {{.*}}, align 4
 206   b -= vf;
 207 }
 208
 209 void add_matrix_scalar_float_double(fx2x3_t b, double vd) {
 210   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
 211   // NOOPT:       [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
 212   // NOOPT-NEXT:  [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
 213   // OPT:         [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 214   // OPT-NEXT:    [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 215   // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
 216   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
 217   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
 218   // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
 219   // CHECK-NEXT:  store <6 x float> [[RES]], ptr {{.*}}, align 4
 220
 221   b = b + vd;
 222 }
 223
 224 void add_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
 225   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
 226   // NOOPT:       [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
 227   // OPT:         [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 228   // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
 229   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
 230   // OPT-NEXT:    [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 231   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
 232   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
 233   // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
 234   // CHECK-NEXT:  store <6 x float> [[RES]], ptr {{.*}}, align 4
 235   b += vd;
 236 }
 237
 238 void subtract_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
 239   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
 240   // NOOPT:       [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
 241   // OPT:         [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 242   // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
 243   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
 244   // OPT-NEXT:    [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 245   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
 246   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
 247   // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
 248   // CHECK-NEXT:  store <6 x float> [[RES]], ptr {{.*}}, align 4
 249   b -= vd;
 250 }
 251
 252 // Integer matrix/scalar additions
 253
 254 void add_matrix_matrix_int(ix9x3_t a, ix9x3_t b, ix9x3_t c) {
 255   // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b, <27 x i32> noundef %c)
 256   // NOOPT:       [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
 257   // NOOPT-NEXT:  [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
 258   // OPT:         [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 259   // OPT-NEXT:    [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 260   // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[B]], [[C]]
 261   // CHECK-NEXT:  store <27 x i32> [[RES]], ptr {{.*}}, align 4
 262   a = b + c;
 263 }
 264
 265 void add_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) {
 266   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b)
 267   // NOOPT:       [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
 268   // NOOPT-NEXT:  [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
 269   // OPT:         [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 270   // OPT-NEXT:    [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 271   // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[A]], [[B]]
 272   // CHECK-NEXT:  store <27 x i32> [[RES]], ptr {{.*}}, align 4
 273   a += b;
 274 }
 275
 276 void subtract_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) {
 277   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b)
 278   // NOOPT:       [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
 279   // NOOPT-NEXT:  [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
 280   // OPT:         [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 281   // OPT-NEXT:    [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 282   // CHECK-NEXT:  [[RES:%.*]] = sub <27 x i32> [[A]], [[B]]
 283   // CHECK-NEXT:  store <27 x i32> [[RES]], ptr {{.*}}, align 4
 284   a -= b;
 285 }
 286
 287 void add_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b, ullx4x2_t c) {
 288   // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b, <8 x i64> noundef %c)
 289   // NOOPT:       [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
 290   // NOOPT-NEXT:  [[C:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
 291   // OPT:         [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 292   // OPT-NEXT:    [[C:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 293   // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[B]], [[C]]
 294   // CHECK-NEXT:  store <8 x i64> [[RES]], ptr {{.*}}, align 8
 295
 296   a = b + c;
 297 }
 298
 299 void add_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) {
 300   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b)
 301   // NOOPT:       [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
 302   // NOOPT-NEXT:  [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
 303   // OPT:         [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 304   // OPT-NEXT:    [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 305   // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[A]], [[B]]
 306   // CHECK-NEXT:  store <8 x i64> [[RES]], ptr {{.*}}, align 8
 307
 308   a += b;
 309 }
 310
 311 void subtract_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) {
 312   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b)
 313   // NOOPT:       [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
 314   // OPT:         [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 315   // NOOPT-NEXT:  [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
 316   // OPT-NEXT:    [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 317   // CHECK-NEXT:  [[RES:%.*]] = sub <8 x i64> [[A]], [[B]]
 318   // CHECK-NEXT:  store <8 x i64> [[RES]], ptr {{.*}}, align 8
 319
 320   a -= b;
 321 }
 322
 323 void add_matrix_scalar_int_short(ix9x3_t a, short vs) {
 324   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
 325   // NOOPT:        [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
 326   // NOOPT-NEXT:   [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
 327   // OPT:          [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
 328   // OPT-NEXT:     [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
 329   // CHECK-NEXT:   [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
 330   // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i64 0
 331   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
 332   // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
 333   // CHECK-NEXT:   store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
 334
 335   a = a + vs;
 336 }
 337
 338 void add_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
 339   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
 340   // NOOPT:       [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
 341   // OPT:         [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
 342   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
 343   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
 344   // OPT-NEXT:    [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 345   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i64 0
 346   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
 347   // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
 348   // CHECK-NEXT:  store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
 349
 350   a += vs;
 351 }
 352
 353 void subtract_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
 354   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
 355   // NOOPT:       [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
 356   // OPT:         [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
 357   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
 358   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
 359   // OPT-NEXT:    [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 360   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i64 0
 361   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
 362   // CHECK-NEXT:  [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
 363   // CHECK-NEXT:  store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
 364
 365   a -= vs;
 366 }
 367
 368 void add_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
 369   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
 370   // NOOPT:        [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
 371   // NOOPT-NEXT:   [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
 372   // OPT:          [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
 373   // OPT-NEXT:     [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 374   // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
 375   // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
 376   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
 377   // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
 378   // CHECK-NEXT:   store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
 379
 380   a = a + vli;
 381 }
 382
 383 void add_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
 384   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
 385   // NOOPT:       [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
 386   // OPT:         [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 387   // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
 388   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
 389   // OPT-NEXT:    [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 390   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
 391   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
 392   // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
 393   // CHECK-NEXT:  store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
 394
 395   a += vli;
 396 }
 397
 398 void subtract_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
 399   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
 400   // NOOPT:       [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
 401   // OPT:         [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 402   // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
 403   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
 404   // OPT-NEXT:    [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 405   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
 406   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
 407   // CHECK-NEXT:  [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
 408   // CHECK-NEXT:  store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
 409
 410   a -= vli;
 411 }
 412
 413 void add_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
 414   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
 415   // NOOPT:        [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
 416   // NOOPT-NEXT:   [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
 417   // OPT:          [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
 418   // OPT-NEXT:     [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 419   // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
 420   // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
 421   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
 422   // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
 423   // CHECK-NEXT:   store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
 424
 425   a = a + vulli;
 426 }
 427
 428 void add_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
 429   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
 430   // NOOPT:        [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
 431   // OPT:          [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 432   // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
 433   // NOOPT-NEXT:   [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4{{$}}
 434   // OPT-NEXT:     [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
 435   // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
 436   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
 437   // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
 438   // CHECK-NEXT:   store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
 439
 440   a += vulli;
 441 }
 442
 443 void subtract_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
 444   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
 445   // NOOPT:        [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
 446   // OPT:          [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 447   // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
 448   // NOOPT-NEXT:   [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4{{$}}
 449   // OPT-NEXT:     [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
 450   // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
 451   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
 452   // CHECK-NEXT:   [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
 453   // CHECK-NEXT:   store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
 454
 455   a -= vulli;
 456 }
 457
 458 void add_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
 459   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
 460   // NOOPT:         [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
 461   // OPT:           [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
 462   // CHECK-NEXT:    [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
 463   // NOOPT-NEXT:    [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
 464   // OPT-NEXT:      [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 465   // CHECK-NEXT:    [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
 466   // CHECK-NEXT:    [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
 467   // CHECK-NEXT:    [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
 468   // CHECK-NEXT:    store <8 x i64> [[RES]], ptr {{.*}}, align 8
 469
 470   b = vs + b;
 471 }
 472
 473 void add_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
 474   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
 475   // NOOPT:       [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
 476   // OPT:         [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
 477   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
 478   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
 479   // OPT-NEXT:    [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 480   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
 481   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
 482   // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
 483   // CHECK-NEXT:  store <8 x i64> [[RES]], ptr {{.*}}, align 8
 484
 485   b += vs;
 486 }
 487
 488 void subtract_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
 489   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
 490   // NOOPT:       [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
 491   // OPT:         [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
 492   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
 493   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
 494   // OPT-NEXT:    [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 495   // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
 496   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
 497   // CHECK-NEXT:  [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
 498   // CHECK-NEXT:  store <8 x i64> [[RES]], ptr {{.*}}, align 8
 499
 500   b -= vs;
 501 }
 502
 503 void add_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
 504   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
 505   // NOOPT:         [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
 506   // NOOPT-NEXT:    [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
 507   // OPT:           [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 508   // OPT-NEXT:      [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 509   // CHECK-NEXT:    [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
 510   // CHECK-NEXT:    [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
 511   // CHECK-NEXT:    [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
 512   // CHECK-NEXT:    store <8 x i64> [[RES]], ptr {{.*}}, align 8
 513
 514   b = vli + b;
 515 }
 516
 517 void add_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
 518   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
 519   // NOOPT:        [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
 520   // NOOPT-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
 521   // OPT:          [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 522   // OPT-NEXT:     [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 523   // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
 524   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
 525   // CHECK-NEXT:   [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
 526   // CHECK-NEXT:   store <8 x i64> [[RES]], ptr {{.*}}, align 8
 527
 528   b += vli;
 529 }
 530
 531 void subtract_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
 532   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
 533   // NOOPT:        [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
 534   // OPT:          [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 535   // NOOPT-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
 536   // OPT-NEXT:     [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 537   // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
 538   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
 539   // CHECK-NEXT:   [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
 540   // CHECK-NEXT:   store <8 x i64> [[RES]], ptr {{.*}}, align 8
 541
 542   b -= vli;
 543 }
 544
 545 void add_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
 546   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_unsigned_long_long
 547   // NOOPT:        [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
 548   // NOOPT-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
 549   // OPT:          [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 550   // OPT-NEXT:     [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 551   // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
 552   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
 553   // CHECK-NEXT:   [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
 554   // CHECK-NEXT:   store <8 x i64> [[RES]], ptr {{.*}}, align 8
 555   b = vulli + b;
 556 }
 557
 558 void add_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
 559   // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_unsigned_long_long
 560   // NOOPT:        [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
 561   // NOOPT-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
 562   // OPT:          [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 563   // OPT-NEXT:     [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 564   // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
 565   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
 566   // CHECK-NEXT:   [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
 567   // CHECK-NEXT:   store <8 x i64> [[RES]], ptr {{.*}}, align 8
 568
 569   b += vulli;
 570 }
 571
 572 void subtract_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
 573   // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_unsigned_long_long
 574   // NOOPT:        [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
 575   // NOOPT-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
 576   // OPT:          [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 577   // OPT-NEXT:     [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 578   // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
 579   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
 580   // CHECK-NEXT:   [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
 581   // CHECK-NEXT:   store <8 x i64> [[RES]], ptr {{.*}}, align 8
 582
 583   b -= vulli;
 584 }
 585
 586 // Tests for matrix multiplication.
 587
 588 void multiply_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
 589   // CHECK-LABEL: @multiply_matrix_matrix_double(
 590   // NOOPT:         [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
 591   // NOOPT-NEXT:    [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
 592   // OPT:           [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 593   // OPT-NEXT:      [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 594   // CHECK-NEXT:    [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
 595   // CHECK-NEXT:    store <25 x double> [[RES]], ptr %a, align 8
 596   // CHECK:         ret void
 597   //
 598
 599   dx5x5_t a;
 600   a = b * c;
 601 }
 602
 603 void multiply_compound_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
 604   // CHECK-LABEL: @multiply_compound_matrix_matrix_double(
 605   // NOOPT:        [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
 606   // NOOPT-NEXT:   [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
 607   // OPT:          [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 608   // OPT-NEXT:     [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 609   // CHECK-NEXT:   [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
 610   // CHECK-NEXT:   store <25 x double> [[RES]], ptr {{.*}}, align 8
 611   // CHECK-NEXT:   ret void
 612   b *= c;
 613 }
 614
 615 typedef int ix3x9_t __attribute__((matrix_type(3, 9)));
 616 typedef int ix9x9_t __attribute__((matrix_type(9, 9)));
 617 // CHECK-LABEL: @multiply_matrix_matrix_int(
 618 // NOOPT:         [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
 619 // NOOPT-NEXT:    [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
 620 // OPT:           [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 621 // OPT-NEXT:      [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
 622 // CHECK-NEXT:    [[RES:%.*]] = call <81 x i32> @llvm.matrix.multiply.v81i32.v27i32.v27i32(<27 x i32> [[B]], <27 x i32> [[C]], i32 9, i32 3, i32 9)
 623 // CHECK-NEXT:    store <81 x i32> [[RES]], ptr %a, align 4
 624 // CHECK:         ret void
 625 //
 626 void multiply_matrix_matrix_int(ix9x3_t b, ix3x9_t c) {
 627   ix9x9_t a;
 628   a = b * c;
 629 }
 630
 631 // CHECK-LABEL: @multiply_double_matrix_scalar_float(
 632 // NOOPT:         [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
 633 // NOOPT-NEXT:    [[S:%.*]] = load float, ptr %s.addr, align 4{{$}}
 634 // OPT:           [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 635 // OPT-NEXT:      [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 636 // CHECK-NEXT:    [[S_EXT:%.*]] = fpext float [[S]] to double
 637 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
 638 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
 639 // CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
 640 // CHECK-NEXT:    store <25 x double> [[RES]], ptr {{.*}}, align 8
 641 // CHECK-NEXT:    ret void
 642 //
 643 void multiply_double_matrix_scalar_float(dx5x5_t a, float s) {
 644   a = a * s;
 645 }
 646
 647 // CHECK-LABEL: @multiply_compound_double_matrix_scalar_float
 648 // NOOPT:         [[S:%.*]] = load float, ptr %s.addr, align 4{{$}}
 649 // OPT:           [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 650 // CHECK-NEXT:    [[S_EXT:%.*]] = fpext float [[S]] to double
 651 // NOOPT-NEXT:    [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
 652 // OPT-NEXT:      [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 653 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
 654 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
 655 // CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
 656 // CHECK-NEXT:    store <25 x double> [[RES]], ptr {{.*}}, align 8
 657 // CHECK-NEXT:    ret void
 658 //
 659 void multiply_compound_double_matrix_scalar_float(dx5x5_t a, float s) {
 660   a *= s;
 661 }
 662
 663 // CHECK-LABEL: @multiply_double_matrix_scalar_double(
 664 // NOOPT:         [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
 665 // NOOPT-NEXT:    [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
 666 // OPT:           [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 667 // OPT-NEXT:      [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 668 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
 669 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
 670 // CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
 671 // CHECK-NEXT:    store <25 x double> [[RES]], ptr {{.*}}, align 8
 672 // CHECK-NEXT:    ret void
 673 //
 674 void multiply_double_matrix_scalar_double(dx5x5_t a, double s) {
 675   a = a * s;
 676 }
 677
 678 // CHECK-LABEL: @multiply_compound_double_matrix_scalar_double(
 679 // NOOPT:         [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
 680 // NOOPT-NEXT:    [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
 681 // OPT:           [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 682 // OPT-NEXT:      [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 683 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
 684 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
 685 // CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
 686 // CHECK-NEXT:    store <25 x double> [[RES]], ptr {{.*}}, align 8
 687 // CHECK-NEXT:    ret void
 688 void multiply_compound_double_matrix_scalar_double(dx5x5_t a, double s) {
 689   a *= s;
 690 }
 691
 692 // CHECK-LABEL: @multiply_float_matrix_scalar_double(
 693 // NOOPT:         [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
 694 // OPT:           [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 695 // CHECK-NEXT:    [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
 696 // NOOPT-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
 697 // OPT-NEXT:      [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
 698 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
 699 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
 700 // CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[VECSPLAT]], [[MAT]]
 701 // CHECK-NEXT:    store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4
 702 // CHECK-NEXT:    ret void
 703 //
 704 void multiply_float_matrix_scalar_double(fx2x3_t b, double s) {
 705   b = s * b;
 706 }
 707
 708 // CHECK-LABEL: @multiply_compound_float_matrix_scalar_double(
 709 // NOOPT:         [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
 710 // OPT:           [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 711 // CHECK-NEXT:    [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
 712 // NOOPT-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
 713 // OPT-NEXT:      [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
 714 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
 715 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
 716 // CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[MAT]], [[VECSPLAT]]
 717 // store <6 x float> %3, ptr [[MAT_ADDR]], align 4
 718 // ret void
 719 void multiply_compound_float_matrix_scalar_double(fx2x3_t b, double s) {
 720   b *= s;
 721 }
 722
 723 // CHECK-LABEL: @multiply_int_matrix_scalar_short(
 724 // NOOPT:         [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}}
 725 // OPT:           [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
 726 // CHECK-NEXT:    [[S_EXT:%.*]] = sext i16 [[S]] to i32
 727 // NOOPT-NEXT:    [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
 728 // OPT-NEXT:      [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
 729 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
 730 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
 731 // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[VECSPLAT]], [[MAT]]
 732 // CHECK-NEXT:    store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
 733 // CHECK-NEXT:    ret void
 734 //
 735 void multiply_int_matrix_scalar_short(ix9x3_t b, short s) {
 736   b = s * b;
 737 }
 738
 739 // CHECK-LABEL: @multiply_compound_int_matrix_scalar_short(
 740 // NOOPT:        [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}}
 741 // OPT:          [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
 742 // CHECK-NEXT:   [[S_EXT:%.*]] = sext i16 [[S]] to i32
 743 // NOOPT-NEXT:   [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
 744 // OPT-NEXT:     [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
 745 // CHECK-NEXT:   [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
 746 // CHECK-NEXT:   [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
 747 // CHECK-NEXT:   [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
 748 // CHECK-NEXT:   store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
 749 // CHECK-NEXT:   ret void
 750 //
 751 void multiply_compound_int_matrix_scalar_short(ix9x3_t b, short s) {
 752   b *= s;
 753 }
 754
 755 // CHECK-LABEL: @multiply_int_matrix_scalar_ull(
 756 // NOOPT:         [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
 757 // OPT:           [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
 758 // NOOPT-NEXT:    [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
 759 // OPT-NEXT:      [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 760 // CHECK-NEXT:    [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
 761 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
 762 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
 763 // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
 764 // CHECK-NEXT:    store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
 765 // CHECK-NEXT:    ret void
 766 //
 767 void multiply_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
 768   b = b * s;
 769 }
 770
 771 void multiply_compound_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
 772   // CHECK-LABEL: @multiply_compound_int_matrix_scalar_ull(
 773   // NOOPT:         [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
 774   // OPT:           [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 775   // CHECK-NEXT:    [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
 776   // NOOPT-NEXT:    [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
 777   // OPT-NEXT:      [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
 778   // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
 779   // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
 780   // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
 781   // CHECK-NEXT:    store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
 782   // CHECK-NEXT:    ret void
 783
 784   b *= s;
 785 }
 786
 787 // CHECK-LABEL: @multiply_float_matrix_constant(
 788 // CHECK-NEXT:  entry:
 789 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [6 x float], align 4
 790 // CHECK-NEXT:    store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
 791 // NOOPT-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
 792 // OPT-NEXT:      [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
 793 // CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
 794 // CHECK-NEXT:    store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
 795 // CHECK-NEXT:    ret void
 796 //
 797 void multiply_float_matrix_constant(fx2x3_t a) {
 798   a = a * 2.5;
 799 }
 800
 801 // CHECK-LABEL: @multiply_compound_float_matrix_constant(
 802 // CHECK-NEXT:  entry:
 803 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [6 x float], align 4
 804 // CHECK-NEXT:    store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
 805 // NOOPT-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
 806 // OPT-NEXT:      [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
 807 // CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
 808 // CHECK-NEXT:    store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
 809 // CHECK-NEXT:    ret void
 810 void multiply_compound_float_matrix_constant(fx2x3_t a) {
 811   a *= 2.5;
 812 }
 813
 814 // CHECK-LABEL: @multiply_int_matrix_constant(
 815 // CHECK-NEXT:  entry:
 816 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [27 x i32], align 4
 817 // CHECK-NEXT:    store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4
 818 // NOOPT-NEXT:    [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}}
 819 // OPT-NEXT:      [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
 820 // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, [[MAT]]
 821 // CHECK-NEXT:    store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4
 822 // CHECK-NEXT:    ret void
 823 //
 824 void multiply_int_matrix_constant(ix9x3_t a) {
 825   a = 5 * a;
 826 }
 827
 828 // CHECK-LABEL: @multiply_compound_int_matrix_constant(
 829 // CHECK-NEXT:  entry:
 830 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [27 x i32], align 4
 831 // CHECK-NEXT:    store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4
 832 // NOOPT-NEXT:    [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}}
 833 // OPT-NEXT:      [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
 834 // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[MAT]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 835 // CHECK-NEXT:    store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4
 836 // CHECK-NEXT:    ret void
 837 //
 838 void multiply_compound_int_matrix_constant(ix9x3_t a) {
 839   a *= 5;
 840 }
 841
 842 // CHECK-LABEL: @divide_double_matrix_scalar_float(
 843 // NOOPT:         [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
 844 // NOOPT-NEXT:    [[S:%.*]] = load float, ptr %s.addr, align 4{{$}}
 845 // OPT:           [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 846 // OPT-NEXT:      [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 847 // CHECK-NEXT:    [[S_EXT:%.*]] = fpext float [[S]] to double
 848 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
 849 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
 850 // CHECK-NEXT:    [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
 851 // CHECK-NEXT:    store <25 x double> [[RES]], ptr {{.*}}, align 8
 852 // CHECK-NEXT:    ret void
 853 //
 854 void divide_double_matrix_scalar_float(dx5x5_t a, float s) {
 855   a = a / s;
 856 }
 857
 858 // CHECK-LABEL: @divide_double_matrix_scalar_double(
 859 // NOOPT:         [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
 860 // NOOPT-NEXT:    [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
 861 // OPT:           [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 862 // OPT-NEXT:      [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 863 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
 864 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
 865 // CHECK-NEXT:    [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
 866 // CHECK-NEXT:    store <25 x double> [[RES]], ptr {{.*}}, align 8
 867 // CHECK-NEXT:    ret void
 868 //
 869 void divide_double_matrix_scalar_double(dx5x5_t a, double s) {
 870   a = a / s;
 871 }
 872
 873 // CHECK-LABEL: @divide_float_matrix_scalar_double(
 874 // NOOPT:         [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
 875 // NOOPT-NEXT:    [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
 876 // OPT:           [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
 877 // OPT-NEXT:      [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 878 // CHECK-NEXT:    [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
 879 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
 880 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
 881 // CHECK-NEXT:    [[RES:%.*]] = fdiv <6 x float> [[MAT]], [[VECSPLAT]]
 882 // CHECK-NEXT:    store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4
 883 // CHECK-NEXT:    ret void
 884 //
 885 void divide_float_matrix_scalar_double(fx2x3_t b, double s) {
 886   b = b / s;
 887 }
 888
 889 // CHECK-LABEL: @divide_int_matrix_scalar_short(
 890 // NOOPT:         [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
 891 // NOOPT-NEXT:    [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}}
 892 // OPT:           [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
 893 // OPT-NEXT:      [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
 894 // CHECK-NEXT:    [[S_EXT:%.*]] = sext i16 [[S]] to i32
 895 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
 896 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
 897 // CHECK-NEXT:    [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
 898 // CHECK-NEXT:    store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
 899 // CHECK-NEXT:    ret void
 900 //
 901 void divide_int_matrix_scalar_short(ix9x3_t b, short s) {
 902   b = b / s;
 903 }
 904
 905 // CHECK-LABEL: @divide_int_matrix_scalar_ull(
 906 // NOOPT:         [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
 907 // NOOPT-NEXT:    [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
 908 // OPT:           [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
 909 // OPT-NEXT:      [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 910 // CHECK-NEXT:    [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
 911 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
 912 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
 913 // CHECK-NEXT:    [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
 914 // CHECK-NEXT:    store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
 915 // CHECK-NEXT:    ret void
 916 //
 917 void divide_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
 918   b = b / s;
 919 }
 920
 921 // CHECK-LABEL: @divide_ull_matrix_scalar_ull(
 922 // NOOPT:         [[MAT:%.*]] = load <8 x i64>, ptr [[MAT_ADDR:%.*]], align 8{{$}}
 923 // NOOPT-NEXT:    [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
 924 // OPT:           [[MAT:%.*]] = load <8 x i64>, ptr [[MAT_ADDR:%.*]], align 8, !tbaa !{{[0-9]+}}{{$}}
 925 // OPT-NEXT:      [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 926 // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[S]], i64 0
 927 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <8 x i64> [[VECINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 928 // CHECK-NEXT:    [[RES:%.*]] = udiv <8 x i64> [[MAT]], [[VECSPLAT]]
 929 // CHECK-NEXT:    store <8 x i64> [[RES]], ptr [[MAT_ADDR]], align 8
 930 // CHECK-NEXT:    ret void
 931 //
 932 void divide_ull_matrix_scalar_ull(ullx4x2_t b, unsigned long long s) {
 933   b = b / s;
 934 }
 935
 936 // CHECK-LABEL: @divide_float_matrix_constant(
 937 // CHECK-NEXT:  entry:
 938 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [6 x float], align 4
 939 // CHECK-NEXT:    store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
 940 // NOOPT-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
 941 // OPT-NEXT:      [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
 942 // CHECK-NEXT:    [[RES:%.*]] = fdiv <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
 943 // CHECK-NEXT:    store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
 944 // CHECK-NEXT:    ret void
 945 //
 946 void divide_float_matrix_constant(fx2x3_t a) {
 947   a = a / 2.5;
 948 }
 949
 950 // Tests for the matrix type operators.
 951
 952 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
 953 typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
 954
 955 // Check that we can use matrix index expression on different floating point
 956 // matrixes and indices.
 957 void insert_double_matrix_const_idx_ll_u_double(dx5x5_t a, double d, fx2x3_t b, float e, int j, unsigned k) {
 958   // CHECK-LABEL: @insert_double_matrix_const_idx_ll_u_double(
 959   // NOOPT:         [[D:%.*]] = load double, ptr %d.addr, align 8{{$}}
 960   // OPT:           [[D:%.*]] = load double, ptr %d.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 961   // CHECK-NEXT:    [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
 962   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 5
 963   // CHECK-NEXT:    store <25 x double> [[MATINS]], ptr {{.*}}, align 8
 964   // CHECK-NEXT:    ret void
 965
 966   a[0ll][1u] = d;
 967 }
 968
 969 void insert_double_matrix_const_idx_i_u_double(dx5x5_t a, double d) {
 970   // CHECK-LABEL: @insert_double_matrix_const_idx_i_u_double(
 971   // NOOPT:         [[D:%.*]] = load double, ptr %d.addr, align 8{{$}}
 972   // OPT:           [[D:%.*]] = load double, ptr %d.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 973   // CHECK-NEXT:    [[MAT:%.*]] = load <25 x double>, ptr [[MAT_ADDR:%.*]], align 8{{$}}
 974   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 21
 975   // CHECK-NEXT:    store <25 x double> [[MATINS]], ptr [[MAT_ADDR]], align 8
 976   // CHECK-NEXT:    ret void
 977
 978   a[1][4u] = d;
 979 }
 980
 981 void insert_float_matrix_const_idx_ull_i_float(fx2x3_t b, float e) {
 982   // CHECK-LABEL: @insert_float_matrix_const_idx_ull_i_float(
 983   // NOOPT:         [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
 984   // OPT:           [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 985   // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
 986   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 3
 987   // CHECK-NEXT:    store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
 988   // CHECK-NEXT:    ret void
 989
 990   b[1ull][1] = e;
 991 }
 992
 993 void insert_float_matrix_idx_i_u_float(fx2x3_t b, float e, int j, unsigned k) {
 994   // CHECK-LABEL: @insert_float_matrix_idx_i_u_float(
 995   // NOOPT:         [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
 996   // NOOPT-NEXT:    [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
 997   // OPT:           [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 998   // OPT-NEXT:      [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 999   // CHECK-NEXT:    [[J_EXT:%.*]] = sext i32 [[J]] to i64
1000   // NOOPT-NEXT:    [[K:%.*]] = load i32, ptr %k.addr, align 4{{$}}
1001   // OPT-NEXT:      [[K:%.*]] = load i32, ptr %k.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1002   // CHECK-NEXT:    [[K_EXT:%.*]] = zext i32 [[K]] to i64
1003   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[K_EXT]], 2
1004   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
1005   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1006   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
1007   // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1008   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
1009   // CHECK-NEXT:    store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1010   // CHECK-NEXT:    ret void
1011
1012   b[j][k] = e;
1013 }
1014
1015 void insert_float_matrix_idx_s_ull_float(fx2x3_t b, float e, short j, unsigned long long k) {
1016   // CHECK-LABEL: @insert_float_matrix_idx_s_ull_float(
1017   // NOOPT:         [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
1018   // NOOPT-NEXT:    [[J:%.*]] = load i16, ptr %j.addr, align 2{{$}}
1019   // OPT:           [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1020   // OPT-NEXT:      [[J:%.*]] = load i16, ptr %j.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
1021   // CHECK-NEXT:    [[J_EXT:%.*]] = sext i16 [[J]] to i64
1022   // NOOPT-NEXT:    [[K:%.*]] = load i64, ptr %k.addr, align 8{{$}}
1023   // OPT-NEXT:      [[K:%.*]] = load i64, ptr %k.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1024   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[K]], 2
1025   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
1026   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1027   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
1028   // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1029   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
1030   // CHECK-NEXT:    store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1031   // CHECK-NEXT:    ret void
1032
1033   (b)[j][k] = e;
1034 }
1035
1036 // Check that we can can use matrix index expressions on integer matrixes.
1037 typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
1038 void insert_int_idx_expr(ix9x3_t a, int i) {
1039   // CHECK-LABEL: @insert_int_idx_expr(
1040   // NOOPT:         [[I1:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1041   // NOOPT-NEXT:    [[I2:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1042   // OPT:           [[I1:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1043   // OPT-NEXT:      [[I2:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1044   // CHECK-NEXT:    [[I2_ADD:%.*]] = add nsw i32 4, [[I2]]
1045   // CHECK-NEXT:    [[ADD_EXT:%.*]] = sext i32 [[I2_ADD]] to i64
1046   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 18, [[ADD_EXT]]
1047   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
1048   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
1049   // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1050   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I1]], i64 [[IDX2]]
1051   // CHECK-NEXT:    store <27 x i32> [[MATINS]], ptr [[MAT_ADDR]], align 4
1052   // CHECK-NEXT:    ret void
1053
1054   a[4 + i][1 + 1u] = i;
1055 }
1056
1057 // Check that we can can use matrix index expressions on FP and integer
1058 // matrixes.
1059 typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
1060 void insert_float_into_int_matrix(ix9x3_t *a, int i) {
1061   // CHECK-LABEL: @insert_float_into_int_matrix(
1062   // NOOPT:         [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1063   // OPT:           [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1064   // NOOPT-NEXT:    [[MAT_ADDR1:%.*]] = load ptr, ptr %a.addr, align 8{{$}}
1065   // OPT-NEXT:      [[MAT_ADDR1:%.*]] = load ptr, ptr %a.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1066   // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR1]], align 4{{$}}
1067   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I]], i64 13
1068   // CHECK-NEXT:    store <27 x i32> [[MATINS]], ptr [[MAT_ADDR1]], align 4
1069   // CHECK-NEXT:    ret void
1070
1071   (*a)[4][1] = i;
1072 }
1073
1074 // Check that we can use overloaded matrix index expressions on matrixes with
1075 // matching dimensions, but different element types.
1076 typedef double dx3x3_t __attribute__((matrix_type(3, 3)));
1077 typedef float fx3x3_t __attribute__((matrix_type(3, 3)));
1078 void insert_matching_dimensions1(dx3x3_t a, double i) {
1079   // CHECK-LABEL: @insert_matching_dimensions1(
1080   // NOOPT:         [[I:%.*]] = load double, ptr %i.addr, align 8{{$}}
1081   // OPT:           [[I:%.*]] = load double, ptr %i.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1082   // CHECK-NEXT:    [[MAT:%.*]] = load <9 x double>, ptr [[MAT_ADDR:%.*]], align 8{{$}}
1083   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <9 x double> [[MAT]], double [[I]], i64 5
1084   // CHECK-NEXT:    store <9 x double> [[MATINS]], ptr [[MAT_ADDR]], align 8
1085   // CHECK-NEXT:    ret void
1086
1087   a[2u][1u] = i;
1088 }
1089
1090 void insert_matching_dimensions(fx3x3_t b, float e) {
1091   // CHECK-LABEL: @insert_matching_dimensions(
1092   // NOOPT:         [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
1093   // OPT:           [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1094   // CHECK-NEXT:    [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1095   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <9 x float> [[MAT]], float [[E]], i64 7
1096   // CHECK-NEXT:    store <9 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1097   // CHECK-NEXT:    ret void
1098
1099   b[1u][2u] = e;
1100 }
1101
1102 double extract_double(dx5x5_t a) {
1103   // CHECK-LABEL: @extract_double(
1104   // NOOPT:         [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
1105   // OPT:           [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
1106   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <25 x double> [[MAT]], i64 12
1107   // CHECK-NEXT:    ret double [[MATEXT]]
1108
1109   return a[2][3 - 1u];
1110 }
1111
1112 double extract_float(fx3x3_t b) {
1113   // CHECK-LABEL: @extract_float(
1114   // NOOPT:         [[MAT:%.*]] = load <9 x float>, ptr {{.*}}, align 4{{$}}
1115   // OPT:           [[MAT:%.*]] = load <9 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
1116   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 5
1117   // CHECK-NEXT:    [[TO_DOUBLE:%.*]] = fpext float [[MATEXT]] to double
1118   // CHECK-NEXT:    ret double [[TO_DOUBLE]]
1119
1120   return b[2][1];
1121 }
1122
1123 int extract_int(ix9x3_t c, unsigned long j) {
1124   // CHECK-LABEL: @extract_int(
1125   // NOOPT:         [[J1:%.*]] = load i64, ptr %j.addr, align 8{{$}}
1126   // NOOPT-NEXT:    [[J2:%.*]] = load i64, ptr %j.addr, align 8{{$}}
1127   // OPT:           [[J1:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1128   // OPT-NEXT:      [[J2:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1129   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J2]], 9
1130   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[J1]]
1131   // NOOPT-NEXT:    [[MAT:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
1132   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
1133   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
1134   // OPT-NEXT:      [[MAT:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
1135   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <27 x i32> [[MAT]], i64 [[IDX2]]
1136   // CHECK-NEXT:    ret i32 [[MATEXT]]
1137
1138   return c[j][j];
1139 }
1140
1141 typedef double dx3x2_t __attribute__((matrix_type(3, 2)));
1142
1143 double test_extract_matrix_pointer1(dx3x2_t **ptr, unsigned j) {
1144   // CHECK-LABEL: @test_extract_matrix_pointer1(
1145   // NOOPT:         [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1146   // OPT:           [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1147   // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
1148   // CHECK-NEXT:    [[IDX:%.*]] = add i64 3, [[J_EXT]]
1149   // NOOPT-NEXT:    [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8{{$}}
1150   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX]], 6
1151   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
1152   // OPT-NEXT:      [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1153   // CHECK-NEXT:    [[PTR_IDX:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 1
1154   // NOOPT-NEXT:    [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8{{$}}
1155   // OPT-NEXT:      [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1156   // CHECK-NEXT:    [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], ptr [[PTR2]], i64 2
1157   // NOOPT-NEXT:    [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8{{$}}
1158   // OPT-NEXT:      [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1159   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 [[IDX]]
1160   // CHECK-NEXT:    ret double [[MATEXT]]
1161
1162   return ptr[1][2][j][1];
1163 }
1164
1165 double test_extract_matrix_pointer2(dx3x2_t **ptr) {
1166   // CHECK-LABEL: @test_extract_matrix_pointer2(
1167   // CHECK-NEXT:  entry:
1168   // NOOPT:         [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8{{$}}
1169   // OPT:           [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1170   // CHECK-NEXT:    [[PTR_IDX:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 4
1171   // NOOPT-NEXT:    [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8{{$}}
1172   // OPT-NEXT:      [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1173   // CHECK-NEXT:    [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], ptr [[PTR2]], i64 6
1174   // NOOPT-NEXT:    [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8{{$}}
1175   // OPT-NEXT:      [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1176   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 5
1177   // CHECK-NEXT:    ret double [[MATEXT]]
1178
1179   return (*(*(ptr + 4) + 6))[2][1 * 3 - 2];
1180 }
1181
1182 void insert_extract(dx5x5_t a, fx3x3_t b, unsigned long j, short k) {
1183   // CHECK-LABEL: @insert_extract(
1184   // NOOPT:         [[K:%.*]] = load i16, ptr %k.addr, align 2{{$}}
1185   // OPT:           [[K:%.*]] = load i16, ptr %k.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
1186   // CHECK-NEXT:    [[K_EXT:%.*]] = sext i16 [[K]] to i64
1187   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[K_EXT]], 3
1188   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], 0
1189   // NOOPT-NEXT:    [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1190   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 9
1191   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
1192   // OPT-NEXT:      [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
1193   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 [[IDX2]]
1194   // NOOPT-NEXT:    [[J:%.*]] = load i64, ptr %j.addr, align 8{{$}}
1195   // OPT-NEXT:      [[J:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1196   // CHECK-NEXT:    [[IDX3:%.*]] = mul i64 [[J]], 3
1197   // CHECK-NEXT:    [[IDX4:%.*]] = add i64 [[IDX3]], 2
1198   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX4]], 9
1199   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
1200   // CHECK-NEXT:    [[MAT2:%.*]] = load <9 x float>, ptr [[MAT_ADDR]], align 4{{$}}
1201   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <9 x float> [[MAT2]], float [[MATEXT]], i64 [[IDX4]]
1202   // CHECK-NEXT:    store <9 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1203   // CHECK-NEXT:    ret void
1204
1205   b[2][j] = b[0][k];
1206 }
1207
1208 void insert_compound_stmt(dx5x5_t a) {
1209   // CHECK-LABEL: define{{.*}} void @insert_compound_stmt(<25 x double> noundef %a)
1210   // CHECK:        [[A:%.*]] = load <25 x double>, ptr [[A_PTR:%.*]], align 8{{$}}
1211   // CHECK-NEXT:   [[EXT:%.*]] = extractelement <25 x double> [[A]], i64 17
1212   // CHECK-NEXT:   [[SUB:%.*]] = fsub double [[EXT]], 1.000000e+00
1213   // CHECK-NEXT:   [[A2:%.*]] = load <25 x double>, ptr [[A_PTR]], align 8{{$}}
1214   // CHECK-NEXT:   [[INS:%.*]] = insertelement <25 x double> [[A2]], double [[SUB]], i64 17
1215   // CHECK-NEXT:   store <25 x double> [[INS]], ptr [[A_PTR]], align 8
1216   // CHECK-NEXT:   ret void
1217
1218   a[2][3] -= 1.0;
1219 }
1220
1221 struct Foo {
1222   fx2x3_t mat;
1223 };
1224
1225 void insert_compound_stmt_field(struct Foo *a, float f, unsigned i, unsigned j) {
1226   // CHECK-LABEL: define{{.*}} void @insert_compound_stmt_field(ptr noundef %a, float noundef %f, i32 noundef %i, i32 noundef %j)
1227   // NOOPT:         [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1228   // OPT:           [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1229   // CHECK-NEXT:    [[I_EXT:%.*]] = zext i32 [[I]] to i64
1230   // NOOPT-NEXT:    [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1231   // OPT-NEXT:      [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1232   // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
1233   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
1234   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
1235   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1236   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
1237   // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr %mat, align 4{{$}}
1238   // CHECK-NEXT:    [[EXT:%.*]] = extractelement <6 x float> [[MAT]], i64 [[IDX2]]
1239   // CHECK-NEXT:    [[SUM:%.*]] = fadd float [[EXT]], {{.*}}
1240   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1241   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
1242   // CHECK-NEXT:    [[MAT2:%.*]] = load <6 x float>, ptr %mat, align 4{{$}}
1243   // CHECK-NEXT:    [[INS:%.*]] = insertelement <6 x float> [[MAT2]], float [[SUM]], i64 [[IDX2]]
1244   // CHECK-NEXT:    store <6 x float> [[INS]], ptr %mat, align 4
1245   // CHECK-NEXT:    ret void
1246
1247   a->mat[i][j] += f;
1248 }
1249
1250 void matrix_as_idx(ix9x3_t a, int i, int j, dx5x5_t b) {
1251   // CHECK-LABEL: define{{.*}} void @matrix_as_idx(<27 x i32> noundef %a, i32 noundef %i, i32 noundef %j, <25 x double> noundef %b)
1252   // NOOPT:       [[I1:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1253   // OPT:         [[I1:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1254   // CHECK-NEXT:  [[I1_EXT:%.*]] = sext i32 [[I1]] to i64
1255   // NOOPT-NEXT:  [[J1:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1256   // OPT-NEXT:    [[J1:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1257   // CHECK-NEXT:  [[J1_EXT:%.*]] = sext i32 [[J1]] to i64
1258   // CHECK-NEXT:  [[IDX1_1:%.*]] = mul i64 [[J1_EXT]], 9
1259   // CHECK-NEXT:  [[IDX1_2:%.*]] = add i64 [[IDX1_1]], [[I1_EXT]]
1260   // NOOPT-NEXT:  [[A:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
1261   // OPT-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX1_2]], 27
1262   // OPT-NEXT:    call void @llvm.assume(i1 [[CMP]])
1263   // OPT-NEXT:    [[A:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1264   // CHECK-NEXT:  [[MI1:%.*]] = extractelement <27 x i32> [[A]], i64 [[IDX1_2]]
1265   // CHECK-NEXT:  [[MI1_EXT:%.*]] = sext i32 [[MI1]] to i64
1266   // NOOPT-NEXT:  [[J2:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1267   // OPT-NEXT:    [[J2:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1268   // CHECK-NEXT:  [[J2_EXT:%.*]] = sext i32 [[J2]] to i64
1269   // NOOPT-NEXT:  [[I2:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1270   // OPT-NEXT:    [[I2:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1271   // CHECK-NEXT:  [[I2_EXT:%.*]] = sext i32 [[I2]] to i64
1272   // CHECK-NEXT:  [[IDX2_1:%.*]] = mul i64 [[I2_EXT]], 9
1273   // CHECK-NEXT:  [[IDX2_2:%.*]] = add i64 [[IDX2_1]], [[J2_EXT]]
1274   // NOOPT-NEXT:  [[A2:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
1275   // OPT-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX2_2]], 27
1276   // OPT-NEXT:    call void @llvm.assume(i1 [[CMP]])
1277   // OPT-NEXT:    [[A2:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
1278   // CHECK-NEXT:  [[MI2:%.*]] = extractelement <27 x i32> [[A2]], i64 [[IDX2_2]]
1279   // CHECK-NEXT:  [[MI3:%.*]] = add nsw i32 [[MI2]], 2
1280   // CHECK-NEXT:  [[MI3_EXT:%.*]] = sext i32 [[MI3]] to i64
1281   // CHECK-NEXT:  [[IDX3_1:%.*]] = mul i64 [[MI3_EXT]], 5
1282   // CHECK-NEXT:  [[IDX3_2:%.*]] = add i64 [[IDX3_1]], [[MI1_EXT]]
1283   // OPT-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX3_2]], 25
1284   // OPT-NEXT:    call void @llvm.assume(i1 [[CMP]])
1285   // CHECK-NEXT:  [[B:%.*]] = load <25 x double>, ptr [[B_PTR:%.*]], align 8{{$}}
1286   // CHECK-NEXT:  [[INS:%.*]] = insertelement <25 x double> [[B]], double 1.500000e+00, i64 [[IDX3_2]]
1287   // CHECK-NEXT:  store <25 x double> [[INS]], ptr [[B_PTR]], align 8
1288   b[a[i][j]][a[j][i] + 2] = 1.5;
1289 }