[clang] Implement lifetime analysis for lifetime_capture_by(X) (#115921)
[llvm-project.git] / clang / test / CodeGen / matrix-type-operators.c
blobdde9857921cd12736f8d830531d791d57f9c9ee4
1 // RUN: %clang_cc1 -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,NOOPT %s
2 // RUN: %clang_cc1 -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,OPT %s
5 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
6 typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
7 typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
8 typedef unsigned long long ullx4x2_t __attribute__((matrix_type(4, 2)));
10 // Floating point matrix/scalar additions and subtractions.
// a = b + c on 5x5 double matrices; expects two operand loads, one fadd on <25 x double>, and a store of the result.
12 void add_matrix_matrix_double(dx5x5_t a, dx5x5_t b, dx5x5_t c) {
13 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b, <25 x double> noundef %c)
14 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
15 // NOOPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
16 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
17 // OPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
18 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[B]], [[C]]
19 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
21 a = b + c;
// a += b on 5x5 double matrices; expects two loads, with the second loaded value as the left operand of the fadd, then a store.
24 void add_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
25 // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b)
26 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
27 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
28 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
29 // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
30 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[A]], [[B]]
31 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
33 a += b;
// a -= b on 5x5 double matrices; expects two loads, with the second loaded value as the left operand of the fsub, then a store.
36 void subtract_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
37 // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b)
38 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
39 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
40 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
41 // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
42 // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[A]], [[B]]
43 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
45 a -= b;
// a = b + c on 2x3 float matrices; expects two operand loads, one fadd on <6 x float>, and a store of the result.
48 void add_matrix_matrix_float(fx2x3_t a, fx2x3_t b, fx2x3_t c) {
49 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b, <6 x float> noundef %c)
50 // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
51 // NOOPT-NEXT: [[C:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
52 // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
53 // OPT-NEXT: [[C:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
54 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[B]], [[C]]
55 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
57 a = b + c;
// a += b on 2x3 float matrices; expects two loads, with the second loaded value as the left operand of the fadd, then a store.
60 void add_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) {
61 // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b)
62 // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
63 // NOOPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
64 // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
65 // OPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
66 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[A]], [[B]]
67 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
69 a += b;
// a -= b on 2x3 float matrices; expects two loads, with the second loaded value as the left operand of the fsub, then a store.
72 void subtract_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) {
73 // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b)
74 // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
75 // NOOPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
76 // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
77 // OPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
78 // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[A]], [[B]]
79 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
81 a -= b;
// a = a + vf with a 5x5 double matrix and a float scalar; expects the scalar to be widened with fpext, splatted
// (insertelement + shufflevector), and added to the loaded matrix with fadd.
84 void add_matrix_scalar_double_float(dx5x5_t a, float vf) {
85 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
86 // NOOPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
87 // NOOPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
88 // OPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
89 // OPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
90 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
91 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
92 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
93 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
94 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
96 a = a + vf;
// a += vf with a 5x5 double matrix and a float scalar; expects the scalar load and fpext to come before the matrix
// load, followed by the splat, one fadd, and a store.
99 void add_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
100 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
101 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
102 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
103 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
104 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
105 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
106 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
107 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
108 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
109 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
111 a += vf;
// a -= vf with a 5x5 double matrix and a float scalar; expects the scalar load and fpext to come before the matrix
// load, followed by the splat, one fsub, and a store.
114 void subtract_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
115 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
116 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
117 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
118 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
119 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
120 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
121 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
122 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
123 // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
124 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
126 a -= vf;
// a = a + vd with a 5x5 double matrix and a double scalar; expects no conversion, just a splat of the scalar
// (insertelement + shufflevector), one fadd, and a store.
129 void add_matrix_scalar_double_double(dx5x5_t a, double vd) {
130 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
131 // NOOPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
132 // NOOPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
133 // OPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
134 // OPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
135 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
136 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
137 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
138 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
140 a = a + vd;
// a += vd with a 5x5 double matrix and a double scalar; expects the scalar load, the matrix load, a splat of the
// scalar (insertelement + shufflevector), one fadd, and a store of the result directly after the fadd.
143 void add_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
144 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
145 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
146 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
147 // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
148 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
149 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
150 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
151 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
152 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
153 a += vd;
// a -= vd with a 5x5 double matrix and a double scalar; expects the scalar load, the matrix load, a splat of the
// scalar (insertelement + shufflevector), one fsub, and a store of the result directly after the fsub.
156 void subtract_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
157 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
158 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
159 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
160 // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
161 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
162 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
163 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
164 // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
165 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
166 a -= vd;
// b = b + vf with a 2x3 float matrix and a float scalar; expects no conversion, a splat of the scalar
// (insertelement + shufflevector), one fadd, and a store.
169 void add_matrix_scalar_float_float(fx2x3_t b, float vf) {
170 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
171 // NOOPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
172 // NOOPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
173 // OPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
174 // OPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
175 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
176 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
177 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
178 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
180 b = b + vf;
// b += vf with a 2x3 float matrix and a float scalar; expects the scalar load, then the matrix load from %b.addr,
// a splat of the scalar, one fadd, and a store.
183 void add_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
184 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
185 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
186 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4{{$}}
187 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
188 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
189 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
190 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
191 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
192 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
193 b += vf;
// b -= vf with a 2x3 float matrix and a float scalar; expects the scalar load, then the matrix load from %b.addr,
// a splat of the scalar, one fsub, and a store.
196 void subtract_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
197 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
198 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
199 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4{{$}}
200 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
201 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
202 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
203 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
204 // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
205 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
206 b -= vf;
// b = b + vd with a 2x3 float matrix and a double scalar; expects the scalar to be narrowed with fptrunc, splatted
// (insertelement + shufflevector), and added to the loaded matrix with fadd.
209 void add_matrix_scalar_float_double(fx2x3_t b, double vd) {
210 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
211 // NOOPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
212 // NOOPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
213 // OPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
214 // OPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
215 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
216 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
217 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
218 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
219 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
221 b = b + vd;
// b += vd with a 2x3 float matrix and a double scalar; expects the scalar load and fptrunc to come before the
// matrix load, followed by the splat, one fadd, and a store.
224 void add_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
225 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
226 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
227 // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
228 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
229 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
230 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
231 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
232 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
233 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
234 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
235 b += vd;
// b -= vd with a 2x3 float matrix and a double scalar; expects the scalar load and fptrunc to come before the
// matrix load, followed by the splat, one fsub, and a store.
238 void subtract_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
239 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
240 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
241 // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
242 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
243 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
244 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
245 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
246 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
247 // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
248 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
249 b -= vd;
252 // Integer matrix/scalar additions and subtractions.
// a = b + c on 9x3 int matrices; expects two operand loads, one integer add on <27 x i32>, and a store of the result.
254 void add_matrix_matrix_int(ix9x3_t a, ix9x3_t b, ix9x3_t c) {
255 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b, <27 x i32> noundef %c)
256 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
257 // NOOPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
258 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
259 // OPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
260 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[B]], [[C]]
261 // CHECK-NEXT: store <27 x i32> [[RES]], ptr {{.*}}, align 4
262 a = b + c;
// a += b on 9x3 int matrices; expects two loads, with the second loaded value as the left operand of the add, then a store.
265 void add_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) {
266 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b)
267 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
268 // NOOPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
269 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
270 // OPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
271 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[A]], [[B]]
272 // CHECK-NEXT: store <27 x i32> [[RES]], ptr {{.*}}, align 4
273 a += b;
// a -= b on 9x3 int matrices; expects two loads, with the second loaded value as the left operand of the sub, then a store.
276 void subtract_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) {
277 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b)
278 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
279 // NOOPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
280 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
281 // OPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
282 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[A]], [[B]]
283 // CHECK-NEXT: store <27 x i32> [[RES]], ptr {{.*}}, align 4
284 a -= b;
// a = b + c on 4x2 unsigned long long matrices; expects two operand loads, one add on <8 x i64>, and a store of the result.
287 void add_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b, ullx4x2_t c) {
288 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b, <8 x i64> noundef %c)
289 // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
290 // NOOPT-NEXT: [[C:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
291 // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
292 // OPT-NEXT: [[C:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
293 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[B]], [[C]]
294 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
296 a = b + c;
// a += b on 4x2 unsigned long long matrices; expects two loads, with the second loaded value as the left operand of the add, then a store.
299 void add_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) {
300 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b)
301 // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
302 // NOOPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
303 // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
304 // OPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
305 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[A]], [[B]]
306 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
308 a += b;
// a -= b on 4x2 unsigned long long matrices; expects two loads, with the second loaded value as the left operand of the sub, then a store.
311 void subtract_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) {
312 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b)
313 // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
314 // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
315 // NOOPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
316 // OPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
317 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[A]], [[B]]
318 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
320 a -= b;
// a = a + vs with a 9x3 int matrix and a short scalar; expects the scalar to be sign-extended to i32, splatted,
// added to the loaded matrix, and the result stored back to the address the matrix was loaded from.
323 void add_matrix_scalar_int_short(ix9x3_t a, short vs) {
324 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
325 // NOOPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
326 // NOOPT-NEXT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
327 // OPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
328 // OPT-NEXT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
329 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
330 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i64 0
331 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
332 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
333 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
335 a = a + vs;
// a += vs with a 9x3 int matrix and a short scalar; expects the scalar load and sext to i32 before the matrix load
// from %a.addr, then a splat of the extended scalar, one add, and a store back to %a.addr.
339 void add_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
340 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
341 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
342 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
343 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
344 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
345 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
346 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i64 0
347 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
348 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
349 // CHECK-NEXT: store <27 x i32> [[RES]], ptr %a.addr, align 4
350 a += vs;
// a -= vs with a 9x3 int matrix and a short scalar; expects the scalar load and sext to i32 before the matrix load
// from %a.addr, then a splat of the extended scalar, one sub, and a store back to %a.addr.
354 void subtract_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
355 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
356 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
357 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
358 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
359 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
360 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
361 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i64 0
362 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
363 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
364 // CHECK-NEXT: store <27 x i32> [[RES]], ptr %a.addr, align 4
365 a -= vs;
// a = a + vli with a 9x3 int matrix and a long int scalar; expects the scalar to be truncated from i64 to i32,
// splatted, added to the loaded matrix, and the result stored back to the address the matrix was loaded from.
368 void add_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
369 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
370 // NOOPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
371 // NOOPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
372 // OPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
373 // OPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
374 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
375 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
376 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
377 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
378 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
380 a = a + vli;
// a += vli with a 9x3 int matrix and a long int scalar; expects the scalar load and trunc to i32 before the matrix
// load from %a.addr, then a splat of the truncated scalar, one add, and a store back to %a.addr.
384 void add_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
385 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
386 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
387 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
388 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
389 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
390 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
391 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
392 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
393 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
394 // CHECK-NEXT: store <27 x i32> [[RES]], ptr %a.addr, align 4
395 a += vli;
// a -= vli with a 9x3 int matrix and a long int scalar; expects the scalar load and trunc to i32 before the matrix
// load from %a.addr, then a splat of the truncated scalar, one sub, and a store back to %a.addr.
399 void subtract_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
400 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
401 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
402 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
403 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
404 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
405 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
406 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
407 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
408 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
409 // CHECK-NEXT: store <27 x i32> [[RES]], ptr %a.addr, align 4
410 a -= vli;
// a = a + vulli with a 9x3 int matrix and an unsigned long long scalar; expects the scalar to be truncated from i64
// to i32, splatted, added to the loaded matrix, and the result stored back to the address the matrix was loaded from.
413 void add_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
414 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
415 // NOOPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
416 // NOOPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
417 // OPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
418 // OPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
419 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
420 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
421 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
422 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
423 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
425 a = a + vulli;
// a += vulli with a 9x3 int matrix and an unsigned long long scalar; expects the scalar load and trunc to i32 before
// the matrix load, then a splat of the truncated scalar, one add, and a store back to the address the matrix was
// loaded from.
429 void add_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
430 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
431 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
432 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
433 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
434 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4{{$}}
435 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
436 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
437 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
438 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
439 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MATRIX_ADDR]], align 4
440 a += vulli;
// a -= vulli with a 9x3 int matrix and an unsigned long long scalar; expects the scalar load and trunc to i32 before
// the matrix load, then a splat of the truncated scalar, one sub, and a store back to the address the matrix was
// loaded from.
444 void subtract_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
445 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
446 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
447 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
448 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
449 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4{{$}}
450 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
451 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
452 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
453 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
454 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MATRIX_ADDR]], align 4
455 a -= vulli;
// b = vs + b with a 4x2 unsigned long long matrix and a short scalar on the left; expects the scalar to be
// sign-extended to i64 and splatted, and the add to take the splat as its left operand.
458 void add_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
459 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
460 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
461 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
462 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
463 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
464 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
465 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
466 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
467 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
468 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
470 b = vs + b;
// b += vs with a 4x2 unsigned long long matrix and a short scalar; expects the scalar load and sext to i64 before
// the matrix load from %b.addr, then a splat, one add with the matrix as left operand, and a store.
474 void add_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
475 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
476 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
477 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
478 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
479 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
480 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
481 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
482 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
483 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
484 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
485 b += vs;
// b -= vs with a 4x2 unsigned long long matrix and a short scalar; expects the scalar load and sext to i64 before
// the matrix load from %b.addr, then a splat, one sub with the matrix as left operand, and a store.
489 void subtract_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
490 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
491 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
492 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
493 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
494 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
495 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
496 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
497 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
498 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
499 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
500 b -= vs;
503 void add_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
504 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
505 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
506 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
507 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
508 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
509 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
510 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
511 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
512 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
514 b = vli + b;
517 void add_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
518 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
519 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
520 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
521 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
522 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
523 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
524 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
525 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
526 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
528 b += vli;
531 void subtract_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
532 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
533 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
534 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
535 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
536 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
537 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
538 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
539 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
540 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
542 b -= vli;
545 void add_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
546 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_unsigned_long_long
547 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
548 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
549 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
550 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
551 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
552 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
553 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
554 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
555 b = vulli + b;
558 void add_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
559 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_unsigned_long_long
560 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
561 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
562 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
563 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
564 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
565 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
566 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
567 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
569 b += vulli;
572 void subtract_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
573 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_unsigned_long_long
574 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
575 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
576 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
577 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
578 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
579 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
580 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
581 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
583 b -= vulli;
586 // Tests for matrix multiplication.
588 void multiply_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
589 // CHECK-LABEL: @multiply_matrix_matrix_double(
590 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
591 // NOOPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
592 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
593 // OPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
594 // CHECK-NEXT: [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
595 // CHECK-NEXT: store <25 x double> [[RES]], ptr %a, align 8
596 // CHECK: ret void
599 dx5x5_t a;
600 a = b * c;
603 void multiply_compound_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
604 // CHECK-LABEL: @multiply_compound_matrix_matrix_double(
605 // NOOPT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
606 // NOOPT-NEXT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
607 // OPT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
608 // OPT-NEXT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
609 // CHECK-NEXT: [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
610 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
611 // CHECK-NEXT: ret void
612 b *= c;
615 typedef int ix3x9_t __attribute__((matrix_type(3, 9)));
616 typedef int ix9x9_t __attribute__((matrix_type(9, 9)));
617 // CHECK-LABEL: @multiply_matrix_matrix_int(
618 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
619 // NOOPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
620 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
621 // OPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
622 // CHECK-NEXT: [[RES:%.*]] = call <81 x i32> @llvm.matrix.multiply.v81i32.v27i32.v27i32(<27 x i32> [[B]], <27 x i32> [[C]], i32 9, i32 3, i32 9)
623 // CHECK-NEXT: store <81 x i32> [[RES]], ptr %a, align 4
624 // CHECK: ret void
626 void multiply_matrix_matrix_int(ix9x3_t b, ix3x9_t c) {
627 ix9x9_t a;
628 a = b * c;
631 // CHECK-LABEL: @multiply_double_matrix_scalar_float(
632 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
633 // NOOPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}}
634 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
635 // OPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
636 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
637 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
638 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
639 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
640 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
641 // CHECK-NEXT: ret void
643 void multiply_double_matrix_scalar_float(dx5x5_t a, float s) {
644 a = a * s;
647 // CHECK-LABEL: @multiply_compound_double_matrix_scalar_float
648 // NOOPT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}}
649 // OPT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
650 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
651 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
652 // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
653 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
654 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
655 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
656 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
657 // CHECK-NEXT: ret void
659 void multiply_compound_double_matrix_scalar_float(dx5x5_t a, float s) {
660 a *= s;
663 // CHECK-LABEL: @multiply_double_matrix_scalar_double(
664 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
665 // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
666 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
667 // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
668 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
669 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
670 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
671 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
672 // CHECK-NEXT: ret void
674 void multiply_double_matrix_scalar_double(dx5x5_t a, double s) {
675 a = a * s;
678 // CHECK-LABEL: @multiply_compound_double_matrix_scalar_double(
679 // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
680 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
681 // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
682 // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
683 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
684 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
685 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
686 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
687 // CHECK-NEXT: ret void
688 void multiply_compound_double_matrix_scalar_double(dx5x5_t a, double s) {
689 a *= s;
692 // CHECK-LABEL: @multiply_float_matrix_scalar_double(
693 // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
694 // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
695 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
696 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
697 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
698 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
699 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
700 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[VECSPLAT]], [[MAT]]
701 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4
702 // CHECK-NEXT: ret void
704 void multiply_float_matrix_scalar_double(fx2x3_t b, double s) {
705 b = s * b;
// Compound multiply of a float matrix by a double scalar: the scalar is
// truncated to float, splatted, multiplied with the loaded matrix, and the
// product must be stored back to the same address before returning.
708 // CHECK-LABEL: @multiply_compound_float_matrix_scalar_double(
709 // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
710 // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
711 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
712 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
713 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
714 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
715 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
716 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], [[VECSPLAT]]
717 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4
718 // CHECK-NEXT: ret void
719 void multiply_compound_float_matrix_scalar_double(fx2x3_t b, double s) {
720 b *= s;
723 // CHECK-LABEL: @multiply_int_matrix_scalar_short(
724 // NOOPT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}}
725 // OPT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
726 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
727 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
728 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
729 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
730 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
731 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[VECSPLAT]], [[MAT]]
732 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
733 // CHECK-NEXT: ret void
735 void multiply_int_matrix_scalar_short(ix9x3_t b, short s) {
736 b = s * b;
739 // CHECK-LABEL: @multiply_compound_int_matrix_scalar_short(
740 // NOOPT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}}
741 // OPT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
742 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
743 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
744 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
745 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
746 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
747 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
748 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
749 // CHECK-NEXT: ret void
751 void multiply_compound_int_matrix_scalar_short(ix9x3_t b, short s) {
752 b *= s;
755 // CHECK-LABEL: @multiply_int_matrix_scalar_ull(
756 // NOOPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
757 // OPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
758 // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
759 // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
760 // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
761 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
762 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
763 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
764 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
765 // CHECK-NEXT: ret void
767 void multiply_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
768 b = b * s;
771 void multiply_compound_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
772 // CHECK-LABEL: @multiply_compound_int_matrix_scalar_ull(
773 // NOOPT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
774 // OPT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
775 // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
776 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
777 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
778 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
779 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
780 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
781 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
782 // CHECK-NEXT: ret void
784 b *= s;
787 // CHECK-LABEL: @multiply_float_matrix_constant(
788 // CHECK-NEXT: entry:
789 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
790 // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
791 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
792 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
793 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00)
794 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
795 // CHECK-NEXT: ret void
797 void multiply_float_matrix_constant(fx2x3_t a) {
798 a = a * 2.5;
801 // CHECK-LABEL: @multiply_compound_float_matrix_constant(
802 // CHECK-NEXT: entry:
803 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
804 // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
805 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
806 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
807 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00)
808 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
809 // CHECK-NEXT: ret void
810 void multiply_compound_float_matrix_constant(fx2x3_t a) {
811 a *= 2.5;
814 // CHECK-LABEL: @multiply_int_matrix_constant(
815 // CHECK-NEXT: entry:
816 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [27 x i32], align 4
817 // CHECK-NEXT: store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4
818 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}}
819 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
820 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> splat (i32 5), [[MAT]]
821 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4
822 // CHECK-NEXT: ret void
824 void multiply_int_matrix_constant(ix9x3_t a) {
825 a = 5 * a;
828 // CHECK-LABEL: @multiply_compound_int_matrix_constant(
829 // CHECK-NEXT: entry:
830 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [27 x i32], align 4
831 // CHECK-NEXT: store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4
832 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}}
833 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
834 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], splat (i32 5)
835 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4
836 // CHECK-NEXT: ret void
838 void multiply_compound_int_matrix_constant(ix9x3_t a) {
839 a *= 5;
842 // CHECK-LABEL: @divide_double_matrix_scalar_float(
843 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
844 // NOOPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}}
845 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
846 // OPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
847 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
848 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
849 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
850 // CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
851 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
852 // CHECK-NEXT: ret void
854 void divide_double_matrix_scalar_float(dx5x5_t a, float s) {
855 a = a / s;
858 // CHECK-LABEL: @divide_double_matrix_scalar_double(
859 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
860 // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
861 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
862 // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
863 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
864 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
865 // CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
866 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
867 // CHECK-NEXT: ret void
869 void divide_double_matrix_scalar_double(dx5x5_t a, double s) {
870 a = a / s;
873 // CHECK-LABEL: @divide_float_matrix_scalar_double(
874 // NOOPT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
875 // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
876 // OPT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
877 // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
878 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
879 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
880 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
881 // CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], [[VECSPLAT]]
882 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4
883 // CHECK-NEXT: ret void
885 void divide_float_matrix_scalar_double(fx2x3_t b, double s) {
886 b = b / s;
889 // CHECK-LABEL: @divide_int_matrix_scalar_short(
890 // NOOPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
891 // NOOPT-NEXT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}}
892 // OPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
893 // OPT-NEXT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
894 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
895 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
896 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
897 // CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
898 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
899 // CHECK-NEXT: ret void
901 void divide_int_matrix_scalar_short(ix9x3_t b, short s) {
902 b = b / s;
905 // CHECK-LABEL: @divide_int_matrix_scalar_ull(
906 // NOOPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
907 // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
908 // OPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
909 // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
910 // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
911 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
912 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
913 // CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
914 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
915 // CHECK-NEXT: ret void
917 void divide_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
918 b = b / s;
921 // CHECK-LABEL: @divide_ull_matrix_scalar_ull(
922 // NOOPT: [[MAT:%.*]] = load <8 x i64>, ptr [[MAT_ADDR:%.*]], align 8{{$}}
923 // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
924 // OPT: [[MAT:%.*]] = load <8 x i64>, ptr [[MAT_ADDR:%.*]], align 8, !tbaa !{{[0-9]+}}{{$}}
925 // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
926 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[S]], i64 0
927 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <8 x i64> [[VECINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
928 // CHECK-NEXT: [[RES:%.*]] = udiv <8 x i64> [[MAT]], [[VECSPLAT]]
929 // CHECK-NEXT: store <8 x i64> [[RES]], ptr [[MAT_ADDR]], align 8
930 // CHECK-NEXT: ret void
932 void divide_ull_matrix_scalar_ull(ullx4x2_t b, unsigned long long s) {
933 b = b / s;
936 // CHECK-LABEL: @divide_float_matrix_constant(
937 // CHECK-NEXT: entry:
938 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
939 // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
940 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
941 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
942 // CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], splat (float 2.500000e+00)
943 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
944 // CHECK-NEXT: ret void
946 void divide_float_matrix_constant(fx2x3_t a) {
947 a = a / 2.5;
950 // Tests for the matrix type operators.
952 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
953 typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
955 // Check that we can use matrix index expression on different floating point
956 // matrixes and indices.
957 void insert_double_matrix_const_idx_ll_u_double(dx5x5_t a, double d, fx2x3_t b, float e, int j, unsigned k) {
958 // CHECK-LABEL: @insert_double_matrix_const_idx_ll_u_double(
959 // NOOPT: [[D:%.*]] = load double, ptr %d.addr, align 8{{$}}
960 // OPT: [[D:%.*]] = load double, ptr %d.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
961 // CHECK-NEXT: [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
962 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 5
963 // CHECK-NEXT: store <25 x double> [[MATINS]], ptr {{.*}}, align 8
964 // CHECK-NEXT: ret void
966 a[0ll][1u] = d;
969 void insert_double_matrix_const_idx_i_u_double(dx5x5_t a, double d) {
970 // CHECK-LABEL: @insert_double_matrix_const_idx_i_u_double(
971 // NOOPT: [[D:%.*]] = load double, ptr %d.addr, align 8{{$}}
972 // OPT: [[D:%.*]] = load double, ptr %d.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
973 // CHECK-NEXT: [[MAT:%.*]] = load <25 x double>, ptr [[MAT_ADDR:%.*]], align 8{{$}}
974 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 21
975 // CHECK-NEXT: store <25 x double> [[MATINS]], ptr [[MAT_ADDR]], align 8
976 // CHECK-NEXT: ret void
978 a[1][4u] = d;
981 void insert_float_matrix_const_idx_ull_i_float(fx2x3_t b, float e) {
982 // CHECK-LABEL: @insert_float_matrix_const_idx_ull_i_float(
983 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
984 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
985 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
986 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 3
987 // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
988 // CHECK-NEXT: ret void
990 b[1ull][1] = e;
993 void insert_float_matrix_idx_i_u_float(fx2x3_t b, float e, int j, unsigned k) {
994 // CHECK-LABEL: @insert_float_matrix_idx_i_u_float(
995 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
996 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
997 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
998 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
999 // CHECK-NEXT: [[J_EXT:%.*]] = sext i32 [[J]] to i64
1000 // NOOPT-NEXT: [[K:%.*]] = load i32, ptr %k.addr, align 4{{$}}
1001 // OPT-NEXT: [[K:%.*]] = load i32, ptr %k.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1002 // CHECK-NEXT: [[K_EXT:%.*]] = zext i32 [[K]] to i64
1003 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K_EXT]], 2
1004 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
1005 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1006 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1007 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1008 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
1009 // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1010 // CHECK-NEXT: ret void
1012 b[j][k] = e;
1015 void insert_float_matrix_idx_s_ull_float(fx2x3_t b, float e, short j, unsigned long long k) {
1016 // CHECK-LABEL: @insert_float_matrix_idx_s_ull_float(
1017 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
1018 // NOOPT-NEXT: [[J:%.*]] = load i16, ptr %j.addr, align 2{{$}}
1019 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1020 // OPT-NEXT: [[J:%.*]] = load i16, ptr %j.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
1021 // CHECK-NEXT: [[J_EXT:%.*]] = sext i16 [[J]] to i64
1022 // NOOPT-NEXT: [[K:%.*]] = load i64, ptr %k.addr, align 8{{$}}
1023 // OPT-NEXT: [[K:%.*]] = load i64, ptr %k.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1024 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K]], 2
1025 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
1026 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1027 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1028 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1029 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
1030 // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1031 // CHECK-NEXT: ret void
1033 (b)[j][k] = e;
1036 // Check that we can use matrix index expressions on integer matrixes.
1037 typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
1038 void insert_int_idx_expr(ix9x3_t a, int i) {
1039 // CHECK-LABEL: @insert_int_idx_expr(
1040 // NOOPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1041 // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1042 // OPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1043 // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1044 // CHECK-NEXT: [[I2_ADD:%.*]] = add nsw i32 4, [[I2]]
1045 // CHECK-NEXT: [[ADD_EXT:%.*]] = sext i32 [[I2_ADD]] to i64
1046 // CHECK-NEXT: [[IDX2:%.*]] = add i64 18, [[ADD_EXT]]
1047 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
1048 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1049 // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1050 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I1]], i64 [[IDX2]]
1051 // CHECK-NEXT: store <27 x i32> [[MATINS]], ptr [[MAT_ADDR]], align 4
1052 // CHECK-NEXT: ret void
1054 a[4 + i][1 + 1u] = i;
1057 // Check that we can use matrix index expressions on FP and integer
1058 // matrixes.
1059 typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
// Stores `i` into element [4][1] of a 9x3 int matrix reached through the
// pointer `a`. Both indices are constants, so the flat index folds to 13
// (1 * 9 + 4) and no llvm.assume bounds hint is expected in either run.
// NOTE(review): despite the function name, the inserted value is the int
// parameter `i`; no floating-point value is involved.
1060 void insert_float_into_int_matrix(ix9x3_t *a, int i) {
1061 // CHECK-LABEL: @insert_float_into_int_matrix(
1062 // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1063 // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1064 // NOOPT-NEXT: [[MAT_ADDR1:%.*]] = load ptr, ptr %a.addr, align 8{{$}}
1065 // OPT-NEXT: [[MAT_ADDR1:%.*]] = load ptr, ptr %a.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1066 // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR1]], align 4{{$}}
1067 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I]], i64 13
1068 // CHECK-NEXT: store <27 x i32> [[MATINS]], ptr [[MAT_ADDR1]], align 4
1069 // CHECK-NEXT: ret void
1071 (*a)[4][1] = i;
1074 // Check that we can use overloaded matrix index expressions on matrixes with
1075 // matching dimensions, but different element types.
1076 typedef double dx3x3_t __attribute__((matrix_type(3, 3)));
1077 typedef float fx3x3_t __attribute__((matrix_type(3, 3)));
// Double-precision half of the matching-dimensions pair: stores `i` into
// element [2u][1u] of a 3x3 double matrix. The constant indices fold to the
// flat index 5 (1 * 3 + 2) in the <9 x double> vector.
1078 void insert_matching_dimensions1(dx3x3_t a, double i) {
1079 // CHECK-LABEL: @insert_matching_dimensions1(
1080 // NOOPT: [[I:%.*]] = load double, ptr %i.addr, align 8{{$}}
1081 // OPT: [[I:%.*]] = load double, ptr %i.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1082 // CHECK-NEXT: [[MAT:%.*]] = load <9 x double>, ptr [[MAT_ADDR:%.*]], align 8{{$}}
1083 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x double> [[MAT]], double [[I]], i64 5
1084 // CHECK-NEXT: store <9 x double> [[MATINS]], ptr [[MAT_ADDR]], align 8
1085 // CHECK-NEXT: ret void
1087 a[2u][1u] = i;
// Single-precision half of the matching-dimensions pair: stores `e` into
// element [1u][2u] of a 3x3 float matrix. The constant indices fold to the
// flat index 7 (2 * 3 + 1) in the <9 x float> vector.
1090 void insert_matching_dimensions(fx3x3_t b, float e) {
1091 // CHECK-LABEL: @insert_matching_dimensions(
1092 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
1093 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1094 // CHECK-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1095 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x float> [[MAT]], float [[E]], i64 7
1096 // CHECK-NEXT: store <9 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1097 // CHECK-NEXT: ret void
1099 b[1u][2u] = e;
// Reads element [2][3 - 1u] of a 5x5 double matrix. The second index is a
// constant expression (2), so the access folds to a single extractelement at
// flat index 12 (2 * 5 + 2).
1102 double extract_double(dx5x5_t a) {
1103 // CHECK-LABEL: @extract_double(
1104 // NOOPT: [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
1105 // OPT: [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
1106 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <25 x double> [[MAT]], i64 12
1107 // CHECK-NEXT: ret double [[MATEXT]]
1109 return a[2][3 - 1u];
// Reads element [2][1] of a 3x3 float matrix (flat index 5 = 1 * 3 + 2).
// The function returns double, so the extracted float is widened with fpext
// before the return.
1112 double extract_float(fx3x3_t b) {
1113 // CHECK-LABEL: @extract_float(
1114 // NOOPT: [[MAT:%.*]] = load <9 x float>, ptr {{.*}}, align 4{{$}}
1115 // OPT: [[MAT:%.*]] = load <9 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
1116 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 5
1117 // CHECK-NEXT: [[TO_DOUBLE:%.*]] = fpext float [[MATEXT]] to double
1118 // CHECK-NEXT: ret double [[TO_DOUBLE]]
1120 return b[2][1];
// Reads element [j][j] of a 9x3 int matrix using a runtime index. `j` is
// loaded twice (once per subscript) and the flat index is j * 9 + j.
// In the -O1 run an llvm.assume bounding the flat index by 27 is expected
// between the index computation and the matrix load.
1123 int extract_int(ix9x3_t c, unsigned long j) {
1124 // CHECK-LABEL: @extract_int(
1125 // NOOPT: [[J1:%.*]] = load i64, ptr %j.addr, align 8{{$}}
1126 // NOOPT-NEXT: [[J2:%.*]] = load i64, ptr %j.addr, align 8{{$}}
1127 // OPT: [[J1:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1128 // OPT-NEXT: [[J2:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1129 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J2]], 9
1130 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J1]]
1131 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
1132 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
1133 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1134 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
1135 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <27 x i32> [[MAT]], i64 [[IDX2]]
1136 // CHECK-NEXT: ret i32 [[MATEXT]]
1138 return c[j][j];
1141 typedef double dx3x2_t __attribute__((matrix_type(3, 2)));
// Mixes pointer subscripts with matrix subscripts in ptr[1][2][j][1]:
//   ptr[1]  - loads the second matrix pointer,
//   [2]     - steps two whole matrices forward (GEP over [6 x double]),
//   [j][1]  - selects a matrix element; the constant second index folds to
//             the offset 3 (1 * 3) and the zero-extended `j` is added.
// The -O1 run expects an llvm.assume bounding the flat index by 6.
1143 double test_extract_matrix_pointer1(dx3x2_t **ptr, unsigned j) {
1144 // CHECK-LABEL: @test_extract_matrix_pointer1(
1145 // NOOPT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1146 // OPT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1147 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
1148 // CHECK-NEXT: [[IDX:%.*]] = add i64 3, [[J_EXT]]
1149 // NOOPT-NEXT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8{{$}}
1150 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 6
1151 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1152 // OPT-NEXT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1153 // CHECK-NEXT: [[PTR_IDX:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 1
1154 // NOOPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8{{$}}
1155 // OPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1156 // CHECK-NEXT: [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], ptr [[PTR2]], i64 2
1157 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8{{$}}
1158 // OPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1159 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 [[IDX]]
1160 // CHECK-NEXT: ret double [[MATEXT]]
1162 return ptr[1][2][j][1];
// Same shape as a pointer-then-matrix access, but written with explicit
// pointer arithmetic and dereferences: *(ptr + 4) loads a matrix pointer,
// + 6 advances it by six matrices, and the final [2][1 * 3 - 2] subscripts
// are constants that fold to the flat element index 5 (1 * 3 + 2).
// No bounds assumption is expected because the index is constant.
1165 double test_extract_matrix_pointer2(dx3x2_t **ptr) {
1166 // CHECK-LABEL: @test_extract_matrix_pointer2(
1167 // CHECK-NEXT: entry:
1168 // NOOPT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8{{$}}
1169 // OPT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1170 // CHECK-NEXT: [[PTR_IDX:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 4
1171 // NOOPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8{{$}}
1172 // OPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1173 // CHECK-NEXT: [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], ptr [[PTR2]], i64 6
1174 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8{{$}}
1175 // OPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1176 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 5
1177 // CHECK-NEXT: ret double [[MATEXT]]
1179 return (*(*(ptr + 4) + 6))[2][1 * 3 - 2];
// Copies one element of the 3x3 float matrix `b` to another element of the
// same matrix: b[2][j] = b[0][k]. The read side is evaluated first
// (flat index k * 3 + 0, with the short `k` sign-extended), then the write
// index (j * 3 + 2) is computed and the matrix is reloaded for the insert.
// The -O1 run expects a separate llvm.assume (bound 9) for each index.
// The parameter `a` is not used by the body.
1182 void insert_extract(dx5x5_t a, fx3x3_t b, unsigned long j, short k) {
1183 // CHECK-LABEL: @insert_extract(
1184 // NOOPT: [[K:%.*]] = load i16, ptr %k.addr, align 2{{$}}
1185 // OPT: [[K:%.*]] = load i16, ptr %k.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
1186 // CHECK-NEXT: [[K_EXT:%.*]] = sext i16 [[K]] to i64
1187 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K_EXT]], 3
1188 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], 0
1189 // NOOPT-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1190 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 9
1191 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1192 // OPT-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
1193 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 [[IDX2]]
1194 // NOOPT-NEXT: [[J:%.*]] = load i64, ptr %j.addr, align 8{{$}}
1195 // OPT-NEXT: [[J:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1196 // CHECK-NEXT: [[IDX3:%.*]] = mul i64 [[J]], 3
1197 // CHECK-NEXT: [[IDX4:%.*]] = add i64 [[IDX3]], 2
1198 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX4]], 9
1199 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1200 // CHECK-NEXT: [[MAT2:%.*]] = load <9 x float>, ptr [[MAT_ADDR]], align 4{{$}}
1201 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x float> [[MAT2]], float [[MATEXT]], i64 [[IDX4]]
1202 // CHECK-NEXT: store <9 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1203 // CHECK-NEXT: ret void
1205 b[2][j] = b[0][k];
// Compound assignment on a single matrix element: a[2][3] -= 1.0 on a 5x5
// double matrix. The expected IR extracts the element at flat index 17
// (3 * 5 + 2), subtracts 1.0, reloads the matrix, and inserts the result at
// the same index before storing the whole vector back.
1208 void insert_compound_stmt(dx5x5_t a) {
1209 // CHECK-LABEL: define{{.*}} void @insert_compound_stmt(<25 x double> noundef %a)
1210 // CHECK: [[A:%.*]] = load <25 x double>, ptr [[A_PTR:%.*]], align 8{{$}}
1211 // CHECK-NEXT: [[EXT:%.*]] = extractelement <25 x double> [[A]], i64 17
1212 // CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT]], 1.000000e+00
1213 // CHECK-NEXT: [[A2:%.*]] = load <25 x double>, ptr [[A_PTR]], align 8{{$}}
1214 // CHECK-NEXT: [[INS:%.*]] = insertelement <25 x double> [[A2]], double [[SUB]], i64 17
1215 // CHECK-NEXT: store <25 x double> [[INS]], ptr [[A_PTR]], align 8
1216 // CHECK-NEXT: ret void
1218 a[2][3] -= 1.0;
// Aggregate holding a 2x3 float matrix as a member, so that element access
// through a struct field (a->mat[i][j]) can be exercised.
1221 struct Foo {
1222 fx2x3_t mat;
// Compound assignment on a matrix element reached through a struct pointer:
// a->mat[i][j] += f on a 2x3 float matrix. Both indices are runtime values;
// the flat index is zext(j) * 2 + zext(i) and is computed once, then reused
// for both the extract and the insert.
// The -O1 run expects the llvm.assume (bound 6) twice: once before the load
// feeding the extract and once before the reload feeding the insert.
1225 void insert_compound_stmt_field(struct Foo *a, float f, unsigned i, unsigned j) {
1226 // CHECK-LABEL: define{{.*}} void @insert_compound_stmt_field(ptr noundef %a, float noundef %f, i32 noundef %i, i32 noundef %j)
1227 // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1228 // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1229 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
1230 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1231 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1232 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
1233 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
1234 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
1235 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1236 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1237 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr %mat, align 4{{$}}
1238 // CHECK-NEXT: [[EXT:%.*]] = extractelement <6 x float> [[MAT]], i64 [[IDX2]]
1239 // CHECK-NEXT: [[SUM:%.*]] = fadd float [[EXT]], {{.*}}
1240 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1241 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1242 // CHECK-NEXT: [[MAT2:%.*]] = load <6 x float>, ptr %mat, align 4{{$}}
1243 // CHECK-NEXT: [[INS:%.*]] = insertelement <6 x float> [[MAT2]], float [[SUM]], i64 [[IDX2]]
1244 // CHECK-NEXT: store <6 x float> [[INS]], ptr %mat, align 4
1245 // CHECK-NEXT: ret void
1247 a->mat[i][j] += f;
// Uses elements of one matrix as the subscripts of another:
// b[a[i][j]][a[j][i] + 2] = 1.5, with `a` a 9x3 int matrix and `b` a 5x5
// double matrix. Expected evaluation order in the IR:
//   1. a[i][j] at flat index j * 9 + i, sign-extended (first subscript of b);
//   2. a[j][i] at flat index i * 9 + j, plus 2, sign-extended (second
//      subscript of b);
//   3. the flat index into b, (a[j][i] + 2) * 5 + a[i][j], then the insert.
// The -O1 run expects an llvm.assume for each of the three flat indices
// (bounds 27, 27 and 25).
1250 void matrix_as_idx(ix9x3_t a, int i, int j, dx5x5_t b) {
1251 // CHECK-LABEL: define{{.*}} void @matrix_as_idx(<27 x i32> noundef %a, i32 noundef %i, i32 noundef %j, <25 x double> noundef %b)
1252 // NOOPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1253 // OPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1254 // CHECK-NEXT: [[I1_EXT:%.*]] = sext i32 [[I1]] to i64
1255 // NOOPT-NEXT: [[J1:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1256 // OPT-NEXT: [[J1:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1257 // CHECK-NEXT: [[J1_EXT:%.*]] = sext i32 [[J1]] to i64
1258 // CHECK-NEXT: [[IDX1_1:%.*]] = mul i64 [[J1_EXT]], 9
1259 // CHECK-NEXT: [[IDX1_2:%.*]] = add i64 [[IDX1_1]], [[I1_EXT]]
1260 // NOOPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
1261 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX1_2]], 27
1262 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1263 // OPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1264 // CHECK-NEXT: [[MI1:%.*]] = extractelement <27 x i32> [[A]], i64 [[IDX1_2]]
1265 // CHECK-NEXT: [[MI1_EXT:%.*]] = sext i32 [[MI1]] to i64
1266 // NOOPT-NEXT: [[J2:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1267 // OPT-NEXT: [[J2:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1268 // CHECK-NEXT: [[J2_EXT:%.*]] = sext i32 [[J2]] to i64
1269 // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1270 // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1271 // CHECK-NEXT: [[I2_EXT:%.*]] = sext i32 [[I2]] to i64
1272 // CHECK-NEXT: [[IDX2_1:%.*]] = mul i64 [[I2_EXT]], 9
1273 // CHECK-NEXT: [[IDX2_2:%.*]] = add i64 [[IDX2_1]], [[J2_EXT]]
1274 // NOOPT-NEXT: [[A2:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
1275 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2_2]], 27
1276 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1277 // OPT-NEXT: [[A2:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
1278 // CHECK-NEXT: [[MI2:%.*]] = extractelement <27 x i32> [[A2]], i64 [[IDX2_2]]
1279 // CHECK-NEXT: [[MI3:%.*]] = add nsw i32 [[MI2]], 2
1280 // CHECK-NEXT: [[MI3_EXT:%.*]] = sext i32 [[MI3]] to i64
1281 // CHECK-NEXT: [[IDX3_1:%.*]] = mul i64 [[MI3_EXT]], 5
1282 // CHECK-NEXT: [[IDX3_2:%.*]] = add i64 [[IDX3_1]], [[MI1_EXT]]
1283 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX3_2]], 25
1284 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1285 // CHECK-NEXT: [[B:%.*]] = load <25 x double>, ptr [[B_PTR:%.*]], align 8{{$}}
1286 // CHECK-NEXT: [[INS:%.*]] = insertelement <25 x double> [[B]], double 1.500000e+00, i64 [[IDX3_2]]
1287 // CHECK-NEXT: store <25 x double> [[INS]], ptr [[B_PTR]], align 8
1288 b[a[i][j]][a[j][i] + 2] = 1.5;