Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / CodeGen / matrix-type-operators.c
blobc5886858515483b933cb771400d5ab72771b5558
1 // RUN: %clang_cc1 -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,NOOPT %s
2 // RUN: %clang_cc1 -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,OPT %s
// Matrix types exercised by the tests below. The expected IR in this file
// shows each one passed and operated on as a flat vector:
// dx5x5_t -> <25 x double>, fx2x3_t -> <6 x float>,
// ix9x3_t -> <27 x i32>, ullx4x2_t -> <8 x i64>.
5 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
6 typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
7 typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
8 typedef unsigned long long ullx4x2_t __attribute__((matrix_type(4, 2)));
10 // Floating point matrix/matrix and matrix/scalar additions and subtractions.
// a = b + c on 5x5 double matrices: expects two <25 x double> loads, a single
// element-wise fadd of them, and a store of the sum. With optimization
// enabled the loads must additionally carry !tbaa metadata.
12 void add_matrix_matrix_double(dx5x5_t a, dx5x5_t b, dx5x5_t c) {
13 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b, <25 x double> noundef %c)
14 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
15 // NOOPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
16 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
17 // OPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
18 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[B]], [[C]]
19 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
21 a = b + c;
// a += b on 5x5 double matrices: expects two <25 x double> loads (captured as
// B, then A), one fadd with A as the left operand, and a store of the result.
24 void add_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
25 // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b)
26 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
27 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
28 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
29 // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
30 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[A]], [[B]]
31 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
33 a += b;
// a -= b on 5x5 double matrices: expects two <25 x double> loads (captured as
// B, then A), one fsub with A as the left operand, and a store of the result.
36 void subtract_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
37 // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b)
38 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
39 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
40 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
41 // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
42 // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[A]], [[B]]
43 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
45 a -= b;
// a = b + c on 2x3 float matrices: expects two <6 x float> loads, a single
// fadd of them, and a store of the sum (4-byte alignment throughout).
48 void add_matrix_matrix_float(fx2x3_t a, fx2x3_t b, fx2x3_t c) {
49 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b, <6 x float> noundef %c)
50 // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
51 // NOOPT-NEXT: [[C:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
52 // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
53 // OPT-NEXT: [[C:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
54 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[B]], [[C]]
55 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
57 a = b + c;
// a += b on 2x3 float matrices: expects two <6 x float> loads (captured as B,
// then A), one fadd with A as the left operand, and a store of the result.
60 void add_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) {
61 // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b)
62 // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
63 // NOOPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
64 // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
65 // OPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
66 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[A]], [[B]]
67 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
69 a += b;
// a -= b on 2x3 float matrices: expects two <6 x float> loads (captured as B,
// then A), one fsub with A as the left operand, and a store of the result.
72 void subtract_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) {
73 // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b)
74 // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
75 // NOOPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
76 // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
77 // OPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
78 // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[A]], [[B]]
79 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
81 a -= b;
// a = a + vf with a double matrix and a float scalar: expects the matrix load,
// then the scalar load from %vf.addr, an fpext of the scalar to double, a
// splat of it (insertelement into lane 0 + zero-mask shufflevector), one fadd
// of matrix and splat, and a store of the result.
84 void add_matrix_scalar_double_float(dx5x5_t a, float vf) {
85 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
86 // NOOPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
87 // NOOPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
88 // OPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
89 // OPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
90 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
91 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
92 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
93 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
94 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
96 a = a + vf;
// a += vf with a double matrix and a float scalar: unlike the plain binary
// form, the scalar is loaded and fpext'ed to double before the matrix load;
// it is then splatted, added with one fadd, and the result stored.
99 void add_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
100 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
101 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
102 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
103 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
104 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
105 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
106 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
107 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
108 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
109 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
111 a += vf;
// a -= vf with a double matrix and a float scalar: the scalar is loaded and
// fpext'ed to double before the matrix load, splatted, subtracted from the
// matrix with one fsub, and the result stored.
114 void subtract_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
115 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
116 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
117 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
118 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
119 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
120 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
121 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
122 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
123 // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
124 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
126 a -= vf;
// a = a + vd with a double matrix and a double scalar: no conversion is
// expected; the matrix and then the scalar are loaded, the scalar is splatted
// directly, added with one fadd, and the result stored.
129 void add_matrix_scalar_double_double(dx5x5_t a, double vd) {
130 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
131 // NOOPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
132 // NOOPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
133 // OPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
134 // OPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
135 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
136 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
137 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
138 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
140 a = a + vd;
// a += vd with a double matrix and a double scalar: the scalar is loaded from
// %vd.addr first, then the matrix; the scalar is splatted (insertelement into
// lane 0 + zero-mask shufflevector) and added with one fadd.
// The trailing store line previously had no directive prefix, so FileCheck
// treated it as an ordinary comment and never verified the write-back; it now
// uses the same next-line directive as the sibling tests.
143 void add_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
144 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
145 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
146 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
147 // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
148 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
149 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
150 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
151 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
152 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
153 a += vd;
// a -= vd with a double matrix and a double scalar: the scalar is loaded from
// %vd.addr first, then the matrix; the scalar is splatted (insertelement into
// lane 0 + zero-mask shufflevector) and subtracted with one fsub.
// The trailing store line previously had no directive prefix, so FileCheck
// treated it as an ordinary comment and never verified the write-back; it now
// uses the same next-line directive as the sibling tests.
156 void subtract_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
157 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
158 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
159 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
160 // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
161 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
162 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
163 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
164 // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
165 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
166 a -= vd;
// b = b + vf with a float matrix and a float scalar: no conversion is
// expected; the matrix and then the scalar are loaded, the scalar is splatted
// into a <6 x float>, added with one fadd, and the result stored.
169 void add_matrix_scalar_float_float(fx2x3_t b, float vf) {
170 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
171 // NOOPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
172 // NOOPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
173 // OPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
174 // OPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
175 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
176 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
177 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
178 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
180 b = b + vf;
// b += vf with a float matrix and a float scalar: the scalar is loaded from
// %vf.addr first, then the matrix from %b.addr; the scalar is splatted, added
// with one fadd, and the result stored.
183 void add_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
184 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
185 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
186 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4{{$}}
187 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
188 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
189 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
190 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
191 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
192 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
193 b += vf;
// b -= vf with a float matrix and a float scalar: the scalar is loaded from
// %vf.addr first, then the matrix from %b.addr; the scalar is splatted,
// subtracted from the matrix with one fsub, and the result stored.
196 void subtract_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
197 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
198 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
199 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4{{$}}
200 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
201 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
202 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
203 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
204 // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
205 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
206 b -= vf;
// b = b + vd with a float matrix and a double scalar: expects the matrix
// load, the scalar load from %vd.addr, an fptrunc of the scalar to float, a
// splat of the truncated value, one fadd, and a store of the result.
209 void add_matrix_scalar_float_double(fx2x3_t b, double vd) {
210 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
211 // NOOPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
212 // NOOPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
213 // OPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
214 // OPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
215 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
216 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
217 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
218 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
219 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
221 b = b + vd;
// b += vd with a float matrix and a double scalar: the scalar is loaded and
// fptrunc'ed to float before the matrix load; it is then splatted, added with
// one fadd, and the result stored.
224 void add_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
225 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
226 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
227 // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
228 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
229 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
230 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
231 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
232 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
233 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
234 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
235 b += vd;
// b -= vd with a float matrix and a double scalar: the scalar is loaded and
// fptrunc'ed to float before the matrix load; it is then splatted, subtracted
// from the matrix with one fsub, and the result stored.
238 void subtract_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
239 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
240 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
241 // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
242 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
243 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
244 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
245 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
246 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
247 // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
248 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
249 b -= vd;
252 // Integer matrix/matrix and matrix/scalar additions and subtractions.
// a = b + c on 9x3 int matrices: expects two <27 x i32> loads, a single
// integer add of them, and a store of the sum.
254 void add_matrix_matrix_int(ix9x3_t a, ix9x3_t b, ix9x3_t c) {
255 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b, <27 x i32> noundef %c)
256 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
257 // NOOPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
258 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
259 // OPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
260 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[B]], [[C]]
261 // CHECK-NEXT: store <27 x i32> [[RES]], ptr {{.*}}, align 4
262 a = b + c;
// a += b on 9x3 int matrices: expects two <27 x i32> loads (captured as B,
// then A), one add with A as the left operand, and a store of the result.
265 void add_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) {
266 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b)
267 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
268 // NOOPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
269 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
270 // OPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
271 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[A]], [[B]]
272 // CHECK-NEXT: store <27 x i32> [[RES]], ptr {{.*}}, align 4
273 a += b;
// a -= b on 9x3 int matrices: expects two <27 x i32> loads (captured as B,
// then A), one sub with A as the left operand, and a store of the result.
276 void subtract_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) {
277 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b)
278 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
279 // NOOPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
280 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
281 // OPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
282 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[A]], [[B]]
283 // CHECK-NEXT: store <27 x i32> [[RES]], ptr {{.*}}, align 4
284 a -= b;
// a = b + c on 4x2 unsigned long long matrices: expects two <8 x i64> loads,
// a single integer add of them, and a store of the sum.
287 void add_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b, ullx4x2_t c) {
288 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b, <8 x i64> noundef %c)
289 // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
290 // NOOPT-NEXT: [[C:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
291 // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
292 // OPT-NEXT: [[C:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
293 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[B]], [[C]]
294 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
296 a = b + c;
// a += b on 4x2 unsigned long long matrices: expects two <8 x i64> loads
// (captured as B, then A), one add with A as the left operand, and a store of
// the result.
299 void add_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) {
300 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b)
301 // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
302 // NOOPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
303 // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
304 // OPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
305 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[A]], [[B]]
306 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
308 a += b;
// a -= b on 4x2 unsigned long long matrices: expects two <8 x i64> loads
// (captured as B, then A), one sub with A as the left operand, and a store of
// the result.
311 void subtract_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) {
312 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b)
313 // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
314 // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
315 // NOOPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
316 // OPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
317 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[A]], [[B]]
318 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
320 a -= b;
// a = a + vs with an int matrix and a short scalar: expects the matrix load
// (its address is captured as MAT_ADDR), the scalar load from %vs.addr, a sext
// of the scalar to i32, a splat of it, one add, and a store of the result back
// to the same address the matrix was loaded from.
323 void add_matrix_scalar_int_short(ix9x3_t a, short vs) {
324 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
325 // NOOPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
326 // NOOPT-NEXT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
327 // OPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
328 // OPT-NEXT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
329 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
330 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i64 0
331 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
332 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
333 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
335 a = a + vs;
// a += vs with an int matrix and a short scalar: the scalar is loaded from
// %vs.addr and sign-extended to i32 before the matrix is loaded from %a.addr;
// the extended value is splatted, added with one add, and stored to %a.addr.
// Two fixes relative to the previous expectations:
//  - the insertelement operand now uses the captured sext result instead of
//    re-capturing it, so the splat is tied to the conversion;
//  - the store address is spelled %a.addr (matching the load) rather than a
//    pattern variable that was only bound in an earlier function.
338 void add_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
339 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
340 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
341 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
342 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
343 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
344 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
345 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i64 0
346 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
347 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
348 // CHECK-NEXT: store <27 x i32> [[RES]], ptr %a.addr, align 4
350 a += vs;
// a -= vs with an int matrix and a short scalar: the scalar is loaded from
// %vs.addr and sign-extended to i32 before the matrix is loaded from %a.addr;
// the extended value is splatted, subtracted with one sub, and the result
// stored to %a.addr.
// Two fixes relative to the previous expectations:
//  - the insertelement operand now uses the captured sext result instead of
//    re-capturing it, so the splat is tied to the conversion;
//  - the store address is spelled %a.addr (matching the load) rather than a
//    pattern variable that was only bound in an earlier function.
353 void subtract_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
354 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
355 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
356 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
357 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
358 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
359 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
360 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i64 0
361 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
362 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
363 // CHECK-NEXT: store <27 x i32> [[RES]], ptr %a.addr, align 4
365 a -= vs;
// a = a + vli with an int matrix and a long scalar: expects the matrix load
// (its address is captured as MAT_ADDR), the scalar load from %vli.addr, a
// trunc of the scalar from i64 to i32, a splat of it, one add, and a store of
// the result back to the same address the matrix was loaded from.
368 void add_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
369 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
370 // NOOPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
371 // NOOPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
372 // OPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
373 // OPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
374 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
375 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
376 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
377 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
378 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
380 a = a + vli;
// a += vli with an int matrix and a long scalar: the scalar is loaded from
// %vli.addr and truncated from i64 to i32 before the matrix is loaded from
// %a.addr; the truncated value is splatted, added with one add, and the
// result stored to %a.addr.
// The store address is now spelled %a.addr (matching the load) instead of a
// pattern variable that was only bound in an earlier function, so this test
// no longer depends on state leaking across function boundaries.
383 void add_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
384 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
385 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
386 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
387 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
388 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
389 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
390 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
391 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
392 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
393 // CHECK-NEXT: store <27 x i32> [[RES]], ptr %a.addr, align 4
395 a += vli;
// a -= vli with an int matrix and a long scalar: the scalar is loaded from
// %vli.addr and truncated from i64 to i32 before the matrix is loaded from
// %a.addr; the truncated value is splatted, subtracted with one sub, and the
// result stored to %a.addr.
// The store address is now spelled %a.addr (matching the load) instead of a
// pattern variable that was only bound in an earlier function, so this test
// no longer depends on state leaking across function boundaries.
398 void subtract_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
399 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
400 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
401 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
402 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
403 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
404 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
405 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
406 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
407 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
408 // CHECK-NEXT: store <27 x i32> [[RES]], ptr %a.addr, align 4
410 a -= vli;
// a = a + vulli with an int matrix and an unsigned long long scalar: expects
// the matrix load (its address is captured as MAT_ADDR), the scalar load from
// %vulli.addr, a trunc of the scalar from i64 to i32, a splat of it, one add,
// and a store of the result back to the address the matrix was loaded from.
413 void add_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
414 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
415 // NOOPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
416 // NOOPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
417 // OPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
418 // OPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
419 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
420 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
421 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
422 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
423 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
425 a = a + vulli;
// a += vulli with an int matrix and an unsigned long long scalar: the scalar
// is loaded from %vulli.addr and truncated from i64 to i32 before the matrix
// load (whose address is captured as MATRIX_ADDR); the truncated value is
// splatted, added with one add, and the result stored.
// The store now refers to MATRIX_ADDR, the address captured by this
// function's own load. Previously it referenced a differently named variable
// that was only bound in an earlier function, leaving MATRIX_ADDR unused.
428 void add_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
429 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
430 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
431 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
432 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
433 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4{{$}}
434 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
435 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
436 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
437 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
438 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MATRIX_ADDR]], align 4
440 a += vulli;
// a -= vulli with an int matrix and an unsigned long long scalar: the scalar
// is loaded from %vulli.addr and truncated from i64 to i32 before the matrix
// load (whose address is captured as MATRIX_ADDR); the truncated value is
// splatted, subtracted with one sub, and the result stored.
// The store now refers to MATRIX_ADDR, the address captured by this
// function's own load. Previously it referenced a differently named variable
// that was only bound in an earlier function, leaving MATRIX_ADDR unused.
443 void subtract_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
444 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
445 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
446 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
447 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
448 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4{{$}}
449 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
450 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
451 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
452 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
453 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MATRIX_ADDR]], align 4
455 a -= vulli;
// b = vs + b with the scalar on the left: expects the short scalar loaded
// from %vs.addr and sign-extended to i64 first, then the <8 x i64> matrix
// load, a splat of the scalar, and one add whose left operand is the splat
// (mirroring the source operand order), followed by a store of the result.
458 void add_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
459 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
460 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
461 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
462 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
463 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
464 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
465 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
466 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
467 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
468 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
470 b = vs + b;
// b += vs with an unsigned long long matrix and a short scalar: the scalar is
// loaded from %vs.addr and sign-extended to i64 before the matrix is loaded
// from %b.addr; the extended value is splatted, added with the matrix as the
// left operand, and the result stored.
473 void add_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
474 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
475 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
476 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
477 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
478 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
479 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
480 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
481 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
482 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
483 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
485 b += vs;
// b -= vs with an unsigned long long matrix and a short scalar: the scalar is
// loaded from %vs.addr and sign-extended to i64 before the matrix is loaded
// from %b.addr; the extended value is splatted, subtracted from the matrix
// with one sub, and the result stored.
488 void subtract_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
489 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
490 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
491 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
492 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
493 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
494 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
495 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
496 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
497 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
498 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
500 b -= vs;
503 void add_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
504 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
505 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
506 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
507 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
508 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
509 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
510 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
511 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
512 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
514 b = vli + b;
517 void add_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
518 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
519 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
520 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
521 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
522 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
523 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
524 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
525 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
526 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
528 b += vli;
531 void subtract_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
532 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
533 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
534 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
535 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
536 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
537 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
538 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
539 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
540 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
542 b -= vli;
545 void add_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
546 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_unsigned_long_long
547 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
548 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
549 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
550 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
551 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
552 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
553 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
554 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
555 b = vulli + b;
558 void add_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
559 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_unsigned_long_long
560 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
561 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
562 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
563 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
564 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
565 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
566 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
567 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
569 b += vulli;
572 void subtract_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
573 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_unsigned_long_long
574 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
575 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
576 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
577 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
578 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
579 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
580 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
581 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
583 b -= vulli;
586 // Tests for matrix multiplication.
588 void multiply_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
589 // CHECK-LABEL: @multiply_matrix_matrix_double(
590 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
591 // NOOPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
592 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
593 // OPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
594 // CHECK-NEXT: [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
595 // CHECK-NEXT: store <25 x double> [[RES]], ptr %a, align 8
596 // CHECK: ret void
599 dx5x5_t a;
600 a = b * c;
603 void multiply_compound_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
604 // CHECK-LABEL: @multiply_compound_matrix_matrix_double(
605 // NOOPT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
606 // NOOPT-NEXT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
607 // OPT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
608 // OPT-NEXT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
609 // CHECK-NEXT: [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
610 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
611 // CHECK-NEXT: ret void
612 b *= c;
615 typedef int ix3x9_t __attribute__((matrix_type(3, 9)));
616 typedef int ix9x9_t __attribute__((matrix_type(9, 9)));
617 // CHECK-LABEL: @multiply_matrix_matrix_int(
618 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
619 // NOOPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
620 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
621 // OPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
622 // CHECK-NEXT: [[RES:%.*]] = call <81 x i32> @llvm.matrix.multiply.v81i32.v27i32.v27i32(<27 x i32> [[B]], <27 x i32> [[C]], i32 9, i32 3, i32 9)
623 // CHECK-NEXT: store <81 x i32> [[RES]], ptr %a, align 4
624 // CHECK: ret void
626 void multiply_matrix_matrix_int(ix9x3_t b, ix3x9_t c) {
627 ix9x9_t a;
628 a = b * c;
631 // CHECK-LABEL: @multiply_double_matrix_scalar_float(
632 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
633 // NOOPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}}
634 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
635 // OPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
636 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
637 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
638 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
639 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
640 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
641 // CHECK-NEXT: ret void
643 void multiply_double_matrix_scalar_float(dx5x5_t a, float s) {
644 a = a * s;
647 // CHECK-LABEL: @multiply_compound_double_matrix_scalar_float
648 // NOOPT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}}
649 // OPT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
650 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
651 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
652 // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
653 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
654 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
655 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
656 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
657 // CHECK-NEXT: ret void
659 void multiply_compound_double_matrix_scalar_float(dx5x5_t a, float s) {
660 a *= s;
663 // CHECK-LABEL: @multiply_double_matrix_scalar_double(
664 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
665 // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
666 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
667 // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
668 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
669 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
670 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
671 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
672 // CHECK-NEXT: ret void
674 void multiply_double_matrix_scalar_double(dx5x5_t a, double s) {
675 a = a * s;
678 // CHECK-LABEL: @multiply_compound_double_matrix_scalar_double(
679 // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
680 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
681 // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
682 // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
683 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
684 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
685 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
686 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
687 // CHECK-NEXT: ret void
688 void multiply_compound_double_matrix_scalar_double(dx5x5_t a, double s) {
689 a *= s;
692 // CHECK-LABEL: @multiply_float_matrix_scalar_double(
693 // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
694 // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
695 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
696 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
697 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
698 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
699 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
700 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[VECSPLAT]], [[MAT]]
701 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4
702 // CHECK-NEXT: ret void
704 void multiply_float_matrix_scalar_double(fx2x3_t b, double s) {
705 b = s * b;
708 // CHECK-LABEL: @multiply_compound_float_matrix_scalar_double(
709 // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
710 // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
711 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
712 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
713 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
714 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
715 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
716 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], [[VECSPLAT]]
717 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4
718 // CHECK-NEXT: ret void
719 void multiply_compound_float_matrix_scalar_double(fx2x3_t b, double s) {
720 b *= s;
723 // CHECK-LABEL: @multiply_int_matrix_scalar_short(
724 // NOOPT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}}
725 // OPT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
726 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
727 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
728 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
729 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
730 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
731 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[VECSPLAT]], [[MAT]]
732 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
733 // CHECK-NEXT: ret void
735 void multiply_int_matrix_scalar_short(ix9x3_t b, short s) {
736 b = s * b;
739 // CHECK-LABEL: @multiply_compound_int_matrix_scalar_short(
740 // NOOPT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}}
741 // OPT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
742 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
743 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
744 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
745 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
746 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
747 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
748 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
749 // CHECK-NEXT: ret void
751 void multiply_compound_int_matrix_scalar_short(ix9x3_t b, short s) {
752 b *= s;
755 // CHECK-LABEL: @multiply_int_matrix_scalar_ull(
756 // NOOPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
757 // OPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
758 // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
759 // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
760 // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
761 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
762 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
763 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
764 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
765 // CHECK-NEXT: ret void
767 void multiply_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
768 b = b * s;
771 void multiply_compound_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
772 // CHECK-LABEL: @multiply_compound_int_matrix_scalar_ull(
773 // NOOPT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
774 // OPT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
775 // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
776 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
777 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
778 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
779 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
780 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
781 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
782 // CHECK-NEXT: ret void
784 b *= s;
787 // CHECK-LABEL: @multiply_float_matrix_constant(
788 // CHECK-NEXT: entry:
789 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
790 // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
791 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
792 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
793 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
794 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
795 // CHECK-NEXT: ret void
797 void multiply_float_matrix_constant(fx2x3_t a) {
798 a = a * 2.5;
801 // CHECK-LABEL: @multiply_compound_float_matrix_constant(
802 // CHECK-NEXT: entry:
803 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
804 // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
805 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
806 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
807 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
808 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
809 // CHECK-NEXT: ret void
810 void multiply_compound_float_matrix_constant(fx2x3_t a) {
811 a *= 2.5;
814 // CHECK-LABEL: @multiply_int_matrix_constant(
815 // CHECK-NEXT: entry:
816 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [27 x i32], align 4
817 // CHECK-NEXT: store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4
818 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}}
819 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
820 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, [[MAT]]
821 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4
822 // CHECK-NEXT: ret void
824 void multiply_int_matrix_constant(ix9x3_t a) {
825 a = 5 * a;
828 // CHECK-LABEL: @multiply_compound_int_matrix_constant(
829 // CHECK-NEXT: entry:
830 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [27 x i32], align 4
831 // CHECK-NEXT: store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4
832 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}}
833 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
834 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
835 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4
836 // CHECK-NEXT: ret void
838 void multiply_compound_int_matrix_constant(ix9x3_t a) {
839 a *= 5;
842 // CHECK-LABEL: @divide_double_matrix_scalar_float(
843 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
844 // NOOPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}}
845 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
846 // OPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
847 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
848 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
849 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
850 // CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
851 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
852 // CHECK-NEXT: ret void
854 void divide_double_matrix_scalar_float(dx5x5_t a, float s) {
855 a = a / s;
858 // CHECK-LABEL: @divide_double_matrix_scalar_double(
859 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
860 // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
861 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
862 // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
863 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
864 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
865 // CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
866 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
867 // CHECK-NEXT: ret void
869 void divide_double_matrix_scalar_double(dx5x5_t a, double s) {
870 a = a / s;
873 // CHECK-LABEL: @divide_float_matrix_scalar_double(
874 // NOOPT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
875 // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
876 // OPT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
877 // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
878 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
879 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
880 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
881 // CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], [[VECSPLAT]]
882 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4
883 // CHECK-NEXT: ret void
885 void divide_float_matrix_scalar_double(fx2x3_t b, double s) {
886 b = b / s;
889 // CHECK-LABEL: @divide_int_matrix_scalar_short(
890 // NOOPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
891 // NOOPT-NEXT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}}
892 // OPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
893 // OPT-NEXT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
894 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
895 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
896 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
897 // CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
898 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
899 // CHECK-NEXT: ret void
901 void divide_int_matrix_scalar_short(ix9x3_t b, short s) {
902 b = b / s;
905 // CHECK-LABEL: @divide_int_matrix_scalar_ull(
906 // NOOPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
907 // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
908 // OPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
909 // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
910 // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
911 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
912 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
913 // CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
914 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
915 // CHECK-NEXT: ret void
917 void divide_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
918 b = b / s;
921 // CHECK-LABEL: @divide_ull_matrix_scalar_ull(
922 // NOOPT: [[MAT:%.*]] = load <8 x i64>, ptr [[MAT_ADDR:%.*]], align 8{{$}}
923 // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
924 // OPT: [[MAT:%.*]] = load <8 x i64>, ptr [[MAT_ADDR:%.*]], align 8, !tbaa !{{[0-9]+}}{{$}}
925 // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
926 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[S]], i64 0
927 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <8 x i64> [[VECINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
928 // CHECK-NEXT: [[RES:%.*]] = udiv <8 x i64> [[MAT]], [[VECSPLAT]]
929 // CHECK-NEXT: store <8 x i64> [[RES]], ptr [[MAT_ADDR]], align 8
930 // CHECK-NEXT: ret void
932 void divide_ull_matrix_scalar_ull(ullx4x2_t b, unsigned long long s) {
933 b = b / s;
936 // CHECK-LABEL: @divide_float_matrix_constant(
937 // CHECK-NEXT: entry:
938 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
939 // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
940 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
941 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
942 // CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
943 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
944 // CHECK-NEXT: ret void
946 void divide_float_matrix_constant(fx2x3_t a) {
947 a = a / 2.5;
950 // Tests for the matrix type operators.
952 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
953 typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
955 // Check that we can use matrix index expression on different floating point
956 // matrixes and indices.
957 void insert_double_matrix_const_idx_ll_u_double(dx5x5_t a, double d, fx2x3_t b, float e, int j, unsigned k) {
958 // CHECK-LABEL: @insert_double_matrix_const_idx_ll_u_double(
959 // NOOPT: [[D:%.*]] = load double, ptr %d.addr, align 8{{$}}
960 // OPT: [[D:%.*]] = load double, ptr %d.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
961 // CHECK-NEXT: [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
962 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 5
963 // CHECK-NEXT: store <25 x double> [[MATINS]], ptr {{.*}}, align 8
964 // CHECK-NEXT: ret void
966 a[0ll][1u] = d;
969 void insert_double_matrix_const_idx_i_u_double(dx5x5_t a, double d) {
970 // CHECK-LABEL: @insert_double_matrix_const_idx_i_u_double(
971 // NOOPT: [[D:%.*]] = load double, ptr %d.addr, align 8{{$}}
972 // OPT: [[D:%.*]] = load double, ptr %d.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
973 // CHECK-NEXT: [[MAT:%.*]] = load <25 x double>, ptr [[MAT_ADDR:%.*]], align 8{{$}}
974 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 21
975 // CHECK-NEXT: store <25 x double> [[MATINS]], ptr [[MAT_ADDR]], align 8
976 // CHECK-NEXT: ret void
978 a[1][4u] = d;
981 void insert_float_matrix_const_idx_ull_i_float(fx2x3_t b, float e) {
982 // CHECK-LABEL: @insert_float_matrix_const_idx_ull_i_float(
983 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
984 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
985 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
986 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 3
987 // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
988 // CHECK-NEXT: ret void
990 b[1ull][1] = e;
993 void insert_float_matrix_idx_i_u_float(fx2x3_t b, float e, int j, unsigned k) {
994 // CHECK-LABEL: @insert_float_matrix_idx_i_u_float(
995 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
996 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
997 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
998 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
999 // CHECK-NEXT: [[J_EXT:%.*]] = sext i32 [[J]] to i64
1000 // NOOPT-NEXT: [[K:%.*]] = load i32, ptr %k.addr, align 4{{$}}
1001 // OPT-NEXT: [[K:%.*]] = load i32, ptr %k.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1002 // CHECK-NEXT: [[K_EXT:%.*]] = zext i32 [[K]] to i64
1003 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K_EXT]], 2
1004 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
1005 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1006 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1007 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1008 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
1009 // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1010 // CHECK-NEXT: ret void
1012 b[j][k] = e;
1015 void insert_float_matrix_idx_s_ull_float(fx2x3_t b, float e, short j, unsigned long long k) {
1016 // CHECK-LABEL: @insert_float_matrix_idx_s_ull_float(
1017 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
1018 // NOOPT-NEXT: [[J:%.*]] = load i16, ptr %j.addr, align 2{{$}}
1019 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1020 // OPT-NEXT: [[J:%.*]] = load i16, ptr %j.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
1021 // CHECK-NEXT: [[J_EXT:%.*]] = sext i16 [[J]] to i64
1022 // NOOPT-NEXT: [[K:%.*]] = load i64, ptr %k.addr, align 8{{$}}
1023 // OPT-NEXT: [[K:%.*]] = load i64, ptr %k.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1024 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K]], 2
1025 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
1026 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1027 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1028 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1029 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
1030 // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1031 // CHECK-NEXT: ret void
1033 (b)[j][k] = e;
1036 // Check that we can use matrix index expressions on integer matrixes.
1037 typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
// Stores `i` into a[4 + i][1 + 1u] of a 9x3 int matrix passed by value.
// The column subscript is the constant 2, and the expected flat index is
// 18 + sext(4 + i). Only the -O1 run additionally expects an llvm.assume
// bounding that index below 27.
1038 void insert_int_idx_expr(ix9x3_t a, int i) {
1039 // CHECK-LABEL: @insert_int_idx_expr(
1040 // NOOPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1041 // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1042 // OPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1043 // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1044 // CHECK-NEXT: [[I2_ADD:%.*]] = add nsw i32 4, [[I2]]
1045 // CHECK-NEXT: [[ADD_EXT:%.*]] = sext i32 [[I2_ADD]] to i64
1046 // CHECK-NEXT: [[IDX2:%.*]] = add i64 18, [[ADD_EXT]]
1047 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
1048 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1049 // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1050 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I1]], i64 [[IDX2]]
1051 // CHECK-NEXT: store <27 x i32> [[MATINS]], ptr [[MAT_ADDR]], align 4
1052 // CHECK-NEXT: ret void
1054 a[4 + i][1 + 1u] = i;
1057 // Check that we can use matrix index expressions on FP and integer
1058 // matrixes.
1059 typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
// Stores `i` into (*a)[4][1] through a pointer to a 9x3 int matrix. Both
// subscripts are constants, so the expected IR inserts at the fixed flat
// index 13, with no index arithmetic and no llvm.assume in either run.
// NOTE(review): despite the function name, the stored value is an int.
1060 void insert_float_into_int_matrix(ix9x3_t *a, int i) {
1061 // CHECK-LABEL: @insert_float_into_int_matrix(
1062 // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1063 // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1064 // NOOPT-NEXT: [[MAT_ADDR1:%.*]] = load ptr, ptr %a.addr, align 8{{$}}
1065 // OPT-NEXT: [[MAT_ADDR1:%.*]] = load ptr, ptr %a.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1066 // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR1]], align 4{{$}}
1067 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I]], i64 13
1068 // CHECK-NEXT: store <27 x i32> [[MATINS]], ptr [[MAT_ADDR1]], align 4
1069 // CHECK-NEXT: ret void
1071 (*a)[4][1] = i;
1074 // Check that we can use overloaded matrix index expressions on matrixes with
1075 // matching dimensions, but different element types.
1076 typedef double dx3x3_t __attribute__((matrix_type(3, 3)));
1077 typedef float fx3x3_t __attribute__((matrix_type(3, 3)));
// Stores `i` into a[2u][1u] of a 3x3 double matrix. The constant unsigned
// subscripts give the fixed flat index 5 in the expected insertelement.
1078 void insert_matching_dimensions1(dx3x3_t a, double i) {
1079 // CHECK-LABEL: @insert_matching_dimensions1(
1080 // NOOPT: [[I:%.*]] = load double, ptr %i.addr, align 8{{$}}
1081 // OPT: [[I:%.*]] = load double, ptr %i.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1082 // CHECK-NEXT: [[MAT:%.*]] = load <9 x double>, ptr [[MAT_ADDR:%.*]], align 8{{$}}
1083 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x double> [[MAT]], double [[I]], i64 5
1084 // CHECK-NEXT: store <9 x double> [[MATINS]], ptr [[MAT_ADDR]], align 8
1085 // CHECK-NEXT: ret void
1087 a[2u][1u] = i;
// Float counterpart of the 3x3 double insertion test: stores `e` into
// b[1u][2u] of a 3x3 float matrix, expected at the fixed flat index 7.
1090 void insert_matching_dimensions(fx3x3_t b, float e) {
1091 // CHECK-LABEL: @insert_matching_dimensions(
1092 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
1093 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1094 // CHECK-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1095 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x float> [[MAT]], float [[E]], i64 7
1096 // CHECK-NEXT: store <9 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1097 // CHECK-NEXT: ret void
1099 b[1u][2u] = e;
// Returns a[2][3 - 1u] from a 5x5 double matrix. The subscripts are
// constant expressions, so the expected IR extracts the element at the
// fixed flat index 12 and returns it directly.
1102 double extract_double(dx5x5_t a) {
1103 // CHECK-LABEL: @extract_double(
1104 // NOOPT: [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
1105 // OPT: [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
1106 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <25 x double> [[MAT]], i64 12
1107 // CHECK-NEXT: ret double [[MATEXT]]
1109 return a[2][3 - 1u];
// Returns b[2][1] from a 3x3 float matrix as a double. The expected IR
// extracts the element at the fixed flat index 5 and then widens it with
// fpext for the double return type.
1112 double extract_float(fx3x3_t b) {
1113 // CHECK-LABEL: @extract_float(
1114 // NOOPT: [[MAT:%.*]] = load <9 x float>, ptr {{.*}}, align 4{{$}}
1115 // OPT: [[MAT:%.*]] = load <9 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
1116 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 5
1117 // CHECK-NEXT: [[TO_DOUBLE:%.*]] = fpext float [[MATEXT]] to double
1118 // CHECK-NEXT: ret double [[TO_DOUBLE]]
1120 return b[2][1];
// Returns c[j][j] from a 9x3 int matrix, using the same variable for both
// subscripts. `j` is expected to be loaded twice and combined into the
// flat index j * 9 + j. Only the -O1 run expects an llvm.assume bounding
// the index below 27, placed before the matrix load.
1123 int extract_int(ix9x3_t c, unsigned long j) {
1124 // CHECK-LABEL: @extract_int(
1125 // NOOPT: [[J1:%.*]] = load i64, ptr %j.addr, align 8{{$}}
1126 // NOOPT-NEXT: [[J2:%.*]] = load i64, ptr %j.addr, align 8{{$}}
1127 // OPT: [[J1:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1128 // OPT-NEXT: [[J2:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1129 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J2]], 9
1130 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J1]]
1131 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
1132 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
1133 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1134 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
1135 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <27 x i32> [[MAT]], i64 [[IDX2]]
1136 // CHECK-NEXT: ret i32 [[MATEXT]]
1138 return c[j][j];
1141 typedef double dx3x2_t __attribute__((matrix_type(3, 2)));
// Returns ptr[1][2][j][1]. The first two subscripts are ordinary pointer
// indexing (expected as two getelementptr steps with offsets 1 and 2), and
// the last two select an element of the 3x2 double matrix reached that way.
// The expected flat index is 3 + zext(j); only the -O1 run expects an
// llvm.assume bounding it below 6.
1143 double test_extract_matrix_pointer1(dx3x2_t **ptr, unsigned j) {
1144 // CHECK-LABEL: @test_extract_matrix_pointer1(
1145 // NOOPT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1146 // OPT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1147 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
1148 // CHECK-NEXT: [[IDX:%.*]] = add i64 3, [[J_EXT]]
1149 // NOOPT-NEXT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8{{$}}
1150 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 6
1151 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1152 // OPT-NEXT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1153 // CHECK-NEXT: [[PTR_IDX:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 1
1154 // NOOPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8{{$}}
1155 // OPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1156 // CHECK-NEXT: [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], ptr [[PTR2]], i64 2
1157 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8{{$}}
1158 // OPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1159 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 [[IDX]]
1160 // CHECK-NEXT: ret double [[MATEXT]]
1162 return ptr[1][2][j][1];
// Same shape of access as the pointer-subscript variant, but spelled with
// explicit pointer arithmetic: (*(*(ptr + 4) + 6))[2][1 * 3 - 2]. The
// expected IR uses getelementptr offsets 4 and 6, and the constant matrix
// subscripts give the fixed flat index 5, so no llvm.assume is expected.
1165 double test_extract_matrix_pointer2(dx3x2_t **ptr) {
1166 // CHECK-LABEL: @test_extract_matrix_pointer2(
1167 // CHECK-NEXT: entry:
1168 // NOOPT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8{{$}}
1169 // OPT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1170 // CHECK-NEXT: [[PTR_IDX:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 4
1171 // NOOPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8{{$}}
1172 // OPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1173 // CHECK-NEXT: [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], ptr [[PTR2]], i64 6
1174 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8{{$}}
1175 // OPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1176 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 5
1177 // CHECK-NEXT: ret double [[MATEXT]]
1179 return (*(*(ptr + 4) + 6))[2][1 * 3 - 2];
// Copies one element of a 3x3 float matrix to another: b[2][j] = b[0][k].
// The source element is expected at flat index sext(k) * 3 + 0 and the
// destination at j * 3 + 2. The matrix is loaded once for the extract and
// again for the insert. Only the -O1 run expects an llvm.assume bounding
// each index below 9. Parameter `a` is not used by the body.
1182 void insert_extract(dx5x5_t a, fx3x3_t b, unsigned long j, short k) {
1183 // CHECK-LABEL: @insert_extract(
1184 // NOOPT: [[K:%.*]] = load i16, ptr %k.addr, align 2{{$}}
1185 // OPT: [[K:%.*]] = load i16, ptr %k.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
1186 // CHECK-NEXT: [[K_EXT:%.*]] = sext i16 [[K]] to i64
1187 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K_EXT]], 3
1188 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], 0
1189 // NOOPT-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1190 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 9
1191 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1192 // OPT-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
1193 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 [[IDX2]]
1194 // NOOPT-NEXT: [[J:%.*]] = load i64, ptr %j.addr, align 8{{$}}
1195 // OPT-NEXT: [[J:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1196 // CHECK-NEXT: [[IDX3:%.*]] = mul i64 [[J]], 3
1197 // CHECK-NEXT: [[IDX4:%.*]] = add i64 [[IDX3]], 2
1198 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX4]], 9
1199 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1200 // CHECK-NEXT: [[MAT2:%.*]] = load <9 x float>, ptr [[MAT_ADDR]], align 4{{$}}
1201 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x float> [[MAT2]], float [[MATEXT]], i64 [[IDX4]]
1202 // CHECK-NEXT: store <9 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1203 // CHECK-NEXT: ret void
1205 b[2][j] = b[0][k];
// Compound assignment on one element of a 5x5 double matrix:
// a[2][3] -= 1.0. The expected IR loads the matrix, extracts the element
// at fixed flat index 17, subtracts 1.0, reloads the matrix, and inserts
// the result back at index 17. The expectations are shared by both runs.
1208 void insert_compound_stmt(dx5x5_t a) {
1209 // CHECK-LABEL: define{{.*}} void @insert_compound_stmt(<25 x double> noundef %a)
1210 // CHECK: [[A:%.*]] = load <25 x double>, ptr [[A_PTR:%.*]], align 8{{$}}
1211 // CHECK-NEXT: [[EXT:%.*]] = extractelement <25 x double> [[A]], i64 17
1212 // CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT]], 1.000000e+00
1213 // CHECK-NEXT: [[A2:%.*]] = load <25 x double>, ptr [[A_PTR]], align 8{{$}}
1214 // CHECK-NEXT: [[INS:%.*]] = insertelement <25 x double> [[A2]], double [[SUB]], i64 17
1215 // CHECK-NEXT: store <25 x double> [[INS]], ptr [[A_PTR]], align 8
1216 // CHECK-NEXT: ret void
1218 a[2][3] -= 1.0;
// Struct holding a single 2x3 float matrix member, so that matrix elements
// can be accessed through a struct field.
1221 struct Foo {
1222 fx2x3_t mat;
// Compound assignment on a matrix element reached through a struct
// pointer: a->mat[i][j] += f, on a 2x3 float matrix. The flat index
// zext(j) * 2 + zext(i) is expected to be computed once and reused for both
// the extract and the insert. Only the -O1 run expects an llvm.assume
// bounding the index below 6 before each of the two matrix loads.
1225 void insert_compound_stmt_field(struct Foo *a, float f, unsigned i, unsigned j) {
1226 // CHECK-LABEL: define{{.*}} void @insert_compound_stmt_field(ptr noundef %a, float noundef %f, i32 noundef %i, i32 noundef %j)
1227 // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1228 // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1229 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
1230 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1231 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1232 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
1233 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
1234 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
1235 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1236 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1237 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr %mat, align 4{{$}}
1238 // CHECK-NEXT: [[EXT:%.*]] = extractelement <6 x float> [[MAT]], i64 [[IDX2]]
1239 // CHECK-NEXT: [[SUM:%.*]] = fadd float [[EXT]], {{.*}}
1240 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1241 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1242 // CHECK-NEXT: [[MAT2:%.*]] = load <6 x float>, ptr %mat, align 4{{$}}
1243 // CHECK-NEXT: [[INS:%.*]] = insertelement <6 x float> [[MAT2]], float [[SUM]], i64 [[IDX2]]
1244 // CHECK-NEXT: store <6 x float> [[INS]], ptr %mat, align 4
1245 // CHECK-NEXT: ret void
1247 a->mat[i][j] += f;
// Uses elements of one matrix as subscripts into another:
// b[a[i][j]][a[j][i] + 2] = 1.5, with `a` a 9x3 int matrix and `b` a 5x5
// double matrix. Expected sequence: a[i][j] is read at flat index
// j * 9 + i, a[j][i] is read at i * 9 + j and incremented by 2, and the
// store into `b` uses (a[j][i] + 2) * 5 + a[i][j]. Only the -O1 run
// expects an llvm.assume for each of the three indices (bounds 27, 27, 25).
1250 void matrix_as_idx(ix9x3_t a, int i, int j, dx5x5_t b) {
1251 // CHECK-LABEL: define{{.*}} void @matrix_as_idx(<27 x i32> noundef %a, i32 noundef %i, i32 noundef %j, <25 x double> noundef %b)
1252 // NOOPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1253 // OPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1254 // CHECK-NEXT: [[I1_EXT:%.*]] = sext i32 [[I1]] to i64
1255 // NOOPT-NEXT: [[J1:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1256 // OPT-NEXT: [[J1:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1257 // CHECK-NEXT: [[J1_EXT:%.*]] = sext i32 [[J1]] to i64
1258 // CHECK-NEXT: [[IDX1_1:%.*]] = mul i64 [[J1_EXT]], 9
1259 // CHECK-NEXT: [[IDX1_2:%.*]] = add i64 [[IDX1_1]], [[I1_EXT]]
1260 // NOOPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
1261 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX1_2]], 27
1262 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1263 // OPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1264 // CHECK-NEXT: [[MI1:%.*]] = extractelement <27 x i32> [[A]], i64 [[IDX1_2]]
1265 // CHECK-NEXT: [[MI1_EXT:%.*]] = sext i32 [[MI1]] to i64
1266 // NOOPT-NEXT: [[J2:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1267 // OPT-NEXT: [[J2:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1268 // CHECK-NEXT: [[J2_EXT:%.*]] = sext i32 [[J2]] to i64
1269 // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1270 // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1271 // CHECK-NEXT: [[I2_EXT:%.*]] = sext i32 [[I2]] to i64
1272 // CHECK-NEXT: [[IDX2_1:%.*]] = mul i64 [[I2_EXT]], 9
1273 // CHECK-NEXT: [[IDX2_2:%.*]] = add i64 [[IDX2_1]], [[J2_EXT]]
1274 // NOOPT-NEXT: [[A2:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
1275 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2_2]], 27
1276 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1277 // OPT-NEXT: [[A2:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
1278 // CHECK-NEXT: [[MI2:%.*]] = extractelement <27 x i32> [[A2]], i64 [[IDX2_2]]
1279 // CHECK-NEXT: [[MI3:%.*]] = add nsw i32 [[MI2]], 2
1280 // CHECK-NEXT: [[MI3_EXT:%.*]] = sext i32 [[MI3]] to i64
1281 // CHECK-NEXT: [[IDX3_1:%.*]] = mul i64 [[MI3_EXT]], 5
1282 // CHECK-NEXT: [[IDX3_2:%.*]] = add i64 [[IDX3_1]], [[MI1_EXT]]
1283 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX3_2]], 25
1284 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1285 // CHECK-NEXT: [[B:%.*]] = load <25 x double>, ptr [[B_PTR:%.*]], align 8{{$}}
1286 // CHECK-NEXT: [[INS:%.*]] = insertelement <25 x double> [[B]], double 1.500000e+00, i64 [[IDX3_2]]
1287 // CHECK-NEXT: store <25 x double> [[INS]], ptr [[B_PTR]], align 8
1288 b[a[i][j]][a[j][i] + 2] = 1.5;