// RUN: %clang_cc1 -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,NOOPT %s
// RUN: %clang_cc1 -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,OPT %s
// Matrix typedefs for the element types exercised by the tests below.
typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
typedef unsigned long long ullx4x2_t __attribute__((matrix_type(4, 2)));
// Floating point matrix/scalar additions.
12 void add_matrix_matrix_double(dx5x5_t a
, dx5x5_t b
, dx5x5_t c
) {
13 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b, <25 x double> noundef %c)
14 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
15 // NOOPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
16 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
17 // OPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
18 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[B]], [[C]]
19 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
24 void add_compound_assign_matrix_double(dx5x5_t a
, dx5x5_t b
) {
25 // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b)
26 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
27 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
28 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
29 // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
30 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[A]], [[B]]
31 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
36 void subtract_compound_assign_matrix_double(dx5x5_t a
, dx5x5_t b
) {
37 // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b)
38 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
39 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
40 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
41 // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
42 // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[A]], [[B]]
43 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
48 void add_matrix_matrix_float(fx2x3_t a
, fx2x3_t b
, fx2x3_t c
) {
49 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b, <6 x float> noundef %c)
50 // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
51 // NOOPT-NEXT: [[C:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
52 // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
53 // OPT-NEXT: [[C:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
54 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[B]], [[C]]
55 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
60 void add_compound_assign_matrix_float(fx2x3_t a
, fx2x3_t b
) {
61 // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b)
62 // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
63 // NOOPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
64 // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
65 // OPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
66 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[A]], [[B]]
67 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
72 void subtract_compound_assign_matrix_float(fx2x3_t a
, fx2x3_t b
) {
73 // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b)
74 // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
75 // NOOPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
76 // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
77 // OPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
78 // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[A]], [[B]]
79 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
84 void add_matrix_scalar_double_float(dx5x5_t a
, float vf
) {
85 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
86 // NOOPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
87 // NOOPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
88 // OPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
89 // OPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
90 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
91 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
92 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
93 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
94 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
99 void add_compound_matrix_scalar_double_float(dx5x5_t a
, float vf
) {
100 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
101 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
102 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
103 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
104 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
105 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
106 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
107 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
108 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
109 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
114 void subtract_compound_matrix_scalar_double_float(dx5x5_t a
, float vf
) {
115 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf)
116 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
117 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
118 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
119 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
120 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
121 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
122 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
123 // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
124 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
129 void add_matrix_scalar_double_double(dx5x5_t a
, double vd
) {
130 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
131 // NOOPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
132 // NOOPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
133 // OPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
134 // OPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
135 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
136 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
137 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
138 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
143 void add_compound_matrix_scalar_double_double(dx5x5_t a
, double vd
) {
144 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
145 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
146 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
147 // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
148 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
149 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
150 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
151 // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
152 // store <25 x double> [[RES]], ptr {{.*}}, align 8
156 void subtract_compound_matrix_scalar_double_double(dx5x5_t a
, double vd
) {
157 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd)
158 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
159 // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
160 // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
161 // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
162 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
163 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
164 // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
165 // store <25 x double> [[RES]], ptr {{.*}}, align 8
169 void add_matrix_scalar_float_float(fx2x3_t b
, float vf
) {
170 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
171 // NOOPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
172 // NOOPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
173 // OPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
174 // OPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
175 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
176 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
177 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
178 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
183 void add_compound_matrix_scalar_float_float(fx2x3_t b
, float vf
) {
184 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
185 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
186 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4{{$}}
187 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
188 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
189 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
190 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
191 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
192 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
196 void subtract_compound_matrix_scalar_float_float(fx2x3_t b
, float vf
) {
197 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf)
198 // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}}
199 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4{{$}}
200 // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
201 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
202 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
203 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
204 // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
205 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
209 void add_matrix_scalar_float_double(fx2x3_t b
, double vd
) {
210 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
211 // NOOPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
212 // NOOPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
213 // OPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
214 // OPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
215 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
216 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
217 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
218 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
219 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
224 void add_compound_matrix_scalar_float_double(fx2x3_t b
, double vd
) {
225 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
226 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
227 // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
228 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
229 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
230 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
231 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
232 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
233 // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
234 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
238 void subtract_compound_matrix_scalar_float_double(fx2x3_t b
, double vd
) {
239 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd)
240 // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}}
241 // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
242 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
243 // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}}
244 // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
245 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
246 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
247 // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
248 // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4
// Integer matrix/scalar additions
254 void add_matrix_matrix_int(ix9x3_t a
, ix9x3_t b
, ix9x3_t c
) {
255 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b, <27 x i32> noundef %c)
256 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
257 // NOOPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
258 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
259 // OPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
260 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[B]], [[C]]
261 // CHECK-NEXT: store <27 x i32> [[RES]], ptr {{.*}}, align 4
265 void add_compound_matrix_matrix_int(ix9x3_t a
, ix9x3_t b
) {
266 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b)
267 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
268 // NOOPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
269 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
270 // OPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
271 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[A]], [[B]]
272 // CHECK-NEXT: store <27 x i32> [[RES]], ptr {{.*}}, align 4
276 void subtract_compound_matrix_matrix_int(ix9x3_t a
, ix9x3_t b
) {
277 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b)
278 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
279 // NOOPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
280 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
281 // OPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
282 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[A]], [[B]]
283 // CHECK-NEXT: store <27 x i32> [[RES]], ptr {{.*}}, align 4
287 void add_matrix_matrix_unsigned_long_long(ullx4x2_t a
, ullx4x2_t b
, ullx4x2_t c
) {
288 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b, <8 x i64> noundef %c)
289 // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
290 // NOOPT-NEXT: [[C:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
291 // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
292 // OPT-NEXT: [[C:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
293 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[B]], [[C]]
294 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
299 void add_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a
, ullx4x2_t b
) {
300 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b)
301 // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
302 // NOOPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
303 // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
304 // OPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
305 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[A]], [[B]]
306 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
311 void subtract_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a
, ullx4x2_t b
) {
312 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b)
313 // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
314 // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
315 // NOOPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
316 // OPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
317 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[A]], [[B]]
318 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
323 void add_matrix_scalar_int_short(ix9x3_t a
, short vs
) {
324 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
325 // NOOPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
326 // NOOPT-NEXT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
327 // OPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
328 // OPT-NEXT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
329 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
330 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i64 0
331 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
332 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
333 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
338 void add_compound_matrix_scalar_int_short(ix9x3_t a
, short vs
) {
339 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
340 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
341 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
342 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
343 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
344 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
345 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i64 0
346 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
347 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
348 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
353 void subtract_compound_matrix_scalar_int_short(ix9x3_t a
, short vs
) {
354 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs)
355 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
356 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
357 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
358 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
359 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
360 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i64 0
361 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
362 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
363 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
368 void add_matrix_scalar_int_long_int(ix9x3_t a
, long int vli
) {
369 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
370 // NOOPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
371 // NOOPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
372 // OPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
373 // OPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
374 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
375 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
376 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
377 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
378 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
383 void add_compound_matrix_scalar_int_long_int(ix9x3_t a
, long int vli
) {
384 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
385 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
386 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
387 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
388 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
389 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
390 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
391 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
392 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
393 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
398 void subtract_compound_matrix_scalar_int_long_int(ix9x3_t a
, long int vli
) {
399 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli)
400 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
401 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
402 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
403 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
404 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
405 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
406 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
407 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
408 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
413 void add_matrix_scalar_int_unsigned_long_long(ix9x3_t a
, unsigned long long int vulli
) {
414 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
415 // NOOPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
416 // NOOPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
417 // OPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
418 // OPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
419 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
420 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
421 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
422 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
423 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
428 void add_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a
, unsigned long long int vulli
) {
429 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
430 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
431 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
432 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
433 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4{{$}}
434 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
435 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
436 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
437 // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
438 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
443 void subtract_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a
, unsigned long long int vulli
) {
444 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli)
445 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
446 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
447 // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
448 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4{{$}}
449 // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
450 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
451 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
452 // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
453 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
458 void add_matrix_scalar_long_long_int_short(ullx4x2_t b
, short vs
) {
459 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
460 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
461 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
462 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
463 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
464 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
465 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
466 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
467 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
468 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
473 void add_compound_matrix_scalar_long_long_int_short(ullx4x2_t b
, short vs
) {
474 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
475 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
476 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
477 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
478 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
479 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
480 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
481 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
482 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
483 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
488 void subtract_compound_matrix_scalar_long_long_int_short(ullx4x2_t b
, short vs
) {
489 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs)
490 // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}}
491 // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
492 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
493 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
494 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
495 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
496 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
497 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
498 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
503 void add_matrix_scalar_long_long_int_int(ullx4x2_t b
, long int vli
) {
504 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
505 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
506 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
507 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
508 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
509 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
510 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
511 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
512 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
517 void add_compound_matrix_scalar_long_long_int_int(ullx4x2_t b
, long int vli
) {
518 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
519 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
520 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
521 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
522 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
523 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
524 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
525 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
526 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
531 void subtract_compound_matrix_scalar_long_long_int_int(ullx4x2_t b
, long int vli
) {
532 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli)
533 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}}
534 // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
535 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}}
536 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
537 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
538 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
539 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
540 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
545 void add_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b
, unsigned long long int vulli
) {
546 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_unsigned_long_long
547 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
548 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
549 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
550 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
551 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
552 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
553 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
554 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
558 void add_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b
, unsigned long long int vulli
) {
559 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_unsigned_long_long
560 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
561 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
562 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
563 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
564 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
565 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
566 // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
567 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
572 void subtract_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b
, unsigned long long int vulli
) {
573 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_unsigned_long_long
574 // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}}
575 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}}
576 // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
577 // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
578 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
579 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
580 // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
581 // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8
586 // Tests for matrix multiplication.
588 void multiply_matrix_matrix_double(dx5x5_t b
, dx5x5_t c
) {
589 // CHECK-LABEL: @multiply_matrix_matrix_double(
590 // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
591 // NOOPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
592 // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
593 // OPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
594 // CHECK-NEXT: [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
595 // CHECK-NEXT: store <25 x double> [[RES]], ptr %a, align 8
603 void multiply_compound_matrix_matrix_double(dx5x5_t b
, dx5x5_t c
) {
604 // CHECK-LABEL: @multiply_compound_matrix_matrix_double(
605 // NOOPT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
606 // NOOPT-NEXT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
607 // OPT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
608 // OPT-NEXT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
609 // CHECK-NEXT: [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
610 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
611 // CHECK-NEXT: ret void
615 typedef int ix3x9_t
__attribute__((matrix_type(3, 9)));
616 typedef int ix9x9_t
__attribute__((matrix_type(9, 9)));
617 // CHECK-LABEL: @multiply_matrix_matrix_int(
618 // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
619 // NOOPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
620 // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
621 // OPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
622 // CHECK-NEXT: [[RES:%.*]] = call <81 x i32> @llvm.matrix.multiply.v81i32.v27i32.v27i32(<27 x i32> [[B]], <27 x i32> [[C]], i32 9, i32 3, i32 9)
623 // CHECK-NEXT: store <81 x i32> [[RES]], ptr %a, align 4
626 void multiply_matrix_matrix_int(ix9x3_t b
, ix3x9_t c
) {
631 // CHECK-LABEL: @multiply_double_matrix_scalar_float(
632 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
633 // NOOPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}}
634 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
635 // OPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
636 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
637 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
638 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
639 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
640 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
641 // CHECK-NEXT: ret void
643 void multiply_double_matrix_scalar_float(dx5x5_t a
, float s
) {
647 // CHECK-LABEL: @multiply_compound_double_matrix_scalar_float
648 // NOOPT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}}
649 // OPT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
650 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
651 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
652 // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
653 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
654 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
655 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
656 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
657 // CHECK-NEXT: ret void
659 void multiply_compound_double_matrix_scalar_float(dx5x5_t a
, float s
) {
663 // CHECK-LABEL: @multiply_double_matrix_scalar_double(
664 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
665 // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
666 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
667 // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
668 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
669 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
670 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
671 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
672 // CHECK-NEXT: ret void
674 void multiply_double_matrix_scalar_double(dx5x5_t a
, double s
) {
678 // CHECK-LABEL: @multiply_compound_double_matrix_scalar_double(
679 // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
680 // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
681 // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
682 // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
683 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
684 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
685 // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
686 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
687 // CHECK-NEXT: ret void
688 void multiply_compound_double_matrix_scalar_double(dx5x5_t a
, double s
) {
692 // CHECK-LABEL: @multiply_float_matrix_scalar_double(
693 // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
694 // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
695 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
696 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
697 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
698 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
699 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
700 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[VECSPLAT]], [[MAT]]
701 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4
702 // CHECK-NEXT: ret void
704 void multiply_float_matrix_scalar_double(fx2x3_t b
, double s
) {
708 // CHECK-LABEL: @multiply_compound_float_matrix_scalar_double(
709 // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
710 // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
711 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
712 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
713 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
714 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
715 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
716 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], [[VECSPLAT]]
717 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4
718 // CHECK-NEXT: ret void
719 void multiply_compound_float_matrix_scalar_double(fx2x3_t b
, double s
) {
723 // CHECK-LABEL: @multiply_int_matrix_scalar_short(
724 // NOOPT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}}
725 // OPT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
726 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
727 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
728 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
729 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
730 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
731 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[VECSPLAT]], [[MAT]]
732 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
733 // CHECK-NEXT: ret void
735 void multiply_int_matrix_scalar_short(ix9x3_t b
, short s
) {
739 // CHECK-LABEL: @multiply_compound_int_matrix_scalar_short(
740 // NOOPT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}}
741 // OPT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
742 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
743 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
744 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
745 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
746 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
747 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
748 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
749 // CHECK-NEXT: ret void
751 void multiply_compound_int_matrix_scalar_short(ix9x3_t b
, short s
) {
755 // CHECK-LABEL: @multiply_int_matrix_scalar_ull(
756 // NOOPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
757 // OPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
758 // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
759 // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
760 // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
761 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
762 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
763 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
764 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
765 // CHECK-NEXT: ret void
767 void multiply_int_matrix_scalar_ull(ix9x3_t b
, unsigned long long s
) {
771 void multiply_compound_int_matrix_scalar_ull(ix9x3_t b
, unsigned long long s
) {
772 // CHECK-LABEL: @multiply_compound_int_matrix_scalar_ull(
773 // NOOPT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
774 // OPT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
775 // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
776 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
777 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
778 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
779 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
780 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
781 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
782 // CHECK-NEXT: ret void
787 // CHECK-LABEL: @multiply_float_matrix_constant(
788 // CHECK-NEXT: entry:
789 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
790 // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
791 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
792 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
793 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00)
794 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
795 // CHECK-NEXT: ret void
797 void multiply_float_matrix_constant(fx2x3_t a
) {
801 // CHECK-LABEL: @multiply_compound_float_matrix_constant(
802 // CHECK-NEXT: entry:
803 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
804 // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
805 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
806 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
807 // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00)
808 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
809 // CHECK-NEXT: ret void
810 void multiply_compound_float_matrix_constant(fx2x3_t a
) {
814 // CHECK-LABEL: @multiply_int_matrix_constant(
815 // CHECK-NEXT: entry:
816 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [27 x i32], align 4
817 // CHECK-NEXT: store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4
818 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}}
819 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
820 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> splat (i32 5), [[MAT]]
821 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4
822 // CHECK-NEXT: ret void
824 void multiply_int_matrix_constant(ix9x3_t a
) {
828 // CHECK-LABEL: @multiply_compound_int_matrix_constant(
829 // CHECK-NEXT: entry:
830 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [27 x i32], align 4
831 // CHECK-NEXT: store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4
832 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}}
833 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
834 // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], splat (i32 5)
835 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4
836 // CHECK-NEXT: ret void
838 void multiply_compound_int_matrix_constant(ix9x3_t a
) {
842 // CHECK-LABEL: @divide_double_matrix_scalar_float(
843 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
844 // NOOPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}}
845 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
846 // OPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
847 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
848 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
849 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
850 // CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
851 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
852 // CHECK-NEXT: ret void
854 void divide_double_matrix_scalar_float(dx5x5_t a
, float s
) {
858 // CHECK-LABEL: @divide_double_matrix_scalar_double(
859 // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
860 // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
861 // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
862 // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
863 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
864 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
865 // CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
866 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
867 // CHECK-NEXT: ret void
869 void divide_double_matrix_scalar_double(dx5x5_t a
, double s
) {
873 // CHECK-LABEL: @divide_float_matrix_scalar_double(
874 // NOOPT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
875 // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}}
876 // OPT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
877 // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
878 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
879 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
880 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
881 // CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], [[VECSPLAT]]
882 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4
883 // CHECK-NEXT: ret void
885 void divide_float_matrix_scalar_double(fx2x3_t b
, double s
) {
889 // CHECK-LABEL: @divide_int_matrix_scalar_short(
890 // NOOPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
891 // NOOPT-NEXT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}}
892 // OPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
893 // OPT-NEXT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
894 // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
895 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
896 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
897 // CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
898 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
899 // CHECK-NEXT: ret void
901 void divide_int_matrix_scalar_short(ix9x3_t b
, short s
) {
905 // CHECK-LABEL: @divide_int_matrix_scalar_ull(
906 // NOOPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
907 // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
908 // OPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
909 // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
910 // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
911 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
912 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
913 // CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
914 // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4
915 // CHECK-NEXT: ret void
917 void divide_int_matrix_scalar_ull(ix9x3_t b
, unsigned long long s
) {
921 // CHECK-LABEL: @divide_ull_matrix_scalar_ull(
922 // NOOPT: [[MAT:%.*]] = load <8 x i64>, ptr [[MAT_ADDR:%.*]], align 8{{$}}
923 // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}}
924 // OPT: [[MAT:%.*]] = load <8 x i64>, ptr [[MAT_ADDR:%.*]], align 8, !tbaa !{{[0-9]+}}{{$}}
925 // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
926 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[S]], i64 0
927 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <8 x i64> [[VECINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
928 // CHECK-NEXT: [[RES:%.*]] = udiv <8 x i64> [[MAT]], [[VECSPLAT]]
929 // CHECK-NEXT: store <8 x i64> [[RES]], ptr [[MAT_ADDR]], align 8
930 // CHECK-NEXT: ret void
932 void divide_ull_matrix_scalar_ull(ullx4x2_t b
, unsigned long long s
) {
936 // CHECK-LABEL: @divide_float_matrix_constant(
937 // CHECK-NEXT: entry:
938 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
939 // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
940 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
941 // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
942 // CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], splat (float 2.500000e+00)
943 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
944 // CHECK-NEXT: ret void
946 void divide_float_matrix_constant(fx2x3_t a
) {
950 // Tests for the matrix type operators.
952 typedef double dx5x5_t
__attribute__((matrix_type(5, 5)));
953 typedef float fx2x3_t
__attribute__((matrix_type(2, 3)));
955 // Check that we can use matrix index expressions on different floating point
956 // matrixes and indices.
957 void insert_double_matrix_const_idx_ll_u_double(dx5x5_t a
, double d
, fx2x3_t b
, float e
, int j
, unsigned k
) {
958 // CHECK-LABEL: @insert_double_matrix_const_idx_ll_u_double(
959 // NOOPT: [[D:%.*]] = load double, ptr %d.addr, align 8{{$}}
960 // OPT: [[D:%.*]] = load double, ptr %d.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
961 // CHECK-NEXT: [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
962 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 5
963 // CHECK-NEXT: store <25 x double> [[MATINS]], ptr {{.*}}, align 8
964 // CHECK-NEXT: ret void
969 void insert_double_matrix_const_idx_i_u_double(dx5x5_t a
, double d
) {
970 // CHECK-LABEL: @insert_double_matrix_const_idx_i_u_double(
971 // NOOPT: [[D:%.*]] = load double, ptr %d.addr, align 8{{$}}
972 // OPT: [[D:%.*]] = load double, ptr %d.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
973 // CHECK-NEXT: [[MAT:%.*]] = load <25 x double>, ptr [[MAT_ADDR:%.*]], align 8{{$}}
974 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 21
975 // CHECK-NEXT: store <25 x double> [[MATINS]], ptr [[MAT_ADDR]], align 8
976 // CHECK-NEXT: ret void
981 void insert_float_matrix_const_idx_ull_i_float(fx2x3_t b
, float e
) {
982 // CHECK-LABEL: @insert_float_matrix_const_idx_ull_i_float(
983 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
984 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
985 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
986 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 3
987 // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
988 // CHECK-NEXT: ret void
993 void insert_float_matrix_idx_i_u_float(fx2x3_t b
, float e
, int j
, unsigned k
) {
994 // CHECK-LABEL: @insert_float_matrix_idx_i_u_float(
995 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
996 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
997 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
998 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
999 // CHECK-NEXT: [[J_EXT:%.*]] = sext i32 [[J]] to i64
1000 // NOOPT-NEXT: [[K:%.*]] = load i32, ptr %k.addr, align 4{{$}}
1001 // OPT-NEXT: [[K:%.*]] = load i32, ptr %k.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1002 // CHECK-NEXT: [[K_EXT:%.*]] = zext i32 [[K]] to i64
1003 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K_EXT]], 2
1004 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
1005 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1006 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1007 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1008 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
1009 // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1010 // CHECK-NEXT: ret void
1015 void insert_float_matrix_idx_s_ull_float(fx2x3_t b
, float e
, short j
, unsigned long long k
) {
1016 // CHECK-LABEL: @insert_float_matrix_idx_s_ull_float(
1017 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
1018 // NOOPT-NEXT: [[J:%.*]] = load i16, ptr %j.addr, align 2{{$}}
1019 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1020 // OPT-NEXT: [[J:%.*]] = load i16, ptr %j.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
1021 // CHECK-NEXT: [[J_EXT:%.*]] = sext i16 [[J]] to i64
1022 // NOOPT-NEXT: [[K:%.*]] = load i64, ptr %k.addr, align 8{{$}}
1023 // OPT-NEXT: [[K:%.*]] = load i64, ptr %k.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1024 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K]], 2
1025 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
1026 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1027 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1028 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1029 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
1030 // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1031 // CHECK-NEXT: ret void
1036 // Check that we can use matrix index expressions on integer matrixes.
1037 typedef int ix9x3_t
__attribute__((matrix_type(9, 3)));
1038 void insert_int_idx_expr(ix9x3_t a
, int i
) {
1039 // CHECK-LABEL: @insert_int_idx_expr(
1040 // NOOPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1041 // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1042 // OPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1043 // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1044 // CHECK-NEXT: [[I2_ADD:%.*]] = add nsw i32 4, [[I2]]
1045 // CHECK-NEXT: [[ADD_EXT:%.*]] = sext i32 [[I2_ADD]] to i64
1046 // CHECK-NEXT: [[IDX2:%.*]] = add i64 18, [[ADD_EXT]]
1047 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
1048 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1049 // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1050 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I1]], i64 [[IDX2]]
1051 // CHECK-NEXT: store <27 x i32> [[MATINS]], ptr [[MAT_ADDR]], align 4
1052 // CHECK-NEXT: ret void
1054 a
[4 + i
][1 + 1u] = i
;
1057 // Check that we can use matrix index expressions on FP and integer
1059 typedef int ix9x3_t
__attribute__((matrix_type(9, 3)));
1060 void insert_float_into_int_matrix(ix9x3_t
*a
, int i
) {
1061 // CHECK-LABEL: @insert_float_into_int_matrix(
1062 // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1063 // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1064 // NOOPT-NEXT: [[MAT_ADDR1:%.*]] = load ptr, ptr %a.addr, align 8{{$}}
1065 // OPT-NEXT: [[MAT_ADDR1:%.*]] = load ptr, ptr %a.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1066 // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR1]], align 4{{$}}
1067 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I]], i64 13
1068 // CHECK-NEXT: store <27 x i32> [[MATINS]], ptr [[MAT_ADDR1]], align 4
1069 // CHECK-NEXT: ret void
1074 // Check that we can use overloaded matrix index expressions on matrixes with
1075 // matching dimensions, but different element types.
1076 typedef double dx3x3_t
__attribute__((matrix_type(3, 3)));
1077 typedef float fx3x3_t
__attribute__((matrix_type(3, 3)));
1078 void insert_matching_dimensions1(dx3x3_t a
, double i
) {
1079 // CHECK-LABEL: @insert_matching_dimensions1(
1080 // NOOPT: [[I:%.*]] = load double, ptr %i.addr, align 8{{$}}
1081 // OPT: [[I:%.*]] = load double, ptr %i.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1082 // CHECK-NEXT: [[MAT:%.*]] = load <9 x double>, ptr [[MAT_ADDR:%.*]], align 8{{$}}
1083 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x double> [[MAT]], double [[I]], i64 5
1084 // CHECK-NEXT: store <9 x double> [[MATINS]], ptr [[MAT_ADDR]], align 8
1085 // CHECK-NEXT: ret void
1090 void insert_matching_dimensions(fx3x3_t b
, float e
) {
1091 // CHECK-LABEL: @insert_matching_dimensions(
1092 // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
1093 // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1094 // CHECK-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1095 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x float> [[MAT]], float [[E]], i64 7
1096 // CHECK-NEXT: store <9 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1097 // CHECK-NEXT: ret void
1102 double extract_double(dx5x5_t a
) {
1103 // CHECK-LABEL: @extract_double(
1104 // NOOPT: [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}}
1105 // OPT: [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
1106 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <25 x double> [[MAT]], i64 12
1107 // CHECK-NEXT: ret double [[MATEXT]]
1109 return a
[2][3 - 1u];
1112 double extract_float(fx3x3_t b
) {
1113 // CHECK-LABEL: @extract_float(
1114 // NOOPT: [[MAT:%.*]] = load <9 x float>, ptr {{.*}}, align 4{{$}}
1115 // OPT: [[MAT:%.*]] = load <9 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
1116 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 5
1117 // CHECK-NEXT: [[TO_DOUBLE:%.*]] = fpext float [[MATEXT]] to double
1118 // CHECK-NEXT: ret double [[TO_DOUBLE]]
1123 int extract_int(ix9x3_t c
, unsigned long j
) {
1124 // CHECK-LABEL: @extract_int(
1125 // NOOPT: [[J1:%.*]] = load i64, ptr %j.addr, align 8{{$}}
1126 // NOOPT-NEXT: [[J2:%.*]] = load i64, ptr %j.addr, align 8{{$}}
1127 // OPT: [[J1:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1128 // OPT-NEXT: [[J2:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1129 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J2]], 9
1130 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J1]]
1131 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
1132 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
1133 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1134 // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
1135 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <27 x i32> [[MAT]], i64 [[IDX2]]
1136 // CHECK-NEXT: ret i32 [[MATEXT]]
1141 typedef double dx3x2_t
__attribute__((matrix_type(3, 2)));
1143 double test_extract_matrix_pointer1(dx3x2_t
**ptr
, unsigned j
) {
1144 // CHECK-LABEL: @test_extract_matrix_pointer1(
1145 // NOOPT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1146 // OPT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1147 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
1148 // CHECK-NEXT: [[IDX:%.*]] = add i64 3, [[J_EXT]]
1149 // NOOPT-NEXT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8{{$}}
1150 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 6
1151 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1152 // OPT-NEXT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1153 // CHECK-NEXT: [[PTR_IDX:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 1
1154 // NOOPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8{{$}}
1155 // OPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1156 // CHECK-NEXT: [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], ptr [[PTR2]], i64 2
1157 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8{{$}}
1158 // OPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1159 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 [[IDX]]
1160 // CHECK-NEXT: ret double [[MATEXT]]
1162 return ptr
[1][2][j
][1];
1165 double test_extract_matrix_pointer2(dx3x2_t
**ptr
) {
1166 // CHECK-LABEL: @test_extract_matrix_pointer2(
1167 // CHECK-NEXT: entry:
1168 // NOOPT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8{{$}}
1169 // OPT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1170 // CHECK-NEXT: [[PTR_IDX:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 4
1171 // NOOPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8{{$}}
1172 // OPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1173 // CHECK-NEXT: [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], ptr [[PTR2]], i64 6
1174 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8{{$}}
1175 // OPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}}
1176 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 5
1177 // CHECK-NEXT: ret double [[MATEXT]]
1179 return (*(*(ptr
+ 4) + 6))[2][1 * 3 - 2];
1182 void insert_extract(dx5x5_t a
, fx3x3_t b
, unsigned long j
, short k
) {
1183 // CHECK-LABEL: @insert_extract(
1184 // NOOPT: [[K:%.*]] = load i16, ptr %k.addr, align 2{{$}}
1185 // OPT: [[K:%.*]] = load i16, ptr %k.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
1186 // CHECK-NEXT: [[K_EXT:%.*]] = sext i16 [[K]] to i64
1187 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K_EXT]], 3
1188 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], 0
1189 // NOOPT-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}}
1190 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 9
1191 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1192 // OPT-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
1193 // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 [[IDX2]]
1194 // NOOPT-NEXT: [[J:%.*]] = load i64, ptr %j.addr, align 8{{$}}
1195 // OPT-NEXT: [[J:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
1196 // CHECK-NEXT: [[IDX3:%.*]] = mul i64 [[J]], 3
1197 // CHECK-NEXT: [[IDX4:%.*]] = add i64 [[IDX3]], 2
1198 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX4]], 9
1199 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1200 // CHECK-NEXT: [[MAT2:%.*]] = load <9 x float>, ptr [[MAT_ADDR]], align 4{{$}}
1201 // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x float> [[MAT2]], float [[MATEXT]], i64 [[IDX4]]
1202 // CHECK-NEXT: store <9 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4
1203 // CHECK-NEXT: ret void
1208 void insert_compound_stmt(dx5x5_t a
) {
1209 // CHECK-LABEL: define{{.*}} void @insert_compound_stmt(<25 x double> noundef %a)
1210 // CHECK: [[A:%.*]] = load <25 x double>, ptr [[A_PTR:%.*]], align 8{{$}}
1211 // CHECK-NEXT: [[EXT:%.*]] = extractelement <25 x double> [[A]], i64 17
1212 // CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT]], 1.000000e+00
1213 // CHECK-NEXT: [[A2:%.*]] = load <25 x double>, ptr [[A_PTR]], align 8{{$}}
1214 // CHECK-NEXT: [[INS:%.*]] = insertelement <25 x double> [[A2]], double [[SUB]], i64 17
1215 // CHECK-NEXT: store <25 x double> [[INS]], ptr [[A_PTR]], align 8
1216 // CHECK-NEXT: ret void
1225 void insert_compound_stmt_field(struct Foo
*a
, float f
, unsigned i
, unsigned j
) {
1226 // CHECK-LABEL: define{{.*}} void @insert_compound_stmt_field(ptr noundef %a, float noundef %f, i32 noundef %i, i32 noundef %j)
1227 // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1228 // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1229 // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64
1230 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1231 // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1232 // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
1233 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
1234 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
1235 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1236 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1237 // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr %mat, align 4{{$}}
1238 // CHECK-NEXT: [[EXT:%.*]] = extractelement <6 x float> [[MAT]], i64 [[IDX2]]
1239 // CHECK-NEXT: [[SUM:%.*]] = fadd float [[EXT]], {{.*}}
1240 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1241 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1242 // CHECK-NEXT: [[MAT2:%.*]] = load <6 x float>, ptr %mat, align 4{{$}}
1243 // CHECK-NEXT: [[INS:%.*]] = insertelement <6 x float> [[MAT2]], float [[SUM]], i64 [[IDX2]]
1244 // CHECK-NEXT: store <6 x float> [[INS]], ptr %mat, align 4
1245 // CHECK-NEXT: ret void
1250 void matrix_as_idx(ix9x3_t a
, int i
, int j
, dx5x5_t b
) {
1251 // CHECK-LABEL: define{{.*}} void @matrix_as_idx(<27 x i32> noundef %a, i32 noundef %i, i32 noundef %j, <25 x double> noundef %b)
1252 // NOOPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1253 // OPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1254 // CHECK-NEXT: [[I1_EXT:%.*]] = sext i32 [[I1]] to i64
1255 // NOOPT-NEXT: [[J1:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1256 // OPT-NEXT: [[J1:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1257 // CHECK-NEXT: [[J1_EXT:%.*]] = sext i32 [[J1]] to i64
1258 // CHECK-NEXT: [[IDX1_1:%.*]] = mul i64 [[J1_EXT]], 9
1259 // CHECK-NEXT: [[IDX1_2:%.*]] = add i64 [[IDX1_1]], [[I1_EXT]]
1260 // NOOPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}}
1261 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX1_2]], 27
1262 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1263 // OPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1264 // CHECK-NEXT: [[MI1:%.*]] = extractelement <27 x i32> [[A]], i64 [[IDX1_2]]
1265 // CHECK-NEXT: [[MI1_EXT:%.*]] = sext i32 [[MI1]] to i64
1266 // NOOPT-NEXT: [[J2:%.*]] = load i32, ptr %j.addr, align 4{{$}}
1267 // OPT-NEXT: [[J2:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1268 // CHECK-NEXT: [[J2_EXT:%.*]] = sext i32 [[J2]] to i64
1269 // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4{{$}}
1270 // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
1271 // CHECK-NEXT: [[I2_EXT:%.*]] = sext i32 [[I2]] to i64
1272 // CHECK-NEXT: [[IDX2_1:%.*]] = mul i64 [[I2_EXT]], 9
1273 // CHECK-NEXT: [[IDX2_2:%.*]] = add i64 [[IDX2_1]], [[J2_EXT]]
1274 // NOOPT-NEXT: [[A2:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}}
1275 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2_2]], 27
1276 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1277 // OPT-NEXT: [[A2:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
1278 // CHECK-NEXT: [[MI2:%.*]] = extractelement <27 x i32> [[A2]], i64 [[IDX2_2]]
1279 // CHECK-NEXT: [[MI3:%.*]] = add nsw i32 [[MI2]], 2
1280 // CHECK-NEXT: [[MI3_EXT:%.*]] = sext i32 [[MI3]] to i64
1281 // CHECK-NEXT: [[IDX3_1:%.*]] = mul i64 [[MI3_EXT]], 5
1282 // CHECK-NEXT: [[IDX3_2:%.*]] = add i64 [[IDX3_1]], [[MI1_EXT]]
1283 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX3_2]], 25
1284 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1285 // CHECK-NEXT: [[B:%.*]] = load <25 x double>, ptr [[B_PTR:%.*]], align 8{{$}}
1286 // CHECK-NEXT: [[INS:%.*]] = insertelement <25 x double> [[B]], double 1.500000e+00, i64 [[IDX3_2]]
1287 // CHECK-NEXT: store <25 x double> [[INS]], ptr [[B_PTR]], align 8
1288 b
[a
[i
][j
]][a
[j
][i
] + 2] = 1.5;