Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / CodeGen / matrix-type-operators-fast-math.c
blob724b2828302861caf048837b834958ce3023e09e
1 // RUN: %clang_cc1 -ffast-math -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
// Matrix typedefs shared by the tests in this file, declared with the
// matrix_type attribute (the RUN line passes -fenable-matrix).
// dx5x5_t appears as <25 x double> and fx2x3_t as <6 x float> in the expected IR.
3 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
4 typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
// NOTE(review): the two integer matrix types below are not referenced by any
// test in the visible portion of this file.
5 typedef int ix9x3_t __attribute__((matrix_type(9, 3)));
6 typedef unsigned long long ullx4x2_t __attribute__((matrix_type(4, 2)));
8 // Floating point matrix/scalar additions and subtractions.
// Matrix + matrix: a = b + c on 5x5 double matrices.
// Expected IR: both operands are loaded, a single <25 x double> fadd carrying
// the full fast-math flag set (reassoc nnan ninf nsz arcp afn) is emitted, and
// the result is stored. Every parameter is expected to carry
// noundef nofpclass(nan inf). The RUN line compiles with -ffast-math.
10 void add_matrix_matrix_double(dx5x5_t a, dx5x5_t b, dx5x5_t c) {
11 // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_double(<25 x double> noundef nofpclass(nan inf) %a, <25 x double> noundef nofpclass(nan inf) %b, <25 x double> noundef nofpclass(nan inf) %c)
12 // CHECK: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8
13 // CHECK-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8
14 // CHECK-NEXT: [[RES:%.*]] = fadd reassoc nnan ninf nsz arcp afn <25 x double> [[B]], [[C]]
15 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
17 a = b + c;
// Compound matrix addition: a += b on 5x5 double matrices.
// Expected IR: the load bound to B comes first, then the load bound to A; the
// fadd takes them in the order (A, B) with all fast-math flags, and the result
// is stored back.
20 void add_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
21 // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_double(<25 x double> noundef nofpclass(nan inf) %a, <25 x double> noundef nofpclass(nan inf) %b)
22 // CHECK: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8
23 // CHECK-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8
24 // CHECK-NEXT: [[RES:%.*]] = fadd reassoc nnan ninf nsz arcp afn <25 x double> [[A]], [[B]]
25 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
27 a += b;
// Compound matrix subtraction: a -= b on 5x5 double matrices.
// Same expected shape as the compound addition test, except the arithmetic
// instruction is fsub (A minus B) with all fast-math flags.
30 void subtract_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
31 // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_double(<25 x double> noundef nofpclass(nan inf) %a, <25 x double> noundef nofpclass(nan inf) %b)
32 // CHECK: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8
33 // CHECK-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8
34 // CHECK-NEXT: [[RES:%.*]] = fsub reassoc nnan ninf nsz arcp afn <25 x double> [[A]], [[B]]
35 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
37 a -= b;
// Matrix + scalar with mixed element types: a = a + vf, where a is a 5x5
// double matrix and vf is a float.
// Expected IR: the matrix is loaded first, then the scalar is loaded from
// %vf.addr and widened with fpext to double. The widened scalar is splatted
// across a <25 x double> (insertelement into lane 0 followed by a
// zeroinitializer-mask shufflevector) and added with all fast-math flags.
40 void add_matrix_scalar_double_float(dx5x5_t a, float vf) {
41 // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_float(<25 x double> noundef nofpclass(nan inf) %a, float noundef nofpclass(nan inf) %vf)
42 // CHECK: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8
43 // CHECK-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4
44 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
45 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
46 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
47 // CHECK-NEXT: [[RES:%.*]] = fadd reassoc nnan ninf nsz arcp afn <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
48 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
50 a = a + vf;
// Compound matrix + scalar: a += vf, where a is a 5x5 double matrix and vf is
// a float.
// Unlike the non-compound form, the expected IR loads and fpext-widens the
// scalar BEFORE loading the matrix; the splat (insertelement + shufflevector)
// and the fast-math fadd then follow, and the result is stored.
53 void add_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
54 // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_float(<25 x double> noundef nofpclass(nan inf) %a, float noundef nofpclass(nan inf) %vf)
55 // CHECK: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4
56 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
57 // CHECK-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8
58 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
59 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
60 // CHECK-NEXT: [[RES:%.*]] = fadd reassoc nnan ninf nsz arcp afn <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
61 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
63 a += vf;
// Compound matrix - scalar: a -= vf, where a is a 5x5 double matrix and vf is
// a float.
// Expected IR: scalar load and fpext come first, then the matrix load, then
// the scalar splat, and finally an fsub (matrix minus splat) carrying all
// fast-math flags, whose result is stored.
66 void subtract_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
67 // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_float(<25 x double> noundef nofpclass(nan inf) %a, float noundef nofpclass(nan inf) %vf)
68 // CHECK: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4
69 // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
70 // CHECK-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8
71 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
72 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
73 // CHECK-NEXT: [[RES:%.*]] = fsub reassoc nnan ninf nsz arcp afn <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
74 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
76 a -= vf;
79 // Tests for matrix multiplication and matrix/scalar division.
// Matrix * matrix: a = b * c on 5x5 double matrices, with a as a local.
// Expected IR: both operands are loaded and passed to the
// llvm.matrix.multiply intrinsic (dimension arguments 5, 5, 5); the call
// itself carries all fast-math flags. The result is stored to the local %a
// and the function returns immediately afterwards.
81 void multiply_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
82 // CHECK-LABEL: @multiply_matrix_matrix_double(
83 // CHECK: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8
84 // CHECK-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8
85 // CHECK-NEXT: [[RES:%.*]] = call reassoc nnan ninf nsz arcp afn <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
86 // CHECK-NEXT: store <25 x double> [[RES]], ptr %a, align 8
87 // CHECK-NEXT: ret void
90 dx5x5_t a;
91 a = b * c;
// Compound matrix * matrix: b *= c on 5x5 double matrices.
// Expected IR: the load bound to C precedes the load bound to B, while the
// llvm.matrix.multiply call still receives them in the order (B, C). The call
// carries all fast-math flags; the result is stored and the function returns.
94 void multiply_compound_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
95 // CHECK-LABEL: @multiply_compound_matrix_matrix_double(
96 // CHECK: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8
97 // CHECK-NEXT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8
98 // CHECK-NEXT: [[RES:%.*]] = call reassoc nnan ninf nsz arcp afn <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
99 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
100 // CHECK-NEXT: ret void
101 b *= c;
// Matrix * scalar with mixed element types: a = a * s, where a is a 5x5 double
// matrix and s is a float. The expected-IR directives precede the definition.
// Expected IR: matrix load, scalar load from %s.addr, fpext to double, splat
// into a <25 x double> (insertelement + shufflevector), then an element-wise
// fmul with all fast-math flags, a store of the result, and ret void.
104 // CHECK-LABEL: @multiply_double_matrix_scalar_float(
105 // CHECK: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8
106 // CHECK-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4
107 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
108 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
109 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
110 // CHECK-NEXT: [[RES:%.*]] = fmul reassoc nnan ninf nsz arcp afn <25 x double> [[A]], [[VECSPLAT]]
111 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
112 // CHECK-NEXT: ret void
114 void multiply_double_matrix_scalar_float(dx5x5_t a, float s) {
115 a = a * s;
// Compound matrix * scalar: a *= s, where a is a 5x5 double matrix and s is a
// float. The expected-IR directives precede the definition.
// Expected IR: the scalar is loaded from %s.addr and widened with fpext before
// the matrix is loaded; the widened scalar is then splatted into a
// <25 x double> and multiplied element-wise (fmul with all fast-math flags),
// the result is stored, and the function returns.
// The label pattern ends with '(' like the other labels in this file, so it
// cannot match a longer symbol name that merely starts with this one.
118 // CHECK-LABEL: @multiply_compound_double_matrix_scalar_float(
119 // CHECK: [[S:%.*]] = load float, ptr %s.addr, align 4
120 // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
121 // CHECK-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8
122 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
123 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
124 // CHECK-NEXT: [[RES:%.*]] = fmul reassoc nnan ninf nsz arcp afn <25 x double> [[A]], [[VECSPLAT]]
125 // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8
126 // CHECK-NEXT: ret void
128 void multiply_compound_double_matrix_scalar_float(dx5x5_t a, float s) {
129 a *= s;
// Matrix / scalar with a narrowing conversion: b = b / s, where b is a float
// matrix lowered as <6 x float> and s is a double. The expected-IR directives
// precede the definition.
// Expected IR: the matrix is loaded from an address captured as MAT_ADDR, the
// scalar is loaded from %s.addr and narrowed with fptrunc to float, splatted
// into a <6 x float>, and used as the divisor of an fdiv with all fast-math
// flags. The result must be stored back to the same MAT_ADDR, followed by
// ret void.
132 // CHECK-LABEL: @divide_float_matrix_scalar_double(
133 // CHECK: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4
134 // CHECK-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8
135 // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
136 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
137 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
138 // CHECK-NEXT: [[RES:%.*]] = fdiv reassoc nnan ninf nsz arcp afn <6 x float> [[MAT]], [[VECSPLAT]]
139 // CHECK-NEXT: store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4
140 // CHECK-NEXT: ret void
142 void divide_float_matrix_scalar_double(fx2x3_t b, double s) {
143 b = b / s;