Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / CodeGen / matrix-type.c
blobe52dc20229db93749c6fc0576a981200781efc80
1 // RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
// Stop compilation with an explicit diagnostic if either matrix extension
// queried below is not reported as enabled by __has_extension.
3 #if !__has_extension(matrix_types)
4 #error Expected extension 'matrix_types' to be enabled
5 #endif
7 #if !__has_extension(matrix_types_scalar_division)
8 #error Expected extension 'matrix_types_scalar_division' to be enabled
9 #endif
// Matrix of double declared with matrix_type(5, 5), i.e. 25 elements in total.
11 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));
13 // CHECK: %struct.Matrix = type { i8, [12 x float], float }
// Copies the matrix pointed to by b into the matrix pointed to by a.
// The expected IR below spills both pointer arguments to allocas, then moves
// the whole value with one <25 x double> load and one <25 x double> store,
// both with 8-byte alignment.
15 void load_store_double(dx5x5_t *a, dx5x5_t *b) {
16 // CHECK-LABEL: define{{.*}} void @load_store_double(
17 // CHECK-NEXT: entry:
18 // CHECK-NEXT: %a.addr = alloca ptr, align 8
19 // CHECK-NEXT: %b.addr = alloca ptr, align 8
20 // CHECK-NEXT: store ptr %a, ptr %a.addr, align 8
21 // CHECK-NEXT: store ptr %b, ptr %b.addr, align 8
22 // CHECK-NEXT: %0 = load ptr, ptr %b.addr, align 8
23 // CHECK-NEXT: %1 = load <25 x double>, ptr %0, align 8
24 // CHECK-NEXT: %2 = load ptr, ptr %a.addr, align 8
25 // CHECK-NEXT: store <25 x double> %1, ptr %2, align 8
26 // CHECK-NEXT: ret void
// Source pointer (b) is expected to be read before the destination pointer (a).
28 *a = *b;
// Matrix of float declared with matrix_type(3, 4), i.e. 12 elements in total.
31 typedef float fx3x4_t __attribute__((matrix_type(3, 4)));
// Copies the matrix pointed to by b into the matrix pointed to by a.
// The expected IR moves the value as a single <12 x float> load and store,
// both with 4-byte alignment.
32 void load_store_float(fx3x4_t *a, fx3x4_t *b) {
33 // CHECK-LABEL: define{{.*}} void @load_store_float(
34 // CHECK-NEXT: entry:
35 // CHECK-NEXT: %a.addr = alloca ptr, align 8
36 // CHECK-NEXT: %b.addr = alloca ptr, align 8
37 // CHECK-NEXT: store ptr %a, ptr %a.addr, align 8
38 // CHECK-NEXT: store ptr %b, ptr %b.addr, align 8
39 // CHECK-NEXT: %0 = load ptr, ptr %b.addr, align 8
40 // CHECK-NEXT: %1 = load <12 x float>, ptr %0, align 4
41 // CHECK-NEXT: %2 = load ptr, ptr %a.addr, align 8
42 // CHECK-NEXT: store <12 x float> %1, ptr %2, align 4
43 // CHECK-NEXT: ret void
45 *a = *b;
// Matrix of int with 12 elements in total.
// NOTE(review): the attribute arguments are (4, 3) while the typedef name
// reads "3x4". The expected IR only pins the flattened <12 x i32> type, so it
// does not distinguish the two orders - confirm the name is intentional.
48 typedef int ix3x4_t __attribute__((matrix_type(4, 3)));
// Copies the matrix pointed to by b into the matrix pointed to by a.
// The expected IR moves the value as a single <12 x i32> load and store,
// both with 4-byte alignment.
49 void load_store_int(ix3x4_t *a, ix3x4_t *b) {
50 // CHECK-LABEL: define{{.*}} void @load_store_int(
51 // CHECK-NEXT: entry:
52 // CHECK-NEXT: %a.addr = alloca ptr, align 8
53 // CHECK-NEXT: %b.addr = alloca ptr, align 8
54 // CHECK-NEXT: store ptr %a, ptr %a.addr, align 8
55 // CHECK-NEXT: store ptr %b, ptr %b.addr, align 8
56 // CHECK-NEXT: %0 = load ptr, ptr %b.addr, align 8
57 // CHECK-NEXT: %1 = load <12 x i32>, ptr %0, align 4
58 // CHECK-NEXT: %2 = load ptr, ptr %a.addr, align 8
59 // CHECK-NEXT: store <12 x i32> %1, ptr %2, align 4
60 // CHECK-NEXT: ret void
62 *a = *b;
// Matrix of unsigned long long with 12 elements in total.
// NOTE(review): the attribute arguments are (4, 3) while the typedef name
// reads "3x4"; the expected IR only pins <12 x i64> - confirm the name.
65 typedef unsigned long long ullx3x4_t __attribute__((matrix_type(4, 3)));
// Copies the matrix pointed to by b into the matrix pointed to by a.
// The expected IR moves the value as a single <12 x i64> load and store,
// both with 8-byte alignment.
66 void load_store_ull(ullx3x4_t *a, ullx3x4_t *b) {
67 // CHECK-LABEL: define{{.*}} void @load_store_ull(
68 // CHECK-NEXT: entry:
69 // CHECK-NEXT: %a.addr = alloca ptr, align 8
70 // CHECK-NEXT: %b.addr = alloca ptr, align 8
71 // CHECK-NEXT: store ptr %a, ptr %a.addr, align 8
72 // CHECK-NEXT: store ptr %b, ptr %b.addr, align 8
73 // CHECK-NEXT: %0 = load ptr, ptr %b.addr, align 8
74 // CHECK-NEXT: %1 = load <12 x i64>, ptr %0, align 8
75 // CHECK-NEXT: %2 = load ptr, ptr %a.addr, align 8
76 // CHECK-NEXT: store <12 x i64> %1, ptr %2, align 8
77 // CHECK-NEXT: ret void
79 *a = *b;
// Matrix of __fp16 with 12 elements in total.
// NOTE(review): the attribute arguments are (4, 3) while the typedef name
// reads "3x4"; the expected IR only pins <12 x half> - confirm the name.
82 typedef __fp16 fp16x3x4_t __attribute__((matrix_type(4, 3)));
// Copies the matrix pointed to by b into the matrix pointed to by a.
// The expected IR moves the value as a single <12 x half> load and store,
// both with 2-byte alignment.
83 void load_store_fp16(fp16x3x4_t *a, fp16x3x4_t *b) {
84 // CHECK-LABEL: define{{.*}} void @load_store_fp16(
85 // CHECK-NEXT: entry:
86 // CHECK-NEXT: %a.addr = alloca ptr, align 8
87 // CHECK-NEXT: %b.addr = alloca ptr, align 8
88 // CHECK-NEXT: store ptr %a, ptr %a.addr, align 8
89 // CHECK-NEXT: store ptr %b, ptr %b.addr, align 8
90 // CHECK-NEXT: %0 = load ptr, ptr %b.addr, align 8
91 // CHECK-NEXT: %1 = load <12 x half>, ptr %0, align 2
92 // CHECK-NEXT: %2 = load ptr, ptr %a.addr, align 8
93 // CHECK-NEXT: store <12 x half> %1, ptr %2, align 2
94 // CHECK-NEXT: ret void
96 *a = *b;
// Matrix of float declared with matrix_type(3, 3), i.e. 9 elements in total.
99 typedef float fx3x3_t __attribute__((matrix_type(3, 3)));
// Takes a matrix by value and stores it through the pointer b.
// In the expected IR the by-value argument arrives as <9 x float>, its local
// slot is allocated as [9 x float], and the accesses to that slot use the
// <9 x float> vector type with 4-byte alignment.
101 void parameter_passing(fx3x3_t a, fx3x3_t *b) {
102 // CHECK-LABEL: define{{.*}} void @parameter_passing(
103 // CHECK-NEXT: entry:
104 // CHECK-NEXT: %a.addr = alloca [9 x float], align 4
105 // CHECK-NEXT: %b.addr = alloca ptr, align 8
106 // CHECK-NEXT: store <9 x float> %a, ptr %a.addr, align 4
107 // CHECK-NEXT: store ptr %b, ptr %b.addr, align 8
108 // CHECK-NEXT: %0 = load <9 x float>, ptr %a.addr, align 4
109 // CHECK-NEXT: %1 = load ptr, ptr %b.addr, align 8
110 // CHECK-NEXT: store <9 x float> %0, ptr %1, align 4
111 // CHECK-NEXT: ret void
112 *b = a;
// Returns, by value, the matrix that a points to.
// The expected IR gives the function a <9 x float> return type and returns
// the result of a single <9 x float> load with 4-byte alignment.
115 fx3x3_t return_matrix(fx3x3_t *a) {
116 // CHECK-LABEL: define{{.*}} <9 x float> @return_matrix
117 // CHECK-NEXT: entry:
118 // CHECK-NEXT: %a.addr = alloca ptr, align 8
119 // CHECK-NEXT: store ptr %a, ptr %a.addr, align 8
120 // CHECK-NEXT: %0 = load ptr, ptr %a.addr, align 8
121 // CHECK-NEXT: %1 = load <9 x float>, ptr %0, align 4
122 // CHECK-NEXT: ret <9 x float> %1
123 return *a;
// Struct with a matrix member placed between a char and a float.
// The type expectation near the top of this file lays it out as
// { i8, [12 x float], float }, i.e. the matrix member appears as an array.
126 typedef struct {
127 char Tmp1;
128 fx3x4_t Data;
129 float Tmp2;
130 } Matrix;
// Copies the Data matrix member from *a into *b.
// The expected IR addresses the member with a getelementptr at field index 1
// on each struct, reads it from a first, and moves it as a single
// <12 x float> load and store with 4-byte alignment.
132 void matrix_struct(Matrix *a, Matrix *b) {
133 // CHECK-LABEL: define{{.*}} void @matrix_struct(
134 // CHECK-NEXT: entry:
135 // CHECK-NEXT: %a.addr = alloca ptr, align 8
136 // CHECK-NEXT: %b.addr = alloca ptr, align 8
137 // CHECK-NEXT: store ptr %a, ptr %a.addr, align 8
138 // CHECK-NEXT: store ptr %b, ptr %b.addr, align 8
139 // CHECK-NEXT: %0 = load ptr, ptr %a.addr, align 8
140 // CHECK-NEXT: %Data = getelementptr inbounds %struct.Matrix, ptr %0, i32 0, i32 1
141 // CHECK-NEXT: %1 = load <12 x float>, ptr %Data, align 4
142 // CHECK-NEXT: %2 = load ptr, ptr %b.addr, align 8
143 // CHECK-NEXT: %Data1 = getelementptr inbounds %struct.Matrix, ptr %2, i32 0, i32 1
144 // CHECK-NEXT: store <12 x float> %1, ptr %Data1, align 4
145 // CHECK-NEXT: ret void
146 b->Data = a->Data;
// Matrix of double declared with matrix_type(4, 4), i.e. 16 elements in total.
149 typedef double dx4x4_t __attribute__((matrix_type(4, 4)));
150 void matrix_inline_asm_memory_readwrite(void) {
151 // CHECK-LABEL: define{{.*}} void @matrix_inline_asm_memory_readwrite()
152 // CHECK-NEXT: entry:
153 // CHECK-NEXT: [[ALLOCA:%.+]] = alloca [16 x double], align 8
154 // CHECK-NEXT: [[VAL:%.+]] = load <16 x double>, ptr [[ALLOCA]], align 8
155 // CHECK-NEXT: call void asm sideeffect "", "=*r|m,0,~{memory},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(<16 x double>) [[ALLOCA]], <16 x double> [[VAL]])
156 // CHECK-NEXT: ret void
158 dx4x4_t m;
159 asm volatile(""
160 : "+r,m"(m)
162 : "memory");