1 // RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512f -target-feature +amx-int8 \
2 // RUN: -target-feature +amx-bf16 -target-feature +amx-fp16 -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK
11 // This is example code and an integration test.
// End-to-end exercise of the tile API, verified on the emitted LLVM IR.
// Visible steps: declare tiles a = {row, 8}, b = {8, col}, c = {row, col};
// load a, b and c from `buf`, then load them again from `buf2` (each with
// STRIDE); call __tile_dpbssd(&c, a, b); write c to `buf` via __tile_stored.
// `buf`, `buf2` and `STRIDE` are defined outside the lines visible here.
// The FileCheck directive comments below list the intrinsic calls expected
// in the IR: tile load, tile<->vector casts, tdpbssd and tile store.
// NOTE(review): `cond` is not referenced in any visible line, and the
// embedded listing numbers skip between the two load groups; presumably a
// branch on `cond` selects `buf` vs `buf2` in the full file -- confirm.
12 void test_api(int cond
, short row
, short col
) {
13 //CHECK-LABEL: @test_api
14 //CHECK-DAG: call x86_amx @llvm.x86.tileloadd64.internal
15 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
16 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
17 //CHECK-DAG: call x86_amx @llvm.x86.tdpbssd.internal
18 //CHECK-DAG: call void @llvm.x86.tilestored64.internal
// Tile declarations; the two initializer values are presumably the tile's
// row and column shape -- TODO confirm against the __tile1024i definition.
19 __tile1024i a
= {row
, 8};
20 __tile1024i b
= {8, col
};
21 __tile1024i c
= {row
, col
};
// First group of loads: source is `buf`.
24 __tile_loadd(&a
, buf
, STRIDE
);
25 __tile_loadd(&b
, buf
, STRIDE
);
26 __tile_loadd(&c
, buf
, STRIDE
);
// Second group of loads: source is `buf2`.
28 __tile_loadd(&a
, buf2
, STRIDE
);
29 __tile_loadd(&b
, buf2
, STRIDE
);
30 __tile_loadd(&c
, buf2
, STRIDE
);
// Combine a and b into c (c is passed by address), then store c to `buf`.
32 __tile_dpbssd(&c
, a
, b
);
33 __tile_stored(buf
, STRIDE
, c
);
// Covers __tile_loadd in isolation: declares tile a = {row, col} and loads
// it from `buf` with STRIDE (both defined outside the visible lines).
// The FileCheck directive comments expect a tileloadd64.internal call and a
// tile-to-vector cast in the emitted IR.
36 void test_tile_loadd(short row
, short col
) {
37 //CHECK-LABEL: @test_tile_loadd
38 //CHECK-DAG: call x86_amx @llvm.x86.tileloadd64.internal
39 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
40 __tile1024i a
= {row
, col
};
41 __tile_loadd(&a
, buf
, STRIDE
);
// Covers __tile_stream_loadd: declares tile a = {row, col} and loads it
// from `buf` with STRIDE (both defined outside the visible lines).
// The FileCheck directive comments expect a tileloaddt164.internal call
// (rather than tileloadd64) plus a tile-to-vector cast in the emitted IR.
44 void test_tile_stream_loadd(short row
, short col
) {
45 //CHECK-LABEL: @test_tile_stream_loadd
46 //CHECK-DAG: call x86_amx @llvm.x86.tileloaddt164.internal
47 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
48 __tile1024i a
= {row
, col
};
49 __tile_stream_loadd(&a
, buf
, STRIDE
);
// Covers __tile_dpbssd: the three tiles arrive as by-value parameters and
// the call passes c by address with a and b as the other operands.
// The FileCheck directive comments expect a vector-to-tile cast, a
// tdpbssd.internal call and a tile-to-vector cast in the emitted IR.
52 void test_tile_dpbssd(__tile1024i a
, __tile1024i b
, __tile1024i c
) {
53 //CHECK-LABEL: @test_tile_dpbssd
54 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
55 //CHECK-DAG: call x86_amx @llvm.x86.tdpbssd.internal
56 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
57 __tile_dpbssd(&c
, a
, b
);
// Covers __tile_dpbsud: the three tiles arrive as by-value parameters and
// the call passes c by address with a and b as the other operands.
// The FileCheck directive comments expect a vector-to-tile cast, a
// tdpbsud.internal call and a tile-to-vector cast in the emitted IR.
60 void test_tile_dpbsud(__tile1024i a
, __tile1024i b
, __tile1024i c
) {
61 //CHECK-LABEL: @test_tile_dpbsud
62 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
63 //CHECK-DAG: call x86_amx @llvm.x86.tdpbsud.internal
64 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
65 __tile_dpbsud(&c
, a
, b
);
// Covers __tile_dpbusd: the three tiles arrive as by-value parameters and
// the call passes c by address with a and b as the other operands.
// The FileCheck directive comments expect a vector-to-tile cast, a
// tdpbusd.internal call and a tile-to-vector cast in the emitted IR.
68 void test_tile_dpbusd(__tile1024i a
, __tile1024i b
, __tile1024i c
) {
69 //CHECK-LABEL: @test_tile_dpbusd
70 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
71 //CHECK-DAG: call x86_amx @llvm.x86.tdpbusd.internal
72 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
73 __tile_dpbusd(&c
, a
, b
);
// Covers __tile_dpbuud: the three tiles arrive as by-value parameters and
// the call passes c by address with a and b as the other operands.
// The FileCheck directive comments expect a vector-to-tile cast, a
// tdpbuud.internal call and a tile-to-vector cast in the emitted IR.
76 void test_tile_dpbuud(__tile1024i a
, __tile1024i b
, __tile1024i c
) {
77 //CHECK-LABEL: @test_tile_dpbuud
78 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
79 //CHECK-DAG: call x86_amx @llvm.x86.tdpbuud.internal
80 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
81 __tile_dpbuud(&c
, a
, b
);
// Covers __tile_stored: writes the by-value tile parameter c to `buf` with
// STRIDE (both defined outside the visible lines).
// The FileCheck directive comments expect a vector-to-tile cast followed
// somewhere by a tilestored64.internal call in the emitted IR.
84 void test_tile_stored(__tile1024i c
) {
85 //CHECK-LABEL: @test_tile_stored
86 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
87 //CHECK-DAG: call void @llvm.x86.tilestored64.internal
88 __tile_stored(buf
, STRIDE
, c
);
// Covers tile zeroing: the FileCheck directive comments expect a
// tilezero.internal call and a tile-to-vector cast in the emitted IR.
// NOTE(review): no statement is visible in this body; the call that
// produces the expected tilezero IR is presumably on a line missing from
// this listing -- confirm against the full file.
91 void test_tile_zero(__tile1024i c
) {
92 //CHECK-LABEL: @test_tile_zero
93 //CHECK-DAG: call x86_amx @llvm.x86.tilezero.internal
94 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
// Covers __tile_dpbf16ps: the three tiles arrive as by-value parameters.
// Note the argument order used here: a is passed by address, with b and c
// as the other operands.
// The FileCheck directive comments expect a vector-to-tile cast, a
// tdpbf16ps.internal call and a tile-to-vector cast in the emitted IR.
98 void test_tile_dpbf16ps(__tile1024i a
, __tile1024i b
, __tile1024i c
) {
99 //CHECK-LABEL: @test_tile_dpbf16ps
100 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
101 //CHECK-DAG: call x86_amx @llvm.x86.tdpbf16ps.internal
102 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
103 __tile_dpbf16ps(&a
, b
, c
);
// Covers __tile_dpfp16ps: the three tiles arrive as by-value parameters.
// Note the argument order used here: a is passed by address, with b and c
// as the other operands.
// The FileCheck directive comments expect a vector-to-tile cast, a
// tdpfp16ps.internal call and a tile-to-vector cast in the emitted IR.
106 void test_tile_dpfp16ps(__tile1024i a
, __tile1024i b
, __tile1024i c
) {
107 //CHECK-LABEL: @test_tile_dpfp16ps
108 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
109 //CHECK-DAG: call x86_amx @llvm.x86.tdpfp16ps.internal
110 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
111 __tile_dpfp16ps(&a
, b
, c
);