Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / CodeGen / X86 / amx_api.c
blob5b6d50da27c6dee5ef25598321ac82fb5d7853a9
1 // RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512f -target-feature +amx-int8 \
2 // RUN: -target-feature +amx-bf16 -target-feature +amx-fp16 -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK
4 #include <immintrin.h>
// Global byte buffers handed to the tile load/store calls in the tests below.
// STRIDE is the stride argument passed to every one of those calls.
6 char buf[1024];
7 #define STRIDE 32
9 char buf2[1024];
11 // This is example code and an integration test.
// It initializes three __tile1024i values (a = {row, 8}, b = {8, col},
// c = {row, col}), loads all three from buf when cond is non-zero and from
// buf2 otherwise, calls __tile_dpbssd(&c, a, b), and finally stores c to buf.
// The FileCheck lines at the top of the body name the intrinsic calls that are
// expected in the IR emitted for this function.
12 void test_api(int cond, short row, short col) {
13 //CHECK-LABEL: @test_api
14 //CHECK-DAG: call x86_amx @llvm.x86.tileloadd64.internal
15 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
16 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
17 //CHECK-DAG: call x86_amx @llvm.x86.tdpbssd.internal
18 //CHECK-DAG: call void @llvm.x86.tilestored64.internal
19 __tile1024i a = {row, 8};
20 __tile1024i b = {8, col};
21 __tile1024i c = {row, col};
// Both branches perform the same three loads; only the source buffer differs.
23 if (cond) {
24 __tile_loadd(&a, buf, STRIDE);
25 __tile_loadd(&b, buf, STRIDE);
26 __tile_loadd(&c, buf, STRIDE);
27 } else {
28 __tile_loadd(&a, buf2, STRIDE);
29 __tile_loadd(&b, buf2, STRIDE);
30 __tile_loadd(&c, buf2, STRIDE);
32 __tile_dpbssd(&c, a, b);
33 __tile_stored(buf, STRIDE, c);
// Exercises __tile_loadd on its own: a is initialized with {row, col} and then
// loaded from buf with stride STRIDE. The FileCheck lines expect a
// tileloadd64.internal call and a tile-to-vector cast in the emitted IR.
36 void test_tile_loadd(short row, short col) {
37 //CHECK-LABEL: @test_tile_loadd
38 //CHECK-DAG: call x86_amx @llvm.x86.tileloadd64.internal
39 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
40 __tile1024i a = {row, col};
41 __tile_loadd(&a, buf, STRIDE);
// Exercises __tile_stream_loadd: a is initialized with {row, col} and then
// loaded from buf with stride STRIDE. The FileCheck lines expect the
// tileloaddt164.internal intrinsic (rather than tileloadd64.internal) followed
// by a tile-to-vector cast in the emitted IR.
44 void test_tile_stream_loadd(short row, short col) {
45 //CHECK-LABEL: @test_tile_stream_loadd
46 //CHECK-DAG: call x86_amx @llvm.x86.tileloaddt164.internal
47 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
48 __tile1024i a = {row, col};
49 __tile_stream_loadd(&a, buf, STRIDE);
// Exercises __tile_dpbssd(&c, a, b) on tiles received as by-value parameters.
// The FileCheck lines expect a vector-to-tile cast, a tdpbssd.internal call,
// and a tile-to-vector cast in the emitted IR.
52 void test_tile_dpbssd(__tile1024i a, __tile1024i b, __tile1024i c) {
53 //CHECK-LABEL: @test_tile_dpbssd
54 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
55 //CHECK-DAG: call x86_amx @llvm.x86.tdpbssd.internal
56 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
57 __tile_dpbssd(&c, a, b);
// Exercises __tile_dpbsud(&c, a, b) on tiles received as by-value parameters.
// The FileCheck lines expect a vector-to-tile cast, a tdpbsud.internal call,
// and a tile-to-vector cast in the emitted IR.
60 void test_tile_dpbsud(__tile1024i a, __tile1024i b, __tile1024i c) {
61 //CHECK-LABEL: @test_tile_dpbsud
62 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
63 //CHECK-DAG: call x86_amx @llvm.x86.tdpbsud.internal
64 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
65 __tile_dpbsud(&c, a, b);
// Exercises __tile_dpbusd(&c, a, b) on tiles received as by-value parameters.
// The FileCheck lines expect a vector-to-tile cast, a tdpbusd.internal call,
// and a tile-to-vector cast in the emitted IR.
68 void test_tile_dpbusd(__tile1024i a, __tile1024i b, __tile1024i c) {
69 //CHECK-LABEL: @test_tile_dpbusd
70 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
71 //CHECK-DAG: call x86_amx @llvm.x86.tdpbusd.internal
72 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
73 __tile_dpbusd(&c, a, b);
// Exercises __tile_dpbuud(&c, a, b) on tiles received as by-value parameters.
// The FileCheck lines expect a vector-to-tile cast, a tdpbuud.internal call,
// and a tile-to-vector cast in the emitted IR.
76 void test_tile_dpbuud(__tile1024i a, __tile1024i b, __tile1024i c) {
77 //CHECK-LABEL: @test_tile_dpbuud
78 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
79 //CHECK-DAG: call x86_amx @llvm.x86.tdpbuud.internal
80 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
81 __tile_dpbuud(&c, a, b);
// Exercises __tile_stored: writes the by-value tile parameter c to buf with
// stride STRIDE. The FileCheck lines expect a vector-to-tile cast and a
// tilestored64.internal call in the emitted IR.
84 void test_tile_stored(__tile1024i c) {
85 //CHECK-LABEL: @test_tile_stored
86 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
87 //CHECK-DAG: call void @llvm.x86.tilestored64.internal
88 __tile_stored(buf, STRIDE, c);
// Exercises __tile_zero on the by-value tile parameter c (passed by pointer to
// the call). The FileCheck lines expect a tilezero.internal call and a
// tile-to-vector cast in the emitted IR.
91 void test_tile_zero(__tile1024i c) {
92 //CHECK-LABEL: @test_tile_zero
93 //CHECK-DAG: call x86_amx @llvm.x86.tilezero.internal
94 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
95 __tile_zero(&c);
// Exercises __tile_dpbf16ps on tiles received as by-value parameters. The
// FileCheck lines expect a vector-to-tile cast, a tdpbf16ps.internal call, and
// a tile-to-vector cast in the emitted IR.
// NOTE(review): the call passes (&a, b, c), whereas the integer dot-product
// tests in this file pass (&c, a, b); presumably intentional for this test --
// confirm against the intrinsic's declaration.
98 void test_tile_dpbf16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
99 //CHECK-LABEL: @test_tile_dpbf16ps
100 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
101 //CHECK-DAG: call x86_amx @llvm.x86.tdpbf16ps.internal
102 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
103 __tile_dpbf16ps(&a, b, c);
// Exercises __tile_dpfp16ps on tiles received as by-value parameters. The
// FileCheck lines expect a vector-to-tile cast, a tdpfp16ps.internal call, and
// a tile-to-vector cast in the emitted IR.
// NOTE(review): the call passes (&a, b, c), whereas the integer dot-product
// tests in this file pass (&c, a, b); presumably intentional for this test --
// confirm against the intrinsic's declaration.
106 void test_tile_dpfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
107 //CHECK-LABEL: @test_tile_dpfp16ps
108 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
109 //CHECK-DAG: call x86_amx @llvm.x86.tdpfp16ps.internal
110 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
111 __tile_dpfp16ps(&a, b, c);