1 //===-- X86InstrFMA3Info.cpp - X86 FMA3 Instruction Information -----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains the implementation of the classes providing information
10 // about existing X86 FMA3 opcodes, classifying and grouping them.
12 //===----------------------------------------------------------------------===//
14 #include "X86InstrFMA3Info.h"
15 #include "X86InstrInfo.h"
/// Emits a single table entry (with trailing comma): the 132, 213 and 231
/// opcodes X86::<Name><form><Suf>, in that order, followed by \p Attrs.
#define FMA3GROUP(Name, Suf, Attrs) \
  {{X86::Name##132##Suf, X86::Name##213##Suf, X86::Name##231##Suf}, Attrs},
/// Emits three entries for \p Suf: the plain form, the `k`-suffixed form
/// (tagged KMergeMasked) and the `kz`-suffixed form (tagged KZeroMasked).
#define FMA3GROUP_MASKED(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##k, Attrs | X86InstrFMA3Group::KMergeMasked) \
  FMA3GROUP(Name, Suf##kz, Attrs | X86InstrFMA3Group::KZeroMasked)
/// Same triple as FMA3GROUP_MASKED, but every opcode suffix ends in `_Int`:
/// <Suf>_Int, <Suf>k_Int (KMergeMasked) and <Suf>kz_Int (KZeroMasked).
#define FMA3GROUP_MASKED_INT(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##_Int, Attrs) \
  FMA3GROUP(Name, Suf##k_Int, Attrs | X86InstrFMA3Group::KMergeMasked) \
  FMA3GROUP(Name, Suf##kz_Int, Attrs | X86InstrFMA3Group::KZeroMasked)
/// Emits the masked entry triples for the six `Z`-suffixed variants of
/// \p Suf: Z128m, Z128r, Z256m, Z256r, Zm and Zr, in that order.
///
/// The final line deliberately has no line-continuation backslash, so the
/// macro ends here regardless of what the next source line contains.
#define FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Z128m, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Z128r, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Z256m, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Z256r, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Zm, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Zr, Attrs)
/// Emits entries for every packed variant of \p Suf: Ym and Yr, then all the
/// `Z` variants from FMA3GROUP_PACKED_WIDTHS_Z, then m and r. The emission
/// order determines table order, and the tables are asserted to be sorted.
#define FMA3GROUP_PACKED_WIDTHS_ALL(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##Ym, Attrs) \
  FMA3GROUP(Name, Suf##Yr, Attrs) \
  FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##m, Attrs) \
  FMA3GROUP(Name, Suf##r, Attrs)
/// Emits the packed entries for the PD, PH and PS suffixes. PD and PS get
/// every variant; PH only gets the `Z` variants.
#define FMA3GROUP_PACKED_DHS(Name, Attrs) \
  FMA3GROUP_PACKED_WIDTHS_ALL(Name, PD, Attrs) \
  FMA3GROUP_PACKED_WIDTHS_Z(Name, PH, Attrs) \
  FMA3GROUP_PACKED_WIDTHS_ALL(Name, PS, Attrs)
/// Emits the `Z` packed entries for the NEPBF16 suffix.
#define FMA3GROUP_PACKED_BF16(Name, Attrs) \
  FMA3GROUP_PACKED_WIDTHS_Z(Name, NEPBF16, Attrs)
/// Emits the scalar `Z` entries for \p Suf: for each of Zm and Zr, the plain
/// entry followed by its `_Int` masked triple, which is additionally tagged
/// with X86InstrFMA3Group::Intrinsic.
///
/// The final line deliberately has no line-continuation backslash, so the
/// macro ends here regardless of what the next source line contains.
#define FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##Zm, Attrs) \
  FMA3GROUP_MASKED_INT(Name, Suf##Zm, Attrs | X86InstrFMA3Group::Intrinsic) \
  FMA3GROUP(Name, Suf##Zr, Attrs) \
  FMA3GROUP_MASKED_INT(Name, Suf##Zr, Attrs | X86InstrFMA3Group::Intrinsic)
/// Emits every scalar entry for \p Suf: the `Z` entries first, then m, m_Int,
/// r and r_Int. The `_Int` entries are tagged X86InstrFMA3Group::Intrinsic.
#define FMA3GROUP_SCALAR_WIDTHS_ALL(Name, Suf, Attrs) \
  FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##m, Attrs) \
  FMA3GROUP(Name, Suf##m_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
  FMA3GROUP(Name, Suf##r, Attrs) \
  FMA3GROUP(Name, Suf##r_Int, Attrs | X86InstrFMA3Group::Intrinsic)
/// Emits the scalar entries for the SD, SH and SS suffixes. SD and SS get
/// every variant; SH only gets the `Z` variants.
#define FMA3GROUP_SCALAR(Name, Attrs) \
  FMA3GROUP_SCALAR_WIDTHS_ALL(Name, SD, Attrs) \
  FMA3GROUP_SCALAR_WIDTHS_Z(Name, SH, Attrs) \
  FMA3GROUP_SCALAR_WIDTHS_ALL(Name, SS, Attrs)
/// Emits all entries for \p Name: the packed BF16 entries, then the packed
/// PD/PH/PS entries, then the scalar entries.
#define FMA3GROUP_FULL(Name, Attrs) \
  FMA3GROUP_PACKED_BF16(Name, Attrs) \
  FMA3GROUP_PACKED_DHS(Name, Attrs) \
  FMA3GROUP_SCALAR(Name, Attrs)
81 static const X86InstrFMA3Group Groups
[] = {
82 FMA3GROUP_FULL(VFMADD
, 0)
83 FMA3GROUP_PACKED_DHS(VFMADDSUB
, 0)
84 FMA3GROUP_FULL(VFMSUB
, 0)
85 FMA3GROUP_PACKED_DHS(VFMSUBADD
, 0)
86 FMA3GROUP_FULL(VFNMADD
, 0)
87 FMA3GROUP_FULL(VFNMSUB
, 0)
/// Emits the masked entry triples for <Type>Z128<Suf>, <Type>Z256<Suf> and
/// <Type>Z<Suf>, in that order.
#define FMA3GROUP_PACKED_AVX512_WIDTHS(Name, Type, Suf, Attrs) \
  FMA3GROUP_MASKED(Name, Type##Z128##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, Type##Z256##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, Type##Z##Suf, Attrs)
/// Emits the three-width masked entries for the NEPBF16, PD, PH and PS types.
#define FMA3GROUP_PACKED_AVX512_ALL(Name, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, NEPBF16, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PD, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PH, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PS, Suf, Attrs)
/// Emits the three-width masked entries for the PD, PH and PS types only
/// (no NEPBF16 entries, unlike FMA3GROUP_PACKED_AVX512_ALL).
#define FMA3GROUP_PACKED_AVX512_DHS(Name, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PD, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PH, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PS, Suf, Attrs)
/// Emits the masked entry triples for the Z256 and Z variants of the PD, PH
/// and PS types with \p Suf appended (no Z128 variants are listed).
#define FMA3GROUP_PACKED_AVX512_ROUND(Name, Suf, Attrs) \
  FMA3GROUP_MASKED(Name, PDZ256##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, PDZ##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, PHZ256##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, PHZ##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, PSZ256##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, PSZ##Suf, Attrs)
/// For each of SDZ, SHZ and SSZ with \p Suf appended, emits the plain entry
/// followed by its `_Int` masked triple. \p Attrs is applied unchanged to all
/// of them.
#define FMA3GROUP_SCALAR_AVX512_ROUND(Name, Suf, Attrs) \
  FMA3GROUP(Name, SDZ##Suf, Attrs) \
  FMA3GROUP_MASKED_INT(Name, SDZ##Suf, Attrs) \
  FMA3GROUP(Name, SHZ##Suf, Attrs) \
  FMA3GROUP_MASKED_INT(Name, SHZ##Suf, Attrs) \
  FMA3GROUP(Name, SSZ##Suf, Attrs) \
  FMA3GROUP_MASKED_INT(Name, SSZ##Suf, Attrs)
122 static const X86InstrFMA3Group BroadcastGroups
[] = {
123 FMA3GROUP_PACKED_AVX512_ALL(VFMADD
, mb
, 0)
124 FMA3GROUP_PACKED_AVX512_DHS(VFMADDSUB
, mb
, 0)
125 FMA3GROUP_PACKED_AVX512_ALL(VFMSUB
, mb
, 0)
126 FMA3GROUP_PACKED_AVX512_DHS(VFMSUBADD
, mb
, 0)
127 FMA3GROUP_PACKED_AVX512_ALL(VFNMADD
, mb
, 0)
128 FMA3GROUP_PACKED_AVX512_ALL(VFNMSUB
, mb
, 0)
131 static const X86InstrFMA3Group RoundGroups
[] = {
132 FMA3GROUP_PACKED_AVX512_ROUND(VFMADD
, rb
, 0)
133 FMA3GROUP_SCALAR_AVX512_ROUND(VFMADD
, rb
, X86InstrFMA3Group::Intrinsic
)
134 FMA3GROUP_PACKED_AVX512_ROUND(VFMADDSUB
, rb
, 0)
135 FMA3GROUP_PACKED_AVX512_ROUND(VFMSUB
, rb
, 0)
136 FMA3GROUP_SCALAR_AVX512_ROUND(VFMSUB
, rb
, X86InstrFMA3Group::Intrinsic
)
137 FMA3GROUP_PACKED_AVX512_ROUND(VFMSUBADD
, rb
, 0)
138 FMA3GROUP_PACKED_AVX512_ROUND(VFNMADD
, rb
, 0)
139 FMA3GROUP_SCALAR_AVX512_ROUND(VFNMADD
, rb
, X86InstrFMA3Group::Intrinsic
)
140 FMA3GROUP_PACKED_AVX512_ROUND(VFNMSUB
, rb
, 0)
141 FMA3GROUP_SCALAR_AVX512_ROUND(VFNMSUB
, rb
, X86InstrFMA3Group::Intrinsic
)
144 static void verifyTables() {
146 static std::atomic
<bool> TableChecked(false);
147 if (!TableChecked
.load(std::memory_order_relaxed
)) {
148 assert(llvm::is_sorted(Groups
) && llvm::is_sorted(RoundGroups
) &&
149 llvm::is_sorted(BroadcastGroups
) && "FMA3 tables not sorted!");
150 TableChecked
.store(true, std::memory_order_relaxed
);
/// Returns a pointer to the group of FMA3 opcodes that contains the given
/// \p Opcode. If \p Opcode is not recognized as an FMA3 opcode and does not
/// belong to any FMA3 group, nullptr is returned.
158 const X86InstrFMA3Group
*llvm::getFMA3Group(unsigned Opcode
, uint64_t TSFlags
) {
160 // FMA3 instructions have a well defined encoding pattern we can exploit.
161 uint8_t BaseOpcode
= X86II::getBaseOpcodeFor(TSFlags
);
162 bool IsFMA3Opcode
= ((BaseOpcode
>= 0x96 && BaseOpcode
<= 0x9F) ||
163 (BaseOpcode
>= 0xA6 && BaseOpcode
<= 0xAF) ||
164 (BaseOpcode
>= 0xB6 && BaseOpcode
<= 0xBF));
165 bool IsFMA3Encoding
= ((TSFlags
& X86II::EncodingMask
) == X86II::VEX
&&
166 (TSFlags
& X86II::OpMapMask
) == X86II::T8
) ||
167 ((TSFlags
& X86II::EncodingMask
) == X86II::EVEX
&&
168 ((TSFlags
& X86II::OpMapMask
) == X86II::T8
||
169 (TSFlags
& X86II::OpMapMask
) == X86II::T_MAP6
));
170 bool IsFMA3Prefix
= (TSFlags
& X86II::OpPrefixMask
) == X86II::PD
||
171 (TSFlags
& X86II::OpPrefixMask
) == 0; // X86II::PS
172 if (!IsFMA3Opcode
|| !IsFMA3Encoding
|| !IsFMA3Prefix
)
177 ArrayRef
<X86InstrFMA3Group
> Table
;
178 if (TSFlags
& X86II::EVEX_RC
)
179 Table
= ArrayRef(RoundGroups
);
180 else if (TSFlags
& X86II::EVEX_B
)
181 Table
= ArrayRef(BroadcastGroups
);
183 Table
= ArrayRef(Groups
);
185 // FMA 132 instructions have an opcode of 0x96-0x9F
186 // FMA 213 instructions have an opcode of 0xA6-0xAF
187 // FMA 231 instructions have an opcode of 0xB6-0xBF
188 unsigned FormIndex
= ((BaseOpcode
- 0x90) >> 4) & 0x3;
190 auto I
= partition_point(Table
, [=](const X86InstrFMA3Group
&Group
) {
191 return Group
.Opcodes
[FormIndex
] < Opcode
;
193 assert(I
!= Table
.end() && I
->Opcodes
[FormIndex
] == Opcode
&&
194 "Couldn't find FMA3 opcode!");