1 //===-- X86InstrFoldTables.cpp - X86 Instruction Folding Tables -----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains the X86 memory folding tables.
11 //===----------------------------------------------------------------------===//
13 #include "X86InstrFoldTables.h"
14 #include "X86InstrInfo.h"
15 #include "llvm/ADT/STLExtras.h"
21 // These tables are sorted by their RegOp value allowing them to be binary
22 // searched at runtime without the need for additional storage. The enum values
23 // are currently emitted in X86GenInstrInfo.inc in alphabetical order. Which
24 // makes sorting these tables a simple matter of alphabetizing the table.
25 #include "X86GenFoldTables.inc"
27 // Table to map instructions safe to broadcast using a different width from the
29 static const X86FoldTableEntry BroadcastSizeTable2
[] = {
30 { X86::VANDNPDZ128rr
, X86::VANDNPSZ128rmb
, TB_BCAST_SS
},
31 { X86::VANDNPDZ256rr
, X86::VANDNPSZ256rmb
, TB_BCAST_SS
},
32 { X86::VANDNPDZrr
, X86::VANDNPSZrmb
, TB_BCAST_SS
},
33 { X86::VANDNPSZ128rr
, X86::VANDNPDZ128rmb
, TB_BCAST_SD
},
34 { X86::VANDNPSZ256rr
, X86::VANDNPDZ256rmb
, TB_BCAST_SD
},
35 { X86::VANDNPSZrr
, X86::VANDNPDZrmb
, TB_BCAST_SD
},
36 { X86::VANDPDZ128rr
, X86::VANDPSZ128rmb
, TB_BCAST_SS
},
37 { X86::VANDPDZ256rr
, X86::VANDPSZ256rmb
, TB_BCAST_SS
},
38 { X86::VANDPDZrr
, X86::VANDPSZrmb
, TB_BCAST_SS
},
39 { X86::VANDPSZ128rr
, X86::VANDPDZ128rmb
, TB_BCAST_SD
},
40 { X86::VANDPSZ256rr
, X86::VANDPDZ256rmb
, TB_BCAST_SD
},
41 { X86::VANDPSZrr
, X86::VANDPDZrmb
, TB_BCAST_SD
},
42 { X86::VORPDZ128rr
, X86::VORPSZ128rmb
, TB_BCAST_SS
},
43 { X86::VORPDZ256rr
, X86::VORPSZ256rmb
, TB_BCAST_SS
},
44 { X86::VORPDZrr
, X86::VORPSZrmb
, TB_BCAST_SS
},
45 { X86::VORPSZ128rr
, X86::VORPDZ128rmb
, TB_BCAST_SD
},
46 { X86::VORPSZ256rr
, X86::VORPDZ256rmb
, TB_BCAST_SD
},
47 { X86::VORPSZrr
, X86::VORPDZrmb
, TB_BCAST_SD
},
48 { X86::VPANDDZ128rr
, X86::VPANDQZ128rmb
, TB_BCAST_Q
},
49 { X86::VPANDDZ256rr
, X86::VPANDQZ256rmb
, TB_BCAST_Q
},
50 { X86::VPANDDZrr
, X86::VPANDQZrmb
, TB_BCAST_Q
},
51 { X86::VPANDNDZ128rr
, X86::VPANDNQZ128rmb
, TB_BCAST_Q
},
52 { X86::VPANDNDZ256rr
, X86::VPANDNQZ256rmb
, TB_BCAST_Q
},
53 { X86::VPANDNDZrr
, X86::VPANDNQZrmb
, TB_BCAST_Q
},
54 { X86::VPANDNQZ128rr
, X86::VPANDNDZ128rmb
, TB_BCAST_D
},
55 { X86::VPANDNQZ256rr
, X86::VPANDNDZ256rmb
, TB_BCAST_D
},
56 { X86::VPANDNQZrr
, X86::VPANDNDZrmb
, TB_BCAST_D
},
57 { X86::VPANDQZ128rr
, X86::VPANDDZ128rmb
, TB_BCAST_D
},
58 { X86::VPANDQZ256rr
, X86::VPANDDZ256rmb
, TB_BCAST_D
},
59 { X86::VPANDQZrr
, X86::VPANDDZrmb
, TB_BCAST_D
},
60 { X86::VPORDZ128rr
, X86::VPORQZ128rmb
, TB_BCAST_Q
},
61 { X86::VPORDZ256rr
, X86::VPORQZ256rmb
, TB_BCAST_Q
},
62 { X86::VPORDZrr
, X86::VPORQZrmb
, TB_BCAST_Q
},
63 { X86::VPORQZ128rr
, X86::VPORDZ128rmb
, TB_BCAST_D
},
64 { X86::VPORQZ256rr
, X86::VPORDZ256rmb
, TB_BCAST_D
},
65 { X86::VPORQZrr
, X86::VPORDZrmb
, TB_BCAST_D
},
66 { X86::VPXORDZ128rr
, X86::VPXORQZ128rmb
, TB_BCAST_Q
},
67 { X86::VPXORDZ256rr
, X86::VPXORQZ256rmb
, TB_BCAST_Q
},
68 { X86::VPXORDZrr
, X86::VPXORQZrmb
, TB_BCAST_Q
},
69 { X86::VPXORQZ128rr
, X86::VPXORDZ128rmb
, TB_BCAST_D
},
70 { X86::VPXORQZ256rr
, X86::VPXORDZ256rmb
, TB_BCAST_D
},
71 { X86::VPXORQZrr
, X86::VPXORDZrmb
, TB_BCAST_D
},
72 { X86::VXORPDZ128rr
, X86::VXORPSZ128rmb
, TB_BCAST_SS
},
73 { X86::VXORPDZ256rr
, X86::VXORPSZ256rmb
, TB_BCAST_SS
},
74 { X86::VXORPDZrr
, X86::VXORPSZrmb
, TB_BCAST_SS
},
75 { X86::VXORPSZ128rr
, X86::VXORPDZ128rmb
, TB_BCAST_SD
},
76 { X86::VXORPSZ256rr
, X86::VXORPDZ256rmb
, TB_BCAST_SD
},
77 { X86::VXORPSZrr
, X86::VXORPDZrmb
, TB_BCAST_SD
},
80 static const X86FoldTableEntry BroadcastSizeTable3
[] = {
81 { X86::VPTERNLOGDZ128rri
, X86::VPTERNLOGQZ128rmbi
, TB_BCAST_Q
},
82 { X86::VPTERNLOGDZ256rri
, X86::VPTERNLOGQZ256rmbi
, TB_BCAST_Q
},
83 { X86::VPTERNLOGDZrri
, X86::VPTERNLOGQZrmbi
, TB_BCAST_Q
},
84 { X86::VPTERNLOGQZ128rri
, X86::VPTERNLOGDZ128rmbi
, TB_BCAST_D
},
85 { X86::VPTERNLOGQZ256rri
, X86::VPTERNLOGDZ256rmbi
, TB_BCAST_D
},
86 { X86::VPTERNLOGQZrri
, X86::VPTERNLOGDZrmbi
, TB_BCAST_D
},
89 static const X86FoldTableEntry
*
90 lookupFoldTableImpl(ArrayRef
<X86FoldTableEntry
> Table
, unsigned RegOp
) {
92 #define CHECK_SORTED_UNIQUE(TABLE) \
93 assert(llvm::is_sorted(TABLE) && #TABLE " is not sorted"); \
94 assert(std::adjacent_find(std::begin(Table), std::end(Table)) == \
96 #TABLE " is not unique");
98 // Make sure the tables are sorted.
99 static std::atomic
<bool> FoldTablesChecked(false);
100 if (!FoldTablesChecked
.load(std::memory_order_relaxed
)) {
101 CHECK_SORTED_UNIQUE(Table2Addr
)
102 CHECK_SORTED_UNIQUE(Table0
)
103 CHECK_SORTED_UNIQUE(Table1
)
104 CHECK_SORTED_UNIQUE(Table2
)
105 CHECK_SORTED_UNIQUE(Table3
)
106 CHECK_SORTED_UNIQUE(Table4
)
107 CHECK_SORTED_UNIQUE(BroadcastTable1
)
108 CHECK_SORTED_UNIQUE(BroadcastTable2
)
109 CHECK_SORTED_UNIQUE(BroadcastTable3
)
110 CHECK_SORTED_UNIQUE(BroadcastTable4
)
111 CHECK_SORTED_UNIQUE(BroadcastSizeTable2
)
112 CHECK_SORTED_UNIQUE(BroadcastSizeTable3
)
113 FoldTablesChecked
.store(true, std::memory_order_relaxed
);
117 const X86FoldTableEntry
*Data
= llvm::lower_bound(Table
, RegOp
);
118 if (Data
!= Table
.end() && Data
->KeyOp
== RegOp
&&
119 !(Data
->Flags
& TB_NO_FORWARD
))
124 const X86FoldTableEntry
*
125 llvm::lookupTwoAddrFoldTable(unsigned RegOp
) {
126 return lookupFoldTableImpl(Table2Addr
, RegOp
);
129 const X86FoldTableEntry
*
130 llvm::lookupFoldTable(unsigned RegOp
, unsigned OpNum
) {
131 ArrayRef
<X86FoldTableEntry
> FoldTable
;
133 FoldTable
= ArrayRef(Table0
);
135 FoldTable
= ArrayRef(Table1
);
137 FoldTable
= ArrayRef(Table2
);
139 FoldTable
= ArrayRef(Table3
);
141 FoldTable
= ArrayRef(Table4
);
145 return lookupFoldTableImpl(FoldTable
, RegOp
);
150 // This class stores the memory unfolding tables. It is instantiated as a
151 // function scope static variable to lazily init the unfolding table.
152 struct X86MemUnfoldTable
{
153 // Stores memory unfolding tables entries sorted by opcode.
154 std::vector
<X86FoldTableEntry
> Table
;
156 X86MemUnfoldTable() {
157 for (const X86FoldTableEntry
&Entry
: Table2Addr
)
158 // Index 0, folded load and store, no alignment requirement.
159 addTableEntry(Entry
, TB_INDEX_0
| TB_FOLDED_LOAD
| TB_FOLDED_STORE
);
161 for (const X86FoldTableEntry
&Entry
: Table0
)
162 // Index 0, mix of loads and stores.
163 addTableEntry(Entry
, TB_INDEX_0
);
165 for (const X86FoldTableEntry
&Entry
: Table1
)
166 // Index 1, folded load
167 addTableEntry(Entry
, TB_INDEX_1
| TB_FOLDED_LOAD
);
169 for (const X86FoldTableEntry
&Entry
: Table2
)
170 // Index 2, folded load
171 addTableEntry(Entry
, TB_INDEX_2
| TB_FOLDED_LOAD
);
173 for (const X86FoldTableEntry
&Entry
: Table3
)
174 // Index 3, folded load
175 addTableEntry(Entry
, TB_INDEX_3
| TB_FOLDED_LOAD
);
177 for (const X86FoldTableEntry
&Entry
: Table4
)
178 // Index 4, folded load
179 addTableEntry(Entry
, TB_INDEX_4
| TB_FOLDED_LOAD
);
182 for (const X86FoldTableEntry
&Entry
: BroadcastTable2
)
183 // Index 2, folded broadcast
184 addTableEntry(Entry
, TB_INDEX_2
| TB_FOLDED_LOAD
| TB_FOLDED_BCAST
);
186 for (const X86FoldTableEntry
&Entry
: BroadcastTable3
)
187 // Index 3, folded broadcast
188 addTableEntry(Entry
, TB_INDEX_3
| TB_FOLDED_LOAD
| TB_FOLDED_BCAST
);
190 for (const X86FoldTableEntry
&Entry
: BroadcastTable4
)
191 // Index 4, folded broadcast
192 addTableEntry(Entry
, TB_INDEX_4
| TB_FOLDED_LOAD
| TB_FOLDED_BCAST
);
194 // Sort the memory->reg unfold table.
195 array_pod_sort(Table
.begin(), Table
.end());
197 // Now that it's sorted, ensure its unique.
198 assert(std::adjacent_find(Table
.begin(), Table
.end()) == Table
.end() &&
199 "Memory unfolding table is not unique!");
202 void addTableEntry(const X86FoldTableEntry
&Entry
,
203 uint16_t ExtraFlags
) {
204 // NOTE: This swaps the KeyOp and DstOp in the table so we can sort it.
205 if ((Entry
.Flags
& TB_NO_REVERSE
) == 0)
206 Table
.push_back({Entry
.DstOp
, Entry
.KeyOp
,
207 static_cast<uint16_t>(Entry
.Flags
| ExtraFlags
) });
212 const X86FoldTableEntry
*
213 llvm::lookupUnfoldTable(unsigned MemOp
) {
214 static X86MemUnfoldTable MemUnfoldTable
;
215 auto &Table
= MemUnfoldTable
.Table
;
216 auto I
= llvm::lower_bound(Table
, MemOp
);
217 if (I
!= Table
.end() && I
->KeyOp
== MemOp
)
224 // This class stores the memory -> broadcast folding tables. It is instantiated
225 // as a function scope static variable to lazily init the folding table.
226 struct X86BroadcastFoldTable
{
227 // Stores memory broadcast folding tables entries sorted by opcode.
228 std::vector
<X86FoldTableEntry
> Table
;
230 X86BroadcastFoldTable() {
232 for (const X86FoldTableEntry
&Reg2Bcst
: BroadcastTable2
) {
233 unsigned RegOp
= Reg2Bcst
.KeyOp
;
234 unsigned BcstOp
= Reg2Bcst
.DstOp
;
235 if (const X86FoldTableEntry
*Reg2Mem
= lookupFoldTable(RegOp
, 2)) {
236 unsigned MemOp
= Reg2Mem
->DstOp
;
237 uint16_t Flags
= Reg2Mem
->Flags
| Reg2Bcst
.Flags
| TB_INDEX_2
|
238 TB_FOLDED_LOAD
| TB_FOLDED_BCAST
;
239 Table
.push_back({MemOp
, BcstOp
, Flags
});
242 for (const X86FoldTableEntry
&Reg2Bcst
: BroadcastSizeTable2
) {
243 unsigned RegOp
= Reg2Bcst
.KeyOp
;
244 unsigned BcstOp
= Reg2Bcst
.DstOp
;
245 if (const X86FoldTableEntry
*Reg2Mem
= lookupFoldTable(RegOp
, 2)) {
246 unsigned MemOp
= Reg2Mem
->DstOp
;
247 uint16_t Flags
= Reg2Mem
->Flags
| Reg2Bcst
.Flags
| TB_INDEX_2
|
248 TB_FOLDED_LOAD
| TB_FOLDED_BCAST
;
249 Table
.push_back({MemOp
, BcstOp
, Flags
});
253 for (const X86FoldTableEntry
&Reg2Bcst
: BroadcastTable3
) {
254 unsigned RegOp
= Reg2Bcst
.KeyOp
;
255 unsigned BcstOp
= Reg2Bcst
.DstOp
;
256 if (const X86FoldTableEntry
*Reg2Mem
= lookupFoldTable(RegOp
, 3)) {
257 unsigned MemOp
= Reg2Mem
->DstOp
;
258 uint16_t Flags
= Reg2Mem
->Flags
| Reg2Bcst
.Flags
| TB_INDEX_3
|
259 TB_FOLDED_LOAD
| TB_FOLDED_BCAST
;
260 Table
.push_back({MemOp
, BcstOp
, Flags
});
263 for (const X86FoldTableEntry
&Reg2Bcst
: BroadcastSizeTable3
) {
264 unsigned RegOp
= Reg2Bcst
.KeyOp
;
265 unsigned BcstOp
= Reg2Bcst
.DstOp
;
266 if (const X86FoldTableEntry
*Reg2Mem
= lookupFoldTable(RegOp
, 3)) {
267 unsigned MemOp
= Reg2Mem
->DstOp
;
268 uint16_t Flags
= Reg2Mem
->Flags
| Reg2Bcst
.Flags
| TB_INDEX_3
|
269 TB_FOLDED_LOAD
| TB_FOLDED_BCAST
;
270 Table
.push_back({MemOp
, BcstOp
, Flags
});
274 for (const X86FoldTableEntry
&Reg2Bcst
: BroadcastTable4
) {
275 unsigned RegOp
= Reg2Bcst
.KeyOp
;
276 unsigned BcstOp
= Reg2Bcst
.DstOp
;
277 if (const X86FoldTableEntry
*Reg2Mem
= lookupFoldTable(RegOp
, 4)) {
278 unsigned MemOp
= Reg2Mem
->DstOp
;
279 uint16_t Flags
= Reg2Mem
->Flags
| Reg2Bcst
.Flags
| TB_INDEX_4
|
280 TB_FOLDED_LOAD
| TB_FOLDED_BCAST
;
281 Table
.push_back({MemOp
, BcstOp
, Flags
});
285 // Sort the memory->broadcast fold table.
286 array_pod_sort(Table
.begin(), Table
.end());
291 static bool matchBroadcastSize(const X86FoldTableEntry
&Entry
,
292 unsigned BroadcastBits
) {
293 switch (Entry
.Flags
& TB_BCAST_MASK
) {
296 return BroadcastBits
== 16;
299 return BroadcastBits
== 32;
302 return BroadcastBits
== 64;
307 const X86FoldTableEntry
*
308 llvm::lookupBroadcastFoldTable(unsigned MemOp
, unsigned BroadcastBits
) {
309 static X86BroadcastFoldTable BroadcastFoldTable
;
310 auto &Table
= BroadcastFoldTable
.Table
;
311 for (auto I
= llvm::lower_bound(Table
, MemOp
);
312 I
!= Table
.end() && I
->KeyOp
== MemOp
; ++I
) {
313 if (matchBroadcastSize(*I
, BroadcastBits
))