//===-- X86InstrFoldTables.cpp - X86 Instruction Folding Tables -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 memory folding tables.
//
//===----------------------------------------------------------------------===//
#include "X86InstrFoldTables.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/STLExtras.h"
#include <atomic>
#include <vector>

using namespace llvm;
// These tables are sorted by their RegOp value, allowing them to be binary
// searched at runtime without the need for additional storage. The enum values
// are currently emitted in X86GenInstrInfo.inc in alphabetical order, which
// makes sorting these tables a simple matter of alphabetizing them.
#include "X86GenFoldTables.inc"

// Table to map instructions safe to broadcast using a different width from the
// element width.
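// This is only safe for instructions whose result does not depend on how the
// bits are grouped into elements (the bitwise AND/ANDN/OR/XOR families below):
// e.g. VANDNPDZ128rr paired with a 32-bit broadcast load can use
// VANDNPSZ128rmb even though its element width is 64 bits.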
static const X86FoldTableEntry BroadcastSizeTable2[] = {
    { X86::VANDNPDZ128rr, X86::VANDNPSZ128rmb, TB_BCAST_SS },
    { X86::VANDNPDZ256rr, X86::VANDNPSZ256rmb, TB_BCAST_SS },
    { X86::VANDNPDZrr, X86::VANDNPSZrmb, TB_BCAST_SS },
    { X86::VANDNPSZ128rr, X86::VANDNPDZ128rmb, TB_BCAST_SD },
    { X86::VANDNPSZ256rr, X86::VANDNPDZ256rmb, TB_BCAST_SD },
    { X86::VANDNPSZrr, X86::VANDNPDZrmb, TB_BCAST_SD },
    { X86::VANDPDZ128rr, X86::VANDPSZ128rmb, TB_BCAST_SS },
    { X86::VANDPDZ256rr, X86::VANDPSZ256rmb, TB_BCAST_SS },
    { X86::VANDPDZrr, X86::VANDPSZrmb, TB_BCAST_SS },
    { X86::VANDPSZ128rr, X86::VANDPDZ128rmb, TB_BCAST_SD },
    { X86::VANDPSZ256rr, X86::VANDPDZ256rmb, TB_BCAST_SD },
    { X86::VANDPSZrr, X86::VANDPDZrmb, TB_BCAST_SD },
    { X86::VORPDZ128rr, X86::VORPSZ128rmb, TB_BCAST_SS },
    { X86::VORPDZ256rr, X86::VORPSZ256rmb, TB_BCAST_SS },
    { X86::VORPDZrr, X86::VORPSZrmb, TB_BCAST_SS },
    { X86::VORPSZ128rr, X86::VORPDZ128rmb, TB_BCAST_SD },
    { X86::VORPSZ256rr, X86::VORPDZ256rmb, TB_BCAST_SD },
    { X86::VORPSZrr, X86::VORPDZrmb, TB_BCAST_SD },
    { X86::VPANDDZ128rr, X86::VPANDQZ128rmb, TB_BCAST_Q },
    { X86::VPANDDZ256rr, X86::VPANDQZ256rmb, TB_BCAST_Q },
    { X86::VPANDDZrr, X86::VPANDQZrmb, TB_BCAST_Q },
    { X86::VPANDNDZ128rr, X86::VPANDNQZ128rmb, TB_BCAST_Q },
    { X86::VPANDNDZ256rr, X86::VPANDNQZ256rmb, TB_BCAST_Q },
    { X86::VPANDNDZrr, X86::VPANDNQZrmb, TB_BCAST_Q },
    { X86::VPANDNQZ128rr, X86::VPANDNDZ128rmb, TB_BCAST_D },
    { X86::VPANDNQZ256rr, X86::VPANDNDZ256rmb, TB_BCAST_D },
    { X86::VPANDNQZrr, X86::VPANDNDZrmb, TB_BCAST_D },
    { X86::VPANDQZ128rr, X86::VPANDDZ128rmb, TB_BCAST_D },
    { X86::VPANDQZ256rr, X86::VPANDDZ256rmb, TB_BCAST_D },
    { X86::VPANDQZrr, X86::VPANDDZrmb, TB_BCAST_D },
    { X86::VPORDZ128rr, X86::VPORQZ128rmb, TB_BCAST_Q },
    { X86::VPORDZ256rr, X86::VPORQZ256rmb, TB_BCAST_Q },
    { X86::VPORDZrr, X86::VPORQZrmb, TB_BCAST_Q },
    { X86::VPORQZ128rr, X86::VPORDZ128rmb, TB_BCAST_D },
    { X86::VPORQZ256rr, X86::VPORDZ256rmb, TB_BCAST_D },
    { X86::VPORQZrr, X86::VPORDZrmb, TB_BCAST_D },
    { X86::VPXORDZ128rr, X86::VPXORQZ128rmb, TB_BCAST_Q },
    { X86::VPXORDZ256rr, X86::VPXORQZ256rmb, TB_BCAST_Q },
    { X86::VPXORDZrr, X86::VPXORQZrmb, TB_BCAST_Q },
    { X86::VPXORQZ128rr, X86::VPXORDZ128rmb, TB_BCAST_D },
    { X86::VPXORQZ256rr, X86::VPXORDZ256rmb, TB_BCAST_D },
    { X86::VPXORQZrr, X86::VPXORDZrmb, TB_BCAST_D },
    { X86::VXORPDZ128rr, X86::VXORPSZ128rmb, TB_BCAST_SS },
    { X86::VXORPDZ256rr, X86::VXORPSZ256rmb, TB_BCAST_SS },
    { X86::VXORPDZrr, X86::VXORPSZrmb, TB_BCAST_SS },
    { X86::VXORPSZ128rr, X86::VXORPDZ128rmb, TB_BCAST_SD },
    { X86::VXORPSZ256rr, X86::VXORPDZ256rmb, TB_BCAST_SD },
    { X86::VXORPSZrr, X86::VXORPDZrmb, TB_BCAST_SD },
};

static const X86FoldTableEntry BroadcastSizeTable3[] = {
    { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGQZ128rmbi, TB_BCAST_Q },
    { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGQZ256rmbi, TB_BCAST_Q },
    { X86::VPTERNLOGDZrri, X86::VPTERNLOGQZrmbi, TB_BCAST_Q },
    { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGDZ128rmbi, TB_BCAST_D },
    { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGDZ256rmbi, TB_BCAST_D },
    { X86::VPTERNLOGQZrri, X86::VPTERNLOGDZrmbi, TB_BCAST_D },
};

static const X86FoldTableEntry *
lookupFoldTableImpl(ArrayRef<X86FoldTableEntry> Table, unsigned RegOp) {
#ifndef NDEBUG
#define CHECK_SORTED_UNIQUE(TABLE)                                             \
  assert(llvm::is_sorted(TABLE) && #TABLE " is not sorted");                   \
  assert(std::adjacent_find(std::begin(TABLE), std::end(TABLE)) ==             \
             std::end(TABLE) &&                                                \
         #TABLE " is not unique");

  // Make sure the tables are sorted and unique.
  static std::atomic<bool> FoldTablesChecked(false);
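  // The flag only exists to skip redundant re-checking; relaxed ordering is
  // enough because the verification itself is idempotent and read-only, so
  // the worst case under a race is that two threads both run it.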
  if (!FoldTablesChecked.load(std::memory_order_relaxed)) {
    CHECK_SORTED_UNIQUE(Table2Addr)
    CHECK_SORTED_UNIQUE(Table0)
    CHECK_SORTED_UNIQUE(Table1)
    CHECK_SORTED_UNIQUE(Table2)
    CHECK_SORTED_UNIQUE(Table3)
    CHECK_SORTED_UNIQUE(Table4)
    CHECK_SORTED_UNIQUE(BroadcastTable1)
    CHECK_SORTED_UNIQUE(BroadcastTable2)
    CHECK_SORTED_UNIQUE(BroadcastTable3)
    CHECK_SORTED_UNIQUE(BroadcastTable4)
    CHECK_SORTED_UNIQUE(BroadcastSizeTable2)
    CHECK_SORTED_UNIQUE(BroadcastSizeTable3)
    FoldTablesChecked.store(true, std::memory_order_relaxed);
  }
#endif
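
  // Binary-search the sorted table. Entries flagged TB_NO_FORWARD exist only
  // to drive unfolding and are never returned from a forward lookup.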
  const X86FoldTableEntry *Data = llvm::lower_bound(Table, RegOp);
  if (Data != Table.end() && Data->KeyOp == RegOp &&
      !(Data->Flags & TB_NO_FORWARD))
    return Data;
  return nullptr;
}

const X86FoldTableEntry *
llvm::lookupTwoAddrFoldTable(unsigned RegOp) {
  return lookupFoldTableImpl(Table2Addr, RegOp);
}
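
// OpNum selects which operand of the instruction is being folded; each
// operand index has its own generated table.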
const X86FoldTableEntry *
llvm::lookupFoldTable(unsigned RegOp, unsigned OpNum) {
  ArrayRef<X86FoldTableEntry> FoldTable;
  if (OpNum == 0)
    FoldTable = ArrayRef(Table0);
  else if (OpNum == 1)
    FoldTable = ArrayRef(Table1);
  else if (OpNum == 2)
    FoldTable = ArrayRef(Table2);
  else if (OpNum == 3)
    FoldTable = ArrayRef(Table3);
  else if (OpNum == 4)
    FoldTable = ArrayRef(Table4);
  else
    return nullptr;

  return lookupFoldTableImpl(FoldTable, RegOp);
}

namespace {

// This class stores the memory unfolding tables. It is instantiated as a
// function-scope static variable to lazily initialize the unfolding table.
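// (C++11 magic statics make that first-use initialization thread-safe.)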
struct X86MemUnfoldTable {
  // Stores memory unfolding table entries sorted by opcode.
  std::vector<X86FoldTableEntry> Table;

  X86MemUnfoldTable() {
    for (const X86FoldTableEntry &Entry : Table2Addr)
      // Index 0, folded load and store, no alignment requirement.
      addTableEntry(Entry, TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);

    for (const X86FoldTableEntry &Entry : Table0)
      // Index 0, mix of loads and stores.
      addTableEntry(Entry, TB_INDEX_0);

    for (const X86FoldTableEntry &Entry : Table1)
      // Index 1, folded load.
      addTableEntry(Entry, TB_INDEX_1 | TB_FOLDED_LOAD);

    for (const X86FoldTableEntry &Entry : Table2)
      // Index 2, folded load.
      addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD);

    for (const X86FoldTableEntry &Entry : Table3)
      // Index 3, folded load.
      addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD);

    for (const X86FoldTableEntry &Entry : Table4)
      // Index 4, folded load.
      addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD);

    // Broadcast tables.
    for (const X86FoldTableEntry &Entry : BroadcastTable2)
      // Index 2, folded broadcast.
      addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD | TB_FOLDED_BCAST);

    for (const X86FoldTableEntry &Entry : BroadcastTable3)
      // Index 3, folded broadcast.
      addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD | TB_FOLDED_BCAST);

    for (const X86FoldTableEntry &Entry : BroadcastTable4)
      // Index 4, folded broadcast.
      addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD | TB_FOLDED_BCAST);

    // Sort the memory->reg unfold table.
    array_pod_sort(Table.begin(), Table.end());

    // Now that it's sorted, ensure it's unique.
    assert(std::adjacent_find(Table.begin(), Table.end()) == Table.end() &&
           "Memory unfolding table is not unique!");
  }

  void addTableEntry(const X86FoldTableEntry &Entry, uint16_t ExtraFlags) {
    // NOTE: This swaps the KeyOp and DstOp in the table so we can sort it.
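    // Entries flagged TB_NO_REVERSE cannot be unfolded back into a register
    // form, so they are deliberately skipped.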
    if ((Entry.Flags & TB_NO_REVERSE) == 0)
      Table.push_back({Entry.DstOp, Entry.KeyOp,
                       static_cast<uint16_t>(Entry.Flags | ExtraFlags)});
  }
};
} // namespace

const X86FoldTableEntry *
llvm::lookupUnfoldTable(unsigned MemOp) {
  static X86MemUnfoldTable MemUnfoldTable;
  auto &Table = MemUnfoldTable.Table;
  auto I = llvm::lower_bound(Table, MemOp);
  if (I != Table.end() && I->KeyOp == MemOp)
    return &*I;
  return nullptr;
}

namespace {

// This class stores the memory -> broadcast folding tables. It is instantiated
// as a function-scope static variable to lazily initialize the folding table.
struct X86BroadcastFoldTable {
  // Stores memory broadcast folding table entries sorted by opcode.
  std::vector<X86FoldTableEntry> Table;
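
  // Each entry is built by composing a reg -> reg-broadcast entry with the
  // matching reg -> mem fold entry, producing a direct mem-load -> broadcast
  // mapping keyed on the memory opcode.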
  X86BroadcastFoldTable() {
    // Broadcast tables.
    for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable2) {
      unsigned RegOp = Reg2Bcst.KeyOp;
      unsigned BcstOp = Reg2Bcst.DstOp;
      if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
        unsigned MemOp = Reg2Mem->DstOp;
        uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 |
                         TB_FOLDED_LOAD | TB_FOLDED_BCAST;
        Table.push_back({MemOp, BcstOp, Flags});
      }
    }
    for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable2) {
      unsigned RegOp = Reg2Bcst.KeyOp;
      unsigned BcstOp = Reg2Bcst.DstOp;
      if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
        unsigned MemOp = Reg2Mem->DstOp;
        uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 |
                         TB_FOLDED_LOAD | TB_FOLDED_BCAST;
        Table.push_back({MemOp, BcstOp, Flags});
      }
    }
    for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable3) {
      unsigned RegOp = Reg2Bcst.KeyOp;
      unsigned BcstOp = Reg2Bcst.DstOp;
      if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
        unsigned MemOp = Reg2Mem->DstOp;
        uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 |
                         TB_FOLDED_LOAD | TB_FOLDED_BCAST;
        Table.push_back({MemOp, BcstOp, Flags});
      }
    }
    for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable3) {
      unsigned RegOp = Reg2Bcst.KeyOp;
      unsigned BcstOp = Reg2Bcst.DstOp;
      if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
        unsigned MemOp = Reg2Mem->DstOp;
        uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 |
                         TB_FOLDED_LOAD | TB_FOLDED_BCAST;
        Table.push_back({MemOp, BcstOp, Flags});
      }
    }
    for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable4) {
      unsigned RegOp = Reg2Bcst.KeyOp;
      unsigned BcstOp = Reg2Bcst.DstOp;
      if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 4)) {
        unsigned MemOp = Reg2Mem->DstOp;
        uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_4 |
                         TB_FOLDED_LOAD | TB_FOLDED_BCAST;
        Table.push_back({MemOp, BcstOp, Flags});
      }
    }

    // Sort the memory->broadcast fold table.
    array_pod_sort(Table.begin(), Table.end());
  }
};
} // namespace

static bool matchBroadcastSize(const X86FoldTableEntry &Entry,
                               unsigned BroadcastBits) {
  switch (Entry.Flags & TB_BCAST_MASK) {
  case TB_BCAST_W:
  case TB_BCAST_SH:
    return BroadcastBits == 16;
  case TB_BCAST_D:
  case TB_BCAST_SS:
    return BroadcastBits == 32;
  case TB_BCAST_Q:
  case TB_BCAST_SD:
    return BroadcastBits == 64;
  }
  return false;
}
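
// Several broadcast forms can share one memory opcode (e.g. a 32-bit and a
// 64-bit broadcast variant of the same bitwise op), so the lookup scans every
// entry with a matching KeyOp instead of stopping at the first hit.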
const X86FoldTableEntry *
llvm::lookupBroadcastFoldTable(unsigned MemOp, unsigned BroadcastBits) {
  static X86BroadcastFoldTable BroadcastFoldTable;
  auto &Table = BroadcastFoldTable.Table;
  for (auto I = llvm::lower_bound(Table, MemOp);
       I != Table.end() && I->KeyOp == MemOp; ++I) {
    if (matchBroadcastSize(*I, BroadcastBits))
      return &*I;
  }
  return nullptr;
}