1 //===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the resources required by P9 instructions. This is part of
10 // the P9 processor model used for instruction scheduling. This file should
11 // contain all of the instructions that may be used on Power 9. This is not
12 // just instructions that are new on Power 9 but also instructions that were
13 // available on earlier architectures and are still used in Power 9.
15 // The makeup of the P9 CPU is modeled as follows:
16 // - Each CPU is made up of two superslices.
17 // - Each superslice is made up of two slices. Therefore, there are 4 slices
19 // - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
21 // - One CY (Crypto) unit P9_CY_*
22 // - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
23 // - Two PM (Permute) units. One on each superslice. P9_PM_*
24 // - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
25 // - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
26 // - Four DP (Floating Point) units. One on each slice. P9_DP_*
27 // This also includes fixed point multiply add.
28 // - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
29 // - Four Load/Store Queues. P9_LS_*
30 // - Each set of instructions will require a number of these resources.
31 //===----------------------------------------------------------------------===//
33 // Two cycle ALU vector operation that uses an entire superslice.
34 // Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
35 // (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
36 def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
37 DISP_1C, DISP_1C, DISP_1C],
39 (instregex "VADDU(B|H|W|D)M$"),
40 (instregex "VAND(C)?$"),
41 (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
42 (instregex "V_SET0(B|H)?$"),
43 (instregex "VS(R|L)(B|H|W|D)$"),
44 (instregex "VSUBU(B|H|W|D)M$"),
45 (instregex "VPOPCNT(B|H)$"),
46 (instregex "VRL(B|H|W|D)$"),
47 (instregex "VSRA(B|H|W|D)$"),
48 (instregex "XV(N)?ABS(D|S)P$"),
49 (instregex "XVCPSGN(D|S)P$"),
50 (instregex "XV(I|X)EXP(D|S)P$"),
51 (instregex "VRL(D|W)(MI|NM)$"),
52 (instregex "VMRG(E|O)W$"),
86 // Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
87 // single slice. However, since it is Restricted it requires all 3 dispatches
88 // (DISP) for that superslice.
89 def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
91 (instregex "TABORT(D|W)C(I)?$"),
92 (instregex "MTFSB(0|1)$"),
93 (instregex "MFFSC(D)?RN(I)?$"),
94 (instregex "CMPRB(8)?$"),
95 (instregex "TD(I)?$"),
96 (instregex "TW(I)?$"),
97 (instregex "FCMPU(S|D)$"),
98 (instregex "XSTSTDC(S|D)P$"),
104 // Standard Dispatch ALU operation for 3 cycles. Only one slice used.
105 def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
107 (instregex "XSMAX(C|J)?DP$"),
108 (instregex "XSMIN(C|J)?DP$"),
109 (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
110 (instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
111 (instregex "POPCNT(D|W)$"),
112 (instregex "CMPB(8)?$"),
113 (instregex "SETB(8)?$"),
121 // Standard Dispatch ALU operation for 2 cycles. Only one slice used.
122 def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
124 (instregex "S(L|R)D$"),
125 (instregex "SRAD(I)?$"),
126 (instregex "EXTSWSLI$"),
127 (instregex "MFV(S)?RD$"),
128 (instregex "MTVSRD$"),
129 (instregex "MTVSRW(A|Z)$"),
130 (instregex "CMP(WI|LWI|W|LW)(8)?$"),
131 (instregex "CMP(L)?D(I)?$"),
132 (instregex "SUBF(I)?C(8)?$"),
133 (instregex "ANDI(S)?o(8)?$"),
134 (instregex "ADDC(8)?$"),
135 (instregex "ADDIC(8)?(o)?$"),
136 (instregex "ADD(8|4)(o)?$"),
137 (instregex "ADD(E|ME|ZE)(8)?(o)?$"),
138 (instregex "SUBF(E|ME|ZE)?(8)?(o)?$"),
139 (instregex "NEG(8)?(o)?$"),
140 (instregex "POPCNTB$"),
141 (instregex "ADD(I|IS)?(8)?$"),
142 (instregex "LI(S)?(8)?$"),
143 (instregex "(X)?OR(I|IS)?(8)?(o)?$"),
144 (instregex "NAND(8)?(o)?$"),
145 (instregex "AND(C)?(8)?(o)?$"),
146 (instregex "NOR(8)?(o)?$"),
147 (instregex "OR(C)?(8)?(o)?$"),
148 (instregex "EQV(8)?(o)?$"),
149 (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"),
150 (instregex "ADD(4|8)(TLS)?(_)?$"),
151 (instregex "NEG(8)?$"),
152 (instregex "ADDI(S)?toc(HA|L)$"),
172 // Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
173 // single slice. However, since it is Restricted it requires all 3 dispatches
174 // (DISP) for that superslice.
175 def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
177 (instregex "RLDC(L|R)$"),
178 (instregex "RLWIMI(8)?$"),
179 (instregex "RLDIC(L|R)(_32)?(_64)?$"),
180 (instregex "M(F|T)OCRF(8)?$"),
181 (instregex "CR(6)?(UN)?SET$"),
182 (instregex "CR(N)?(OR|AND)(C)?$"),
183 (instregex "S(L|R)W(8)?$"),
184 (instregex "RLW(INM|NM)(8)?$"),
185 (instregex "F(N)?ABS(D|S)$"),
186 (instregex "FNEG(D|S)$"),
187 (instregex "FCPSGN(D|S)$"),
188 (instregex "SRAW(I)?$"),
189 (instregex "ISEL(8)?$"),
200 // Three cycle ALU vector operation that uses an entire superslice.
201 // Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
202 // (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
203 def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
204 DISP_1C, DISP_1C, DISP_1C],
206 (instregex "M(T|F)VSCR$"),
207 (instregex "VCMPNEZ(B|H|W)$"),
208 (instregex "VCMPEQU(B|H|W|D)$"),
209 (instregex "VCMPNE(B|H|W)$"),
210 (instregex "VABSDU(B|H|W)$"),
211 (instregex "VADDU(B|H|W)S$"),
212 (instregex "VAVG(S|U)(B|H|W)$"),
213 (instregex "VCMP(EQ|GE|GT)FP(o)?$"),
214 (instregex "VCMPBFP(o)?$"),
215 (instregex "VC(L|T)Z(B|H|W|D)$"),
216 (instregex "VADDS(B|H|W)S$"),
217 (instregex "V(MIN|MAX)FP$"),
218 (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
286 // 7 cycle DP vector operation that uses an entire superslice.
287 // Uses both DP units (the even DPE and odd DPO units), two pipelines
288 // (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
289 def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
290 DISP_1C, DISP_1C, DISP_1C],
398 // 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
399 // dispatch units for the superslice.
400 def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
402 (instregex "MADD(HD|HDU|LD)$"),
403 (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?$")
406 // 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
407 // dispatch units for the superslice.
408 def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
411 (instregex "FRI(N|P|Z|M)(D|S)$"),
412 (instregex "FRE(S)?$"),
413 (instregex "FADD(S)?$"),
414 (instregex "FMSUB(S)?$"),
415 (instregex "FMADD(S)?$"),
416 (instregex "FSUB(S)?$"),
417 (instregex "FCFID(U)?(S)?$"),
418 (instregex "FCTID(U)?(Z)?$"),
419 (instregex "FCTIW(U)?(Z)?$"),
420 (instregex "FRSQRTE(S)?$"),
449 // 7 cycle Restricted DP operation and one 3 cycle ALU operation.
450 // These operations can be done in parallel.
451 // The DP is restricted so we need a full 5 dispatches.
452 def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
453 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
455 (instregex "FSEL(D|S)o$")
458 // 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
459 def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
460 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
462 (instregex "MUL(H|L)(D|W)(U)?o$")
465 // 7 cycle Restricted DP operation and one 3 cycle ALU operation.
466 // These operations must be done sequentially.
467 // The DP is restricted so we need a full 5 dispatches.
468 def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
469 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
471 (instregex "FRI(N|P|Z|M)(D|S)o$"),
472 (instregex "FRE(S)?o$"),
473 (instregex "FADD(S)?o$"),
474 (instregex "FSUB(S)?o$"),
475 (instregex "F(N)?MSUB(S)?o$"),
476 (instregex "F(N)?MADD(S)?o$"),
477 (instregex "FCFID(U)?(S)?o$"),
478 (instregex "FCTID(U)?(Z)?o$"),
479 (instregex "FCTIW(U)?(Z)?o$"),
480 (instregex "FMUL(S)?o$"),
481 (instregex "FRSQRTE(S)?o$"),
485 // 7 cycle DP operation. One DP unit, one EXEC pipeline and two dispatch units.
486 def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
521 // Three Cycle PM operation. Only one PM unit per superslice so we use the whole
522 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
524 def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
526 (instregex "LVS(L|R)$"),
527 (instregex "VSPLTIS(W|H|B)$"),
528 (instregex "VSPLT(W|H|B)(s)?$"),
529 (instregex "V_SETALLONES(B|H)?$"),
530 (instregex "VEXTRACTU(B|H|W)$"),
531 (instregex "VINSERT(B|H|W|D)$"),
629 // 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
630 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
632 def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
653 // 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
654 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
656 def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
661 // 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
662 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
664 def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
678 // 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
679 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
681 def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
686 // 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
687 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
689 def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
695 // 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
696 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
698 def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
704 // 6 Cycle Load uses a single slice.
705 def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C, DISP_1C],
707 (instregex "LXVL(L)?")
710 // 5 Cycle Load uses a single slice.
711 def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
713 (instregex "LVE(B|H|W)X$"),
714 (instregex "LVX(L)?"),
715 (instregex "LXSI(B|H)ZX$"),
729 // 4 Cycle Load uses a single slice.
730 def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
732 (instregex "DCB(F|T|ST)(EP)?$"),
733 (instregex "DCBZ(L)?(EP)?$"),
734 (instregex "DCBTST(EP)?$"),
735 (instregex "CP_COPY(8)?$"),
736 (instregex "CP_PASTE(8)?$"),
737 (instregex "ICBI(EP)?$"),
738 (instregex "ICBT(LS)?$"),
739 (instregex "LBARX(L)?$"),
740 (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
741 (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
742 (instregex "LH(A|B)RX(L)?(8)?$"),
743 (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
744 (instregex "LWARX(L)?$"),
745 (instregex "LWBRX(8)?$"),
746 (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
758 // 4 Cycle Restricted load uses a single slice but the dispatch for the whole
760 def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
767 // Cracked Load Instructions.
768 // Load instructions that can be done in parallel.
769 def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
770 DISP_1C, DISP_1C, DISP_1C, DISP_1C],
780 // Cracked Load Instruction.
781 // Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
782 // operations can be run in parallel.
783 def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
784 DISP_1C, DISP_1C, DISP_1C, DISP_1C],
786 (instregex "L(W|H)ZU(X)?(8)?$"),
790 // Cracked Store Instruction
791 // Consecutive Store and ALU instructions. The store is restricted and requires
793 def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
794 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
796 (instregex "ST(B|H|W|D)CX$")
799 // Cracked Load Instruction.
800 // Two consecutive load operations for a total of 8 cycles.
801 def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
802 DISP_1C, DISP_1C, DISP_1C, DISP_1C],
807 // Cracked Load instruction.
808 // Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
809 // operations cannot be done at the same time and so their latencies are added.
810 def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
811 DISP_1C, DISP_1C, DISP_1C, DISP_1C],
813 (instregex "LHA(X)?(8)?$"),
814 (instregex "CP_PASTE(8)?o$"),
815 (instregex "LWA(X)?(_32)?$"),
819 // Cracked Restricted Load instruction.
820 // Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
821 // operations cannot be done at the same time and so their latencies are added.
822 // Full 6 dispatches are required as this is both cracked and restricted.
823 def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
824 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
829 // Cracked Load instruction.
830 // Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
831 // operations cannot be done at the same time and so their latencies are added.
832 // Full 4 dispatches are required as this is a cracked instruction.
833 def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
834 DISP_1C, DISP_1C, DISP_1C, DISP_1C],
840 // Cracked Load instruction.
841 // Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
842 // cycles. The Load and ALU operations cannot be done at the same time and so
843 // their latencies are added.
844 // Full 6 dispatches are required as this is a restricted instruction.
845 def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
846 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
852 // Cracked Load instruction.
853 // Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
854 // operations cannot be done at the same time and so their latencies are added.
855 // Full 4 dispatches are required as this is a cracked instruction.
856 def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C,
857 DISP_1C, DISP_1C, DISP_1C, DISP_1C],
865 // Cracked 3-Way Load Instruction
866 // Load with two ALU operations that depend on each other
867 def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
868 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
870 (instregex "LHAU(X)?(8)?$"),
874 // Cracked Load that requires the PM resource.
875 // Since the Load and the PM cannot be done at the same time the latencies are
876 // added. Requires 8 cycles.
877 // Since the PM requires the full superslice we need both EXECE, EXECO pipelines
878 // as well as 3 dispatches for the PM. The Load requires the remaining 2
880 def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
881 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
888 // Single slice Restricted store operation. The restricted operation requires
889 // all three dispatches for the superslice.
890 def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
892 (instregex "STF(S|D|IWX|SX|DX)$"),
893 (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
894 (instregex "STW(8)?$"),
895 (instregex "(D|X)FSTORE(f32|f64)$"),
896 (instregex "ST(W|H|D)BRX$"),
897 (instregex "ST(B|H|D)(8)?$"),
898 (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
906 // Vector Store Instruction
907 // Requires the whole superslice and therefore requires all three dispatches
908 // as well as both the Even and Odd exec pipelines.
909 def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
910 DISP_1C, DISP_1C, DISP_1C],
912 (instregex "STVE(B|H|W)X$"),
913 (instregex "STVX(L)?$"),
914 (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
917 // 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
918 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
920 def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
922 (instregex "MTCTR(8)?(loop)?$"),
923 (instregex "MTLR(8)?$")
926 // 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
927 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
929 def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
931 (instregex "M(T|F)VRSAVE(v)?$"),
932 (instregex "M(T|F)PMR$"),
933 (instregex "M(T|F)TB(8)?$"),
934 (instregex "MF(SPR|CTR|LR)(8)?$"),
935 (instregex "M(T|F)MSR(D)?$"),
936 (instregex "MTSPR(8)?$")
939 // 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
940 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
942 def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
943 DISP_1C, DISP_1C, DISP_1C],
950 // 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
951 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
953 def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
954 DISP_1C, DISP_1C, DISP_1C],
965 // 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
966 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
968 def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C,
969 DISP_1C, DISP_1C, DISP_1C],
975 // Cracked DIV and ALU operation. Requires one full slice for the ALU operation
976 // and one full superslice for the DIV operation since there is only one DIV
977 // per superslice. Latency of DIV plus ALU is 18.
978 def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
979 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
981 (instregex "DIVW(U)?(O)?o$")
984 // Cracked DIV and ALU operation. Requires one full slice for the ALU operation
985 // and one full superslice for the DIV operation since there is only one DIV
986 // per superslice. Latency of DIV plus ALU is 26.
987 def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
988 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
996 // Cracked DIV and ALU operation. Requires one full slice for the ALU operation
997 // and one full superslice for the DIV operation since there is only one DIV
998 // per superslice. Latency of DIV plus ALU is 42.
999 def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
1000 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
1006 // CR access instructions in IIC_BrMCR, IIC_BrMCRX.
1008 // Cracked, restricted, ALU operations.
1009 // Here the two ALU ops can actually be done in parallel and therefore the
1010 // latencies are not added together. Otherwise this is like having two
1011 // instructions running together on two pipelines and 6 dispatches.
1012 // ALU ops are 2 cycles each.
1013 def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
1014 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
1020 // Cracked ALU operations.
1021 // Here the two ALU ops can actually be done in parallel and therefore the
1022 // latencies are not added together. Otherwise this is like having two
1023 // instructions running together on two pipelines and 4 dispatches.
1024 // ALU ops are 2 cycles each.
1025 def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
1026 DISP_1C, DISP_1C, DISP_1C, DISP_1C],
1028 (instregex "ADDC(8)?o$"),
1029 (instregex "SUBFC(8)?o$")
1032 // Cracked ALU operations.
1033 // Two ALU ops can be done in parallel.
1034 // One is a three cycle ALU, the other is a two cycle ALU.
1035 // One of the ALU ops is restricted the other is not so we have a total of
1037 def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1038 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
1040 (instregex "F(N)?ABS(D|S)o$"),
1041 (instregex "FCPSGN(D|S)o$"),
1042 (instregex "FNEG(D|S)o$"),
1046 // Cracked ALU operations.
1047 // Here the two ALU ops can actually be done in parallel and therefore the
1048 // latencies are not added together. Otherwise this is like having two
1049 // instructions running together on two pipelines and 4 dispatches.
1050 // ALU ops are 3 cycles each.
1051 def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1052 DISP_1C, DISP_1C, DISP_1C, DISP_1C],
1057 // Cracked Restricted ALU operations.
1058 // Here the two ALU ops can actually be done in parallel and therefore the
1059 // latencies are not added together. Otherwise this is like having two
1060 // instructions running together on two pipelines and 6 dispatches.
1061 // ALU ops are 3 cycles each.
1062 def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1063 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
1065 (instregex "MTFSF(b|o)?$"),
1066 (instregex "MTFSFI(o)?$")
1069 // Cracked instruction made of two ALU ops.
1070 // The two ops cannot be done in parallel.
1071 // One of the ALU ops is restricted and takes 3 dispatches.
1072 def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
1073 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
1075 (instregex "RLD(I)?C(R|L)o$"),
1076 (instregex "RLW(IMI|INM|NM)(8)?o$"),
1077 (instregex "SLW(8)?o$"),
1078 (instregex "SRAW(I)?o$"),
1079 (instregex "SRW(8)?o$"),
1084 // Cracked instruction made of two ALU ops.
1085 // The two ops cannot be done in parallel.
1086 // Both of the ALU ops are restricted and take 3 dispatches.
1087 def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
1088 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
1090 (instregex "MFFS(L|CE|o)?$")
1093 // Cracked ALU instruction composed of three consecutive 2 cycle ALU operations
1094 // for a total of 6 cycles. All of the ALU operations are also restricted so
1095 // each takes 3 dispatches for a total of 9.
1096 def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
1097 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
1100 (instregex "MFCR(8)?$")
1103 // Cracked instruction made of two ALU ops.
1104 // The two ops cannot be done in parallel.
1105 def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
1106 DISP_1C, DISP_1C, DISP_1C, DISP_1C],
1108 (instregex "EXTSWSLIo$"),
1109 (instregex "SRAD(I)?o$"),
1115 // 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
1116 def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
1121 // 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
1122 def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
1123 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
1128 // 36 Cycle DP Instruction.
1129 // Instruction can be done on a single slice.
1130 def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C],
1135 // 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
1136 def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
1141 // 36 Cycle DP Vector Instruction.
1142 def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
1143 DISP_1C, DISP_1C, DISP_1C],
1148 // 27 Cycle DP Vector Instruction.
1149 def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
1150 DISP_1C, DISP_1C, DISP_1C],
1155 // 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
1156 def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
1157 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
1162 // 26 Cycle DP Instruction.
1163 def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
1168 // 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
1169 def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
1174 // 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
1175 def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
1176 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
1181 // 33 Cycle DP Instruction. Takes one slice and 2 dispatches.
1182 def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C],
1187 // 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
1188 def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
1193 // 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
1194 def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
1195 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
1200 // 22 Cycle DP Instruction. Takes one slice and 2 dispatches.
1201 def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
1206 // 24 Cycle DP Vector Instruction. Takes one full superslice.
1207 // Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
1209 def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
1210 DISP_1C, DISP_1C, DISP_1C],
1215 // 33 Cycle DP Vector Instruction. Takes one full superslice.
1216 // Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
1218 def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
1219 DISP_1C, DISP_1C, DISP_1C],
1224 // Instruction cracked into three pieces. One Load and two ALU operations.
1225 // The Load and one of the ALU ops cannot be run at the same time and so the
1226 // latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
1227 // Both the load and the ALU that depends on it are restricted and so they take
1228 // a total of 6 dispatches. The final 2 dispatches come from the second ALU op.
1229 // The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
1230 def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
1231 IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
1232 DISP_1C, DISP_1C, DISP_1C, DISP_1C,
1233 DISP_1C, DISP_1C, DISP_1C, DISP_1C],
1235 (instregex "LF(SU|SUX)$")
1238 // Cracked instruction made up of a Store and an ALU. The ALU does not depend on
1239 // the store and so it can be run at the same time as the store. The store is
1241 def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
1242 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
1244 (instregex "STF(S|D)U(X)?$"),
1245 (instregex "ST(B|H|W|D)U(X)?(8)?$")
1248 // Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1249 // the load and so it can be run at the same time as the load.
1250 def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
1251 DISP_1C, DISP_1C, DISP_1C, DISP_1C],
1253 (instregex "LBZU(X)?(8)?$"),
1254 (instregex "LDU(X)?$")
1258 // Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1259 // the load and so it can be run at the same time as the load. The load is also
1260 // restricted. 3 dispatches are from the restricted load while the other two
1261 // are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
1262 // is required for the ALU.
1263 def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
1264 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
1266 (instregex "LF(DU|DUX)$")
1269 // Crypto Instructions
1271 // 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
1272 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
1274 def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
1276 (instregex "VPMSUM(B|H|W|D)$"),
1277 (instregex "V(N)?CIPHER(LAST)?$"),
1281 // Branch Instructions
1284 def : InstRW<[P9_BR_2C, DISP_1C, DISP_1C],
1286 (instregex "BCCCTR(L)?(8)?$"),
1287 (instregex "BCCL(A|R|RL)?$"),
1288 (instregex "BCCTR(L)?(8)?(n)?$"),
1289 (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
1290 (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
1291 (instregex "BL(_TLS)?$"),
1292 (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
1293 (instregex "BLA(8|8_NOP)?$"),
1294 (instregex "BLR(8|L)?$"),
1295 (instregex "TAILB(A)?(8)?$"),
1296 (instregex "TAILBCTR(8)?$"),
1297 (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
1298 (instregex "BCLR(L)?(n)?$"),
1299 (instregex "BCTR(L)?(8)?$"),
1313 // Five Cycle Branch with a 2 Cycle ALU Op
1314 // Operations must be done consecutively and not in parallel.
1315 def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C,
1316 DISP_1C, DISP_1C, DISP_1C, DISP_1C],
1321 // Special Extracted Instructions For Atomics
1324 def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
1325 IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
1326 IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
1327 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
1330 (instregex "L(D|W)AT$")
1334 def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
1335 IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
1338 (instregex "ST(D|W)AT$")
1341 // Signal Processing Engine (SPE) Instructions
1342 // These instructions are not supported on Power 9
1351 (instregex "EVADD(I)?W$"),
1352 (instregex "EVADD(SM|SS|UM|US)IAAW$"),
1353 (instregex "EVAND(C)?$"),
1354 (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
1355 (instregex "EVCNTL(S|Z)W$"),
1356 (instregex "EVDIVW(S|U)$"),
1357 (instregex "EVEXTS(B|H)$"),
1358 (instregex "EVLD(H|W|D)(X)?$"),
1359 (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
1360 (instregex "EVLWHE(X)?$"),
1361 (instregex "EVLWHO(S|U)(X)?$"),
1362 (instregex "EVLW(H|W)SPLAT(X)?$"),
1363 (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
1364 (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
1365 (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
1366 (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
1367 (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
1368 (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
1369 (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
1370 (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
1371 (instregex "EVMWHUMI(A)?$"),
1372 (instregex "EVMWLS(M|S)IA(A|N)W$"),
1373 (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
1374 (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
1375 (instregex "EVMWSSF(A|AA|AN)?$"),
1376 (instregex "EVMWUMI(A|AA|AN)?$"),
1377 (instregex "EV(N|X)?OR(C)?$"),
1378 (instregex "EVR(LW|LWI|NDW)$"),
1379 (instregex "EVSLW(I)?$"),
1380 (instregex "EVSPLAT(F)?I$"),
1381 (instregex "EVSRW(I)?(S|U)$"),
1382 (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
1383 (instregex "EVSUBF(S|U)(M|S)IAAW$"),
1384 (instregex "EVSUB(I)?FW$")
1385 )> { let Unsupported = 1; }
1387 // General Instructions without scheduling support.
1390 (instregex "(H)?RFI(D)?$"),
1391 (instregex "DSS(ALL)?$"),
1392 (instregex "DST(ST)?(T)?(64)?$"),
1393 (instregex "ICBL(C|Q)$"),
1394 (instregex "L(W|H|B)EPX$"),
1395 (instregex "ST(W|H|B)EPX$"),
1396 (instregex "(L|ST)FDEPX$"),
1397 (instregex "M(T|F)SR(IN)?$"),
1398 (instregex "M(T|F)DCR$"),
1399 (instregex "NOP_GT_PWR(6|7)$"),
1400 (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
1401 (instregex "WRTEE(I)?$"),
1419 )> { let Unsupported = 1; }