lib/Target/ARM/ARMScheduleM4.td

   1 //==- ARMScheduleM4.td - Cortex-M4 Scheduling Definitions -*- tablegen -*-====//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file defines the SchedRead/Write data for the ARM Cortex-M4 processor.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 def CortexM4Model : SchedMachineModel {
  14   let IssueWidth        = 1; // Only IT can be dual-issued, so assume single-issue
  15   let MicroOpBufferSize = 0; // In-order
  16   let LoadLatency       = 2; // Latency when not pipelined, not pc-relative
  17   let MispredictPenalty = 2; // Best case branch taken cost
  18   let PostRAScheduler   = 1;
  19
  20   let CompleteModel = 0;
  21 }
  22
  23
  24 // We model the entire cpu as a single pipeline with a BufferSize = 0 since
  25 // Cortex-M4 is in-order.
  26
  27 def M4Unit : ProcResource<1> { let BufferSize = 0; }
  28
  29
  30 let SchedModel = CortexM4Model in {
  31
  32 // Some definitions of latencies we apply to different instructions
  33
  34 class M4UnitL1<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 1; }
  35 class M4UnitL2<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 2; }
  36 class M4UnitL3<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 3; }
  37 class M4UnitL14<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 14; }
  38 def M4UnitL1_wr : SchedWriteRes<[M4Unit]> { let Latency = 1; }
  39 def M4UnitL2_wr : SchedWriteRes<[M4Unit]> { let Latency = 2; }
  40 class M4UnitL1I<dag instr> : InstRW<[M4UnitL1_wr], instr>;
  41 class M4UnitL2I<dag instr> : InstRW<[M4UnitL2_wr], instr>;
  42
  43
  44 // Loads, MAC's and DIV all get a higher latency of 2
  45 def : M4UnitL2<WriteLd>;
  46 def : M4UnitL2<WriteMAC32>;
  47 def : M4UnitL2<WriteMAC64Hi>;
  48 def : M4UnitL2<WriteMAC64Lo>;
  49 def : M4UnitL2<WriteMAC16>;
  50 def : M4UnitL2<WriteDIV>;
  51
  52 def : M4UnitL2I<(instregex "(t|t2)LDM")>;
  53
  54
  55 // Stores we use a latency of 1 as they have no outputs
  56
  57 def : M4UnitL1<WriteST>;
  58 def : M4UnitL1I<(instregex "(t|t2)STM")>;
  59
  60
  61 // Everything else has a Latency of 1
  62
  63 def : M4UnitL1<WriteALU>;
  64 def : M4UnitL1<WriteALUsi>;
  65 def : M4UnitL1<WriteALUsr>;
  66 def : M4UnitL1<WriteALUSsr>;
  67 def : M4UnitL1<WriteBr>;
  68 def : M4UnitL1<WriteBrL>;
  69 def : M4UnitL1<WriteBrTbl>;
  70 def : M4UnitL1<WriteCMPsi>;
  71 def : M4UnitL1<WriteCMPsr>;
  72 def : M4UnitL1<WriteCMP>;
  73 def : M4UnitL1<WriteMUL32>;
  74 def : M4UnitL1<WriteMUL64Hi>;
  75 def : M4UnitL1<WriteMUL64Lo>;
  76 def : M4UnitL1<WriteMUL16>;
  77 def : M4UnitL1<WriteNoop>;
  78 def : M4UnitL1<WritePreLd>;
  79 def : M4UnitL1I<(instregex "(t|t2)MOV")>;
  80 def : M4UnitL1I<(instrs COPY)>;
  81 def : M4UnitL1I<(instregex "t2IT")>;
  82 def : M4UnitL1I<(instregex "t2SEL", "t2USAD8",
  83     "t2(S|Q|SH|U|UQ|UH)(ADD16|ASX|SAX|SUB16|ADD8|SUB8)", "t2USADA8", "(t|t2)REV")>;
  84
  85 def : ReadAdvance<ReadALU, 0>;
  86 def : ReadAdvance<ReadALUsr, 0>;
  87 def : ReadAdvance<ReadMUL, 0>;
  88 def : ReadAdvance<ReadMAC, 0>;
  89
  90 // Most FP instructions are single-cycle latency, except MAC's, Div's and Sqrt's.
  91 // Loads still take 2 cycles.
  92
  93 def : M4UnitL1<WriteFPCVT>;
  94 def : M4UnitL1<WriteFPMOV>;
  95 def : M4UnitL1<WriteFPALU32>;
  96 def : M4UnitL1<WriteFPALU64>;
  97 def : M4UnitL1<WriteFPMUL32>;
  98 def : M4UnitL1<WriteFPMUL64>;
  99 def : M4UnitL2I<(instregex "VLD")>;
 100 def : M4UnitL1I<(instregex "VST")>;
 101 def : M4UnitL3<WriteFPMAC32>;
 102 def : M4UnitL3<WriteFPMAC64>;
 103 def : M4UnitL14<WriteFPDIV32>;
 104 def : M4UnitL14<WriteFPDIV64>;
 105 def : M4UnitL14<WriteFPSQRT32>;
 106 def : M4UnitL14<WriteFPSQRT64>;
 107 def : M4UnitL1<WriteVLD1>;
 108 def : M4UnitL1<WriteVLD2>;
 109 def : M4UnitL1<WriteVLD3>;
 110 def : M4UnitL1<WriteVLD4>;
 111 def : M4UnitL1<WriteVST1>;
 112 def : M4UnitL1<WriteVST2>;
 113 def : M4UnitL1<WriteVST3>;
 114 def : M4UnitL1<WriteVST4>;
 115
 116 def : ReadAdvance<ReadFPMUL, 0>;
 117 def : ReadAdvance<ReadFPMAC, 0>;
 118
 119 }