llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td

   1 // LoongArchFloat32InstrInfo.td - Single-Precision Float instr --*- tablegen -*-
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file describes the basic single-precision floating-point instructions.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 //===----------------------------------------------------------------------===//
  14 // LoongArch specific DAG Nodes.
  15 //===----------------------------------------------------------------------===//
  16
     // Type profiles. Each one describes a node with one result and one operand:
     //   SDT_LoongArchMOVGR2FR_W_LA64 : f32 result, i64 operand.
     //   SDT_LoongArchMOVFR2GR_S_LA64 : i64 result, f32 operand.
     //   SDT_LoongArchFTINT           : floating-point result, floating-point
     //                                  operand (no specific width is fixed).
  17 def SDT_LoongArchMOVGR2FR_W_LA64
  18     : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i64>]>;
  19 def SDT_LoongArchMOVFR2GR_S_LA64
  20     : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>;
  21 def SDT_LoongArchFTINT : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
  22
     // SelectionDAG nodes bound to the LoongArchISD opcode named in each string,
     // typed by the profile of the same name above. They are matched by the
     // patterns near the end of this file (loongarch_ftint under HasBasicF, the
     // two *_la64 move nodes under [HasBasicF, IsLA64]).
  23 def loongarch_movgr2fr_w_la64
  24     : SDNode<"LoongArchISD::MOVGR2FR_W_LA64", SDT_LoongArchMOVGR2FR_W_LA64>;
  25 def loongarch_movfr2gr_s_la64
  26     : SDNode<"LoongArchISD::MOVFR2GR_S_LA64", SDT_LoongArchMOVFR2GR_S_LA64>;
  27 def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>;
  28
  29 //===----------------------------------------------------------------------===//
  30 // Instructions
  31 //===----------------------------------------------------------------------===//
  32
  33 let Predicates = [HasBasicF] in {
  34
  35 // Arithmetic Operation Instructions
     // Single-precision ("_S") records. Each one instantiates a format class
     // (FP_ALU_2R / FP_ALU_3R / FP_ALU_4R, defined outside this file) with a
     // single 32-bit hex constant.
     // NOTE(review): the constant is presumably the fixed opcode bits of the
     // instruction encoding -- confirm against the FP_ALU_* class definitions.
     // Records with no selection pattern in this file: FMAXA_S, FMINA_S,
     // FSCALEB_S and FLOGB_S. FCLASS_S is used only by the is_fpclass pattern.
  36 def FADD_S : FP_ALU_3R<0x01008000>;
  37 def FSUB_S : FP_ALU_3R<0x01028000>;
  38 def FMUL_S : FP_ALU_3R<0x01048000>;
  39 def FDIV_S : FP_ALU_3R<0x01068000>;
  40 def FMADD_S  : FP_ALU_4R<0x08100000>;
  41 def FMSUB_S  : FP_ALU_4R<0x08500000>;
  42 def FNMADD_S : FP_ALU_4R<0x08900000>;
  43 def FNMSUB_S : FP_ALU_4R<0x08d00000>;
  44 def FMAX_S  : FP_ALU_3R<0x01088000>;
  45 def FMIN_S  : FP_ALU_3R<0x010a8000>;
  46 def FMAXA_S : FP_ALU_3R<0x010c8000>;
  47 def FMINA_S : FP_ALU_3R<0x010e8000>;
  48 def FABS_S   : FP_ALU_2R<0x01140400>;
  49 def FNEG_S   : FP_ALU_2R<0x01141400>;
  50 def FSQRT_S  : FP_ALU_2R<0x01144400>;
  51 def FRECIP_S : FP_ALU_2R<0x01145400>;
  52 def FRSQRT_S : FP_ALU_2R<0x01146400>;
     // FRECIPE_S / FRSQRTE_S are defined here under HasBasicF; the patterns that
     // select them further down in this file additionally require HasFrecipe.
  53 def FRECIPE_S : FP_ALU_2R<0x01147400>;
  54 def FRSQRTE_S : FP_ALU_2R<0x01148400>;
  55 def FSCALEB_S : FP_ALU_3R<0x01108000>;
  56 def FLOGB_S   : FP_ALU_2R<0x01142400>;
  57 def FCOPYSIGN_S : FP_ALU_3R<0x01128000>;
  58 def FCLASS_S  : FP_ALU_2R<0x01143400>;
  59
  60
  61 // Comparison Instructions
     // Two families of eleven records each: FCMP_C*_S and FCMP_S*_S. In this file
     // the C* records are selected by the "non-signaling" setcc patterns and the
     // S* records by the "signaling" (strict_fsetccs) patterns.
     // Every S* constant is the matching C* constant with bit 15 (0x8000) set.
     // FCMP_CAF_S and FCMP_CUEQ_S share their constants with SET_CFR_FALSE and
     // SET_CFR_TRUE defined later in this block. FCMP_CAF_S and FCMP_SAF_S have
     // no selection pattern in this file.
  62 def FCMP_CAF_S  : FP_CMP<0x0c100000>;
  63 def FCMP_CUN_S  : FP_CMP<0x0c140000>;
  64 def FCMP_CEQ_S  : FP_CMP<0x0c120000>;
  65 def FCMP_CUEQ_S : FP_CMP<0x0c160000>;
  66 def FCMP_CLT_S  : FP_CMP<0x0c110000>;
  67 def FCMP_CULT_S : FP_CMP<0x0c150000>;
  68 def FCMP_CLE_S  : FP_CMP<0x0c130000>;
  69 def FCMP_CULE_S : FP_CMP<0x0c170000>;
  70 def FCMP_CNE_S  : FP_CMP<0x0c180000>;
  71 def FCMP_COR_S  : FP_CMP<0x0c1a0000>;
  72 def FCMP_CUNE_S : FP_CMP<0x0c1c0000>;
  73 def FCMP_SAF_S  : FP_CMP<0x0c108000>;
  74 def FCMP_SUN_S  : FP_CMP<0x0c148000>;
  75 def FCMP_SEQ_S  : FP_CMP<0x0c128000>;
  76 def FCMP_SUEQ_S : FP_CMP<0x0c168000>;
  77 def FCMP_SLT_S  : FP_CMP<0x0c118000>;
  78 def FCMP_SULT_S : FP_CMP<0x0c158000>;
  79 def FCMP_SLE_S  : FP_CMP<0x0c138000>;
  80 def FCMP_SULE_S : FP_CMP<0x0c178000>;
  81 def FCMP_SNE_S  : FP_CMP<0x0c188000>;
  82 def FCMP_SOR_S  : FP_CMP<0x0c1a8000>;
  83 def FCMP_SUNE_S : FP_CMP<0x0c1c8000>;
  84
  85 // Conversion Instructions
     // How these records are used by the patterns in this file:
     //   FFINT_S_W   - sint_to_fp / uint_to_fp and materialising the constant 1.0.
     //   FTINTRZ_W_S - the loongarch_ftint node.
     //   FRINT_S     - frint (under [HasBasicF, IsLA64]).
     // FTINT_W_S, FTINTRM_W_S, FTINTRP_W_S and FTINTRNE_W_S have no selection
     // pattern in this file.
  86 def FFINT_S_W    : FP_CONV<0x011d1000>;
  87 def FTINT_W_S    : FP_CONV<0x011b0400>;
  88 def FTINTRM_W_S  : FP_CONV<0x011a0400>;
  89 def FTINTRP_W_S  : FP_CONV<0x011a4400>;
  90 def FTINTRZ_W_S  : FP_CONV<0x011a8400>;
  91 def FTINTRNE_W_S : FP_CONV<0x011ac400>;
  92 def FRINT_S      : FP_CONV<0x011e4400>;
  93
  94 // Move Instructions
     // FP_MOV takes the constant plus, optionally, two register classes.
     // NOTE(review): judging by the record names (e.g. MOVGR2FR_W passes
     // FPR32, GPR) the first class is the destination and the second the source;
     // FMOV_S passes none and so relies on the class defaults -- confirm against
     // the FP_MOV definition, which is outside this file.
  95 def FSEL_xS    : FP_SEL<0x0d000000>;
  96 def FMOV_S     : FP_MOV<0x01149400>;
  97 def MOVGR2FR_W : FP_MOV<0x0114a400, FPR32, GPR>;
  98 def MOVFR2GR_S : FP_MOV<0x0114b400, GPR, FPR32>;
     // The two moves involving the FCSR register class are the only records in
     // this group explicitly marked as having side effects.
  99 let hasSideEffects = 1 in {
 100 def MOVGR2FCSR : FP_MOV<0x0114c000, FCSR, GPR>;
 101 def MOVFCSR2GR : FP_MOV<0x0114c800, GPR, FCSR>;
 102 } // hasSideEffects = 1
     // Moves to/from the condition-flag register class (CFR).
 103 def MOVFR2CF_xS : FP_MOV<0x0114d000, CFR, FPR32>;
 104 def MOVCF2FR_xS : FP_MOV<0x0114d400, FPR32, CFR>;
 105 def MOVGR2CF    : FP_MOV<0x0114d800, CFR, GPR>;
 106 def MOVCF2GR    : FP_MOV<0x0114dc00, GPR, CFR>;
 107
 108 // Branch Instructions
     // Selected by the PatFPBrcond multiclass later in this file: BCNEZ for a
     // branch on a floating-point setcc result, BCEQZ for a branch on that result
     // inverted (xor with -1).
 109 def BCEQZ : FP_BRANCH<0x48000000>;
 110 def BCNEZ : FP_BRANCH<0x48000100>;
 111
 112 // Common Memory Access Instructions
     // FLD_S / FST_S use the 2RI12 formats and are selected through LdPat / StPat
     // later in this file; FLDX_S / FSTX_S use the 3R formats and are selected
     // through RegRegLdPat / RegRegStPat.
 113 def FLD_S : FP_LOAD_2RI12<0x2b000000>;
 114 def FST_S : FP_STORE_2RI12<0x2b400000>;
 115 def FLDX_S : FP_LOAD_3R<0x38300000>;
 116 def FSTX_S : FP_STORE_3R<0x38380000>;
 117
 118 // Bound Check Memory Access Instructions
     // These reuse the same 3R load/store format classes as FLDX_S / FSTX_S.
     // No selection pattern in this file refers to them.
 119 def FLDGT_S : FP_LOAD_3R<0x38740000>;
 120 def FLDLE_S : FP_LOAD_3R<0x38750000>;
 121 def FSTGT_S : FP_STORE_3R<0x38760000>;
 122 def FSTLE_S : FP_STORE_3R<0x38770000>;
 123
 124 // Pseudo instructions for spill/reload CFRs.
     // PseudoST_CFR: no outputs; takes the CFR to store ($ccd), a GPR ($rj) and
     // an immediate ($imm). Flagged as a store only (no load, no side effects).
     // PseudoLD_CFR: produces a CFR ($ccd) from a GPR ($rj) and an immediate
     // ($imm). Flagged as a load only (no store, no side effects).
     // NOTE(review): $rj/$imm are presumably the base register and offset of the
     // stack slot, and the expansion lives in C++ outside this file -- confirm.
 125 let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
 126 def PseudoST_CFR : Pseudo<(outs),
 127                           (ins CFR:$ccd, GPR:$rj, grlenimm:$imm)>;
 128 let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
 129 def PseudoLD_CFR : Pseudo<(outs CFR:$ccd),
 130                           (ins GPR:$rj, grlenimm:$imm)>;
 131
 132 // SET_CFR_{FALSE,TRUE}
 133 // These instructions are defined in order to avoid expensive check error if
 134 // regular instruction patterns are used.
     // Both records reuse the constant of an existing comparison record
     // (FCMP_CAF_S and FCMP_CUEQ_S respectively) and pass the assembly mnemonic
     // explicitly as a string to the SET_CFR class (defined outside this file).
 135 // fcmp.caf.s $dst, $fa0, $fa0
 136 def SET_CFR_FALSE : SET_CFR<0x0c100000, "fcmp.caf.s">;
 137 // fcmp.cueq.s $dst, $fa0, $fa0
 138 def SET_CFR_TRUE  : SET_CFR<0x0c160000, "fcmp.cueq.s">;
 139
 140 // Pseudo instruction for copying CFRs.
     // One CFR output ($dst), one CFR input ($src); no memory access and no side
     // effects.
 141 def PseudoCopyCFR : Pseudo<(outs CFR:$dst), (ins CFR:$src)> {
 142   let mayLoad = 0;
 143   let mayStore = 0;
 144   let hasSideEffects = 0;
     // NOTE(review): presumably the size in bytes of the expanded sequence
     // (three 4-byte instructions) -- confirm against the pseudo expansion code.
 145   let Size = 12;
 146 }
 147
 148 } // Predicates = [HasBasicF]
 149
 150 //===----------------------------------------------------------------------===//
 151 // Pseudo-instructions and codegen patterns
 152 //===----------------------------------------------------------------------===//
 153
 154 /// Generic pattern classes
 155
     // PatFpr: select a one-operand node `OpNode` on a register of class RegTy
     // to the instruction `Inst`, passing the operand through unchanged.
 156 class PatFpr<SDPatternOperator OpNode, LAInst Inst, RegisterClass RegTy>
 157     : Pat<(OpNode RegTy:$fj), (Inst $fj)>;
     // PatFprFpr: same for a two-operand node; operands keep their order
     // ($fj first, $fk second).
 158 class PatFprFpr<SDPatternOperator OpNode, LAInst Inst, RegisterClass RegTy>
 159     : Pat<(OpNode RegTy:$fj, RegTy:$fk), (Inst $fj, $fk)>;
 160
 161 let Predicates = [HasBasicF] in {
 162
 163 /// Float arithmetic operations
 164
     // Direct one-to-one mappings from generic nodes to f32 instructions.
 165 def : PatFprFpr<fadd, FADD_S, FPR32>;
 166 def : PatFprFpr<fsub, FSUB_S, FPR32>;
 167 def : PatFprFpr<fmul, FMUL_S, FPR32>;
 168 def : PatFprFpr<fdiv, FDIV_S, FPR32>;
 169 def : PatFprFpr<fcopysign, FCOPYSIGN_S, FPR32>;
 170 def : PatFprFpr<fmaxnum_ieee, FMAX_S, FPR32>;
 171 def : PatFprFpr<fminnum_ieee, FMIN_S, FPR32>;
 172 def : PatFpr<fneg, FNEG_S, FPR32>;
 173 def : PatFpr<fabs, FABS_S, FPR32>;
 174 def : PatFpr<fsqrt, FSQRT_S, FPR32>;
     // 1.0 / sqrt(x) is folded into a single FRSQRT_S.
 175 def : Pat<(fdiv fpimm1, (fsqrt FPR32:$fj)), (FRSQRT_S FPR32:$fj)>;
     // fcanonicalize(x) is selected as FMAX_S with x as both operands.
 176 def : Pat<(fcanonicalize FPR32:$fj), (FMAX_S $fj, $fj)>;
     // is_fpclass: FCLASS_S the value, move the result to a GPR, AND it with the
     // mask (converted by to_fclass_mask, defined outside this file), then
     // `SLTU R0, x` turns a non-zero result into 1 (R0 is the zero register).
 177 def : Pat<(is_fpclass FPR32:$fj, (i32 timm:$mask)),
 178           (SLTU R0, (ANDI (MOVFR2GR_S (FCLASS_S FPR32:$fj)),
 179                           (to_fclass_mask timm:$mask)))>;
 180
 181 /// Setcc
 182
 183 // Match non-signaling comparison
 184
     // PatFPSetcc: select `any_fsetcc` with condition code `cc` on two RegTy
     // operands to the compare instruction `CmpInst`, operands in the same order.
 185 class PatFPSetcc<CondCode cc, LAInst CmpInst, RegisterClass RegTy>
 186     : Pat<(any_fsetcc RegTy:$fj, RegTy:$fk, cc),
 187           (CmpInst RegTy:$fj, RegTy:$fk)>;
 188 // SETOGT/SETOGE/SETUGT/SETUGE/SETGE/SETNE/SETGT will expand into
 189 // SETOLT/SETOLE/SETULT/SETULE/SETLE/SETEQ/SETLT.
     // All instances use the FCMP_C*_S records. SETEQ, SETLE and SETLT map to
     // the same instructions as SETOEQ, SETOLE and SETOLT respectively.
 190 def : PatFPSetcc<SETOEQ, FCMP_CEQ_S,  FPR32>;
 191 def : PatFPSetcc<SETEQ,  FCMP_CEQ_S,  FPR32>;
 192 def : PatFPSetcc<SETOLT, FCMP_CLT_S,  FPR32>;
 193 def : PatFPSetcc<SETOLE, FCMP_CLE_S,  FPR32>;
 194 def : PatFPSetcc<SETLE,  FCMP_CLE_S,  FPR32>;
 195 def : PatFPSetcc<SETONE, FCMP_CNE_S,  FPR32>;
 196 def : PatFPSetcc<SETO,   FCMP_COR_S,  FPR32>;
 197 def : PatFPSetcc<SETUEQ, FCMP_CUEQ_S, FPR32>;
 198 def : PatFPSetcc<SETULT, FCMP_CULT_S, FPR32>;
 199 def : PatFPSetcc<SETULE, FCMP_CULE_S, FPR32>;
 200 def : PatFPSetcc<SETUNE, FCMP_CUNE_S, FPR32>;
 201 def : PatFPSetcc<SETUO,  FCMP_CUN_S,  FPR32>;
 202 def : PatFPSetcc<SETLT,  FCMP_CLT_S,  FPR32>;
 203
 204 multiclass PatFPBrcond<CondCode cc, LAInst CmpInst, RegisterClass RegTy> {
     // Fuses a floating-point compare with the conditional branch that consumes
     // it, producing two patterns per condition code.
     // Inverted condition (setcc result xor -1): compare, then branch with BCEQZ.
 205   def : Pat<(brcond (xor (GRLenVT (setcc RegTy:$fj, RegTy:$fk, cc)), -1),
 206                      bb:$imm21),
 207             (BCEQZ (CmpInst RegTy:$fj, RegTy:$fk), bb:$imm21)>;
     // Plain condition: compare, then branch with BCNEZ.
 208   def : Pat<(brcond (GRLenVT (setcc RegTy:$fj, RegTy:$fk, cc)), bb:$imm21),
 209             (BCNEZ (CmpInst RegTy:$fj, RegTy:$fk), bb:$imm21)>;
 210 }
 211
     // Instances use the non-signaling FCMP_C*_S records. Unlike the PatFPSetcc
     // list, there are no SETEQ or SETLE entries here.
 212 defm : PatFPBrcond<SETOEQ, FCMP_CEQ_S, FPR32>;
 213 defm : PatFPBrcond<SETOLT, FCMP_CLT_S, FPR32>;
 214 defm : PatFPBrcond<SETOLE, FCMP_CLE_S, FPR32>;
 215 defm : PatFPBrcond<SETONE, FCMP_CNE_S, FPR32>;
 216 defm : PatFPBrcond<SETO,   FCMP_COR_S, FPR32>;
 217 defm : PatFPBrcond<SETUEQ, FCMP_CUEQ_S, FPR32>;
 218 defm : PatFPBrcond<SETULT, FCMP_CULT_S, FPR32>;
 219 defm : PatFPBrcond<SETULE, FCMP_CULE_S, FPR32>;
 220 defm : PatFPBrcond<SETUNE, FCMP_CUNE_S, FPR32>;
 221 defm : PatFPBrcond<SETUO,  FCMP_CUN_S, FPR32>;
 222 defm : PatFPBrcond<SETLT,  FCMP_CLT_S, FPR32>;
 223
 224 // Match signaling comparison
 225
     // PatStrictFsetccs: select `strict_fsetccs` with condition code `cc` to
     // `CmpInst`, operands in the same order. All instances below use the
     // FCMP_S*_S records, where the non-signaling patterns use FCMP_C*_S.
 226 class PatStrictFsetccs<CondCode cc, LAInst CmpInst, RegisterClass RegTy>
 227     : Pat<(strict_fsetccs RegTy:$fj, RegTy:$fk, cc),
 228           (CmpInst RegTy:$fj, RegTy:$fk)>;
 229 def : PatStrictFsetccs<SETOEQ, FCMP_SEQ_S,  FPR32>;
 230 def : PatStrictFsetccs<SETOLT, FCMP_SLT_S,  FPR32>;
 231 def : PatStrictFsetccs<SETOLE, FCMP_SLE_S,  FPR32>;
 232 def : PatStrictFsetccs<SETONE, FCMP_SNE_S,  FPR32>;
 233 def : PatStrictFsetccs<SETO,   FCMP_SOR_S,  FPR32>;
 234 def : PatStrictFsetccs<SETUEQ, FCMP_SUEQ_S, FPR32>;
 235 def : PatStrictFsetccs<SETULT, FCMP_SULT_S, FPR32>;
 236 def : PatStrictFsetccs<SETULE, FCMP_SULE_S, FPR32>;
 237 def : PatStrictFsetccs<SETUNE, FCMP_SUNE_S, FPR32>;
 238 def : PatStrictFsetccs<SETUO,  FCMP_SUN_S,  FPR32>;
 239 def : PatStrictFsetccs<SETLT,  FCMP_SLT_S,  FPR32>;
 240
 241 /// Select
 242
     // select(cc, true-value $fk, false-value $fj) becomes FSEL_xS. Note the
     // operand order: the false value ($fj) is passed first and the true value
     // ($fk) second, followed by the condition flag.
 243 def : Pat<(select CFR:$cc, FPR32:$fk, FPR32:$fj),
 244           (FSEL_xS FPR32:$fj, FPR32:$fk, CFR:$cc)>;
 245
 246 /// Selectcc
 247
     // PatFPSelectcc: fuse a floating-point setcc with the select that consumes
     // it. The compare `CmpInst` of ($a, $b) feeds `SelInst` directly; as in the
     // plain select pattern, the false value ($f) is passed before the true
     // value ($t).
 248 class PatFPSelectcc<CondCode cc, LAInst CmpInst, LAInst SelInst,
 249                     RegisterClass RegTy>
 250     : Pat<(select (GRLenVT (setcc RegTy:$a, RegTy:$b, cc)), RegTy:$t, RegTy:$f),
 251           (SelInst RegTy:$f, RegTy:$t, (CmpInst RegTy:$a, RegTy:$b))>;
     // Instances use the non-signaling FCMP_C*_S records. There are no SETEQ,
     // SETLE or SETLT entries in this list.
 252 def : PatFPSelectcc<SETOEQ, FCMP_CEQ_S,  FSEL_xS, FPR32>;
 253 def : PatFPSelectcc<SETOLT, FCMP_CLT_S,  FSEL_xS, FPR32>;
 254 def : PatFPSelectcc<SETOLE, FCMP_CLE_S,  FSEL_xS, FPR32>;
 255 def : PatFPSelectcc<SETONE, FCMP_CNE_S,  FSEL_xS, FPR32>;
 256 def : PatFPSelectcc<SETO,   FCMP_COR_S,  FSEL_xS, FPR32>;
 257 def : PatFPSelectcc<SETUEQ, FCMP_CUEQ_S, FSEL_xS, FPR32>;
 258 def : PatFPSelectcc<SETULT, FCMP_CULT_S, FSEL_xS, FPR32>;
 259 def : PatFPSelectcc<SETULE, FCMP_CULE_S, FSEL_xS, FPR32>;
 260 def : PatFPSelectcc<SETUNE, FCMP_CUNE_S, FSEL_xS, FPR32>;
 261 def : PatFPSelectcc<SETUO,  FCMP_CUN_S,  FSEL_xS, FPR32>;
 262
 263 /// Loads
 264
     // f32 `load` is selected to FLD_S via LdPat and to FLDX_S via RegRegLdPat.
     // NOTE(review): LdPat / RegRegLdPat are defined outside this file;
     // presumably they cover base+immediate and base+register addressing
     // respectively -- confirm there.
 265 defm : LdPat<load, FLD_S, f32>;
 266 def : RegRegLdPat<load, FLDX_S, f32>;
 267
 268 /// Stores
 269
     // f32 `store` from an FPR32 is selected to FST_S via StPat and to FSTX_S
     // via RegRegStPat (both defined outside this file).
 270 defm : StPat<store, FST_S, FPR32, f32>;
 271 def : RegRegStPat<store, FSTX_S, FPR32, f32>;
 272
 273 /// Floating point constants
 274
     // Three f32 constants are materialised without a memory load:
     //   fpimm0    : move R0 into an FPR.
     //   fpimm0neg : the same, then negate with FNEG_S.
     //   fpimm1    : build integer 1 with `ADDI_W R0, 1`, move it into an FPR,
     //               then convert integer to float with FFINT_S_W.
 275 def : Pat<(f32 fpimm0), (MOVGR2FR_W R0)>;
 276 def : Pat<(f32 fpimm0neg), (FNEG_S (MOVGR2FR_W R0))>;
 277 def : Pat<(f32 fpimm1), (FFINT_S_W (MOVGR2FR_W (ADDI_W R0, 1)))>;
 278
 279 // FP Conversion
     // The target-specific loongarch_ftint node on an f32 is selected to
     // FTINTRZ_W_S.
 280 def : Pat<(loongarch_ftint FPR32:$src), (FTINTRZ_W_S FPR32:$src)>;
 281
 282 // FP reciprocal operation
     // 1.0 / x is selected to a single FRECIP_S instead of a division.
 283 def : Pat<(fdiv fpimm1, FPR32:$src), (FRECIP_S $src)>;
 284
 285 let Predicates = [HasBasicF, HasFrecipe] in {
 286 // FP approximate reciprocal operation
     // A nested `let Predicates` replaces the enclosing list rather than adding
     // to it, so HasBasicF is repeated here to keep these patterns gated on the
     // basic single-precision feature as well as on HasFrecipe.
     // The two approximate-reciprocal intrinsics map one-to-one onto FRECIPE_S
     // and FRSQRTE_S.
 287 def : Pat<(int_loongarch_frecipe_s FPR32:$src), (FRECIPE_S FPR32:$src)>;
 288 def : Pat<(int_loongarch_frsqrte_s FPR32:$src), (FRSQRTE_S FPR32:$src)>;
 289 } // Predicates = [HasBasicF, HasFrecipe]
 290
 291 // fmadd.s: fj * fk + fa
     // The six patterns below map fma, optionally combined with fneg on the
     // result or on operands, onto FMADD_S / FMSUB_S / FNMADD_S / FNMSUB_S.
     // Forms that negate the whole fma result match plain `fma`. Forms that
     // negate the multiplicand instead match only `fma_nsz` (defined outside this
     // file), because distributing the negation can change the sign of a zero
     // result.
 292 def : Pat<(fma FPR32:$fj, FPR32:$fk, FPR32:$fa), (FMADD_S $fj, $fk, $fa)>;
 293
 294 // fmsub.s: fj * fk - fa
 295 def : Pat<(fma FPR32:$fj, FPR32:$fk, (fneg FPR32:$fa)),
 296           (FMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
 297
 298 // fnmadd.s: -(fj * fk + fa)
 299 def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, FPR32:$fa)),
 300           (FNMADD_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
 301
 302 // fnmadd.s: -fj * fk - fa (the nsz flag on the FMA)
 303 def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, (fneg FPR32:$fa)),
 304           (FNMADD_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
 305
 306 // fnmsub.s: -(fj * fk - fa)
 307 def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, (fneg FPR32:$fa))),
 308           (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
 309
 310 // fnmsub.s: -fj * fk + fa (the nsz flag on the FMA)
 311 def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa),
 312           (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
 313 } // Predicates = [HasBasicF]
 314
 315 let Predicates = [HasBasicF, IsLA64] in {
     // LA64-only patterns. GPR values are i64 here, so the GPR<->FPR moves are
     // matched through the target-specific *_la64 nodes (f32 <-> i64 type
     // profiles) rather than through a plain bitconvert.
 316 // GPR -> FPR
 317 def : Pat<(loongarch_movgr2fr_w_la64 GPR:$src), (MOVGR2FR_W GPR:$src)>;
 318 // FPR -> GPR
 319 def : Pat<(loongarch_movfr2gr_s_la64 FPR32:$src),
 320           (MOVFR2GR_S FPR32:$src)>;
     // Both conversions below only match when the i64 source satisfies the
     // sexti32 predicate (defined outside this file); the value is moved into
     // an FPR and converted with FFINT_S_W.
 321 // int -> f32
 322 def : Pat<(f32 (sint_to_fp (i64 (sexti32 (i64 GPR:$src))))),
 323           (FFINT_S_W (MOVGR2FR_W GPR:$src))>;
     // NOTE(review): the unsigned case selects the same signed-word conversion
     // as the signed case. Presumably the lowering only leaves this node in
     // place when the value is known to be non-negative -- confirm in the C++
     // lowering code.
 324 // uint -> f32
 325 def : Pat<(f32 (uint_to_fp (i64 (sexti32 (i64 GPR:$src))))),
 326           (FFINT_S_W (MOVGR2FR_W GPR:$src))>;
 327 } // Predicates = [HasBasicF, IsLA64]
 328
 329 // FP Rounding
     // frint on f32 is selected to FRINT_S. The predicate list is the same as
     // the LA64 move/convert block directly above; there is no corresponding
     // pattern in the LA32 block of this file.
 330 let Predicates = [HasBasicF, IsLA64] in {
 331 def : PatFpr<frint, FRINT_S, FPR32>;
 332 } // Predicates = [HasBasicF, IsLA64]
 333
 334 let Predicates = [HasBasicF, IsLA32] in {
     // LA32-only patterns. GPR values are i32 here, so GPR<->FPR moves are
     // matched directly on bitconvert, and the signed conversion takes the i32
     // source as-is. There is no uint_to_fp pattern in this block.
 335 // GPR -> FPR
 336 def : Pat<(bitconvert (i32 GPR:$src)), (MOVGR2FR_W GPR:$src)>;
 337 // FPR -> GPR
 338 def : Pat<(i32 (bitconvert FPR32:$src)), (MOVFR2GR_S FPR32:$src)>;
 339 // int -> f32
 340 def : Pat<(f32 (sint_to_fp (i32 GPR:$src))), (FFINT_S_W (MOVGR2FR_W GPR:$src))>;
 341 } // Predicates = [HasBasicF, IsLA32]