lib/Target/CellSPU/SPUMathInstr.td

   1 //======--- SPUMathInstr.td - Cell SPU math operations -*- tablegen -*--======//
   2 //
   3 //                     Cell SPU math operations
   4 //
   5 // This target description file contains instruction sequences for various
   6 // math operations, such as vector multiplies, i32 multiply, etc., for the
   7 // SPU's i32, i16, i8 and corresponding vector types.
   8 //
   9 // Any resemblance to libsimdmath or the Cell SDK simdmath library is
  10 // purely and completely coincidental.
  11 //===----------------------------------------------------------------------===//
  12
  13 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  14 // v16i8 multiply instruction sequence:
  15 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  16
   // Anonymous selection pattern for `mul` on two v16i8 VECREG operands.
   // The replacement DAG is an ORv4i32 of two terms:
   //   1. ANDv4i32 with (ILAv4i32 0x0000ffff) of a SELBv4i32 that combines
   //      MPYv8i16 of the raw operands with SHLHIv8i16-by-8 of MPYv8i16 of the
   //      operands after ROTMAHIv8i16 by 8.
   //   2. SHLIv4i32-by-16 of a SELBv4i32 that combines MPYv8i16 of the operands
   //      after ROTMAIv4i32_i32 by 16 with SHLHIv8i16-by-8 of MPYv8i16 of the
   //      operands after ROTMAIv4i32_i32 by 8.
   // Both SELBv4i32 nodes take (FSMBIv8i16 0x2222) as their third operand.
   // NOTE(review): the 0x0000ffff mask and the final shift by 16 suggest that
   // term 1 supplies the low halfword of each word and term 2 the high
   // halfword -- confirm against the SPU ISA definitions of these instructions.
   17 def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
   18           (ORv4i32
   19            (ANDv4i32
   20             (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
   21                        (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
   22                                              (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
   23                        (FSMBIv8i16 0x2222)),
   24             (ILAv4i32 0x0000ffff)),
   25            (SHLIv4i32
   26             (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
   27                                  (ROTMAIv4i32_i32 VECREG:$rB, 16)),
   28                        (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
   29                                              (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
   30                        (FSMBIv8i16 0x2222)), 16))>;
  31
  32 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  33 // v8i16 multiply instruction sequence:
  34 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  35
   // Anonymous selection pattern for `mul` on two v8i16 VECREG operands.
   // SELBv8i16 combines two partial results using (FSMBIv8i16 0xcccc) as its
   // third operand:
   //   - MPYv8i16 of the operands, and
   //   - MPYHHv8i16 of the operands, shifted left by 16 with SHLIv4i32.
   // NOTE(review): presumably MPYv8i16 yields the products of the low
   // halfwords of each word and MPYHHv8i16 those of the high halfwords, with
   // the mask picking the upper two bytes of each word from the shifted
   // value -- confirm against the SPU ISA.
   36 def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
   37           (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
   38                      (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
   39                      (FSMBIv8i16 0xcccc))>;
  40
  41 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  42 // v4i32, i32 multiply instruction sequence:
  43 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  44
   // Named selection pattern for `mul` on two v4i32 VECREG operands.
   // The product is the Av4i32 sum of three partial results:
   //   MPYHv4i32($rA, $rB) + MPYHv4i32($rB, $rA) + MPYUv4i32($rA, $rB)
   // Note that the second MPYHv4i32 deliberately swaps the operand order.
   // NOTE(review): presumably this is the usual 16-bit partial-product
   // decomposition of a 32-bit multiply (two high*low cross terms plus the
   // unsigned low*low term) -- confirm against the SPU ISA.
   45 def MPYv4i32:
   46   Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
   47       (Av4i32
   48         (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB),
   49                 (MPYHv4i32 VECREG:$rB, VECREG:$rA)),
   50         (MPYUv4i32 VECREG:$rA, VECREG:$rB))>;
  51
   // Named selection pattern for `mul` on two scalar R32C operands.
   // The product is the Ar32 sum of three partial results:
   //   MPYHr32($rA, $rB) + MPYHr32($rB, $rA) + MPYUr32($rA, $rB)
   // Note that the second MPYHr32 deliberately swaps the operand order.
   // NOTE(review): presumably the scalar counterpart of the 16-bit
   // partial-product decomposition of a 32-bit multiply -- confirm against
   // the SPU ISA.
   52 def MPYi32:
   53   Pat<(mul R32C:$rA, R32C:$rB),
   54       (Ar32
   55         (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
   56               (MPYHr32 R32C:$rB, R32C:$rA)),
   57         (MPYUr32 R32C:$rA, R32C:$rB))>;
  58
  59 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  60 // f32, v4f32 divide instruction sequence:
  61 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  62
  63 // Reciprocal estimate and interpolation
   // Code fragment: FIf32 applied to $rB and to FRESTf32 of $rB.
   // Referenced as Interpf32.Fragment by DivEstf32 and NRaphf32.
   // NOTE(review): presumably approximates 1/$rB -- confirm against the SPU ISA.
   64 def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
  65 // Division estimate
   // Code fragment: FMf32 of the dividend $rA and Interpf32.Fragment.
   // Referenced as DivEstf32.Fragment (twice) by NRaphf32.
   // NOTE(review): presumably a first estimate of $rA / $rB -- confirm.
   66 def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
  67 // Newton-Raphson iteration
   // Code fragment: FMAf32 whose operands are, in order,
   //   FNMSf32(DivEstf32.Fragment, $rB, $rA), Interpf32.Fragment, and
   //   DivEstf32.Fragment.
   // Referenced by Epsilonf32 and by the f32 fdiv pattern.
   // NOTE(review): presumably FNMSf32 forms the residual $rA - estimate * $rB
   // and FMAf32 adds residual * reciprocal back onto the estimate -- confirm
   // operand semantics against the SPU ISA.
   68 def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
   69                                Interpf32.Fragment,
   70                                DivEstf32.Fragment)>;
  71 // Epsilon addition
   // Code fragment: AIf32 of NRaphf32.Fragment with immediate 1.
   // Referenced (twice) by the f32 fdiv pattern.
   // NOTE(review): presumably an integer add on the float's bit pattern, i.e.
   // the refined quotient nudged up by one unit in the last place -- confirm.
   72 def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
  73
   // Anonymous selection pattern for scalar `fdiv` on R32FP operands.
   // SELBf32_cond chooses between NRaphf32.Fragment and Epsilonf32.Fragment.
   // Its condition operand is CGTIf32(..., -1) applied to
   // FNMSf32($rB, Epsilonf32.Fragment, $rA).
   // NOTE(review): presumably the epsilon-adjusted quotient is taken when
   // $rA - $rB * adjusted_quotient compares greater than -1 (i.e. is not
   // negative), so the adjustment does not overshoot -- confirm against the
   // SPU ISA and the SELBf32_cond definition.
   74 def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
   75           (SELBf32_cond NRaphf32.Fragment,
   76                         Epsilonf32.Fragment,
   77                         (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
  78
  79 // Reciprocal estimate and interpolation
   // Code fragment: FIv4f32 applied to the v4f32 $rB and to FRESTv4f32 of $rB.
   // Referenced as Interpv4f32.Fragment by DivEstv4f32 and NRaphv4f32.
   // NOTE(review): presumably approximates 1/$rB per element -- confirm.
   80 def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
  81 // Division estimate
   // Code fragment: FMv4f32 of the v4f32 dividend $rA and Interpv4f32.Fragment.
   // Referenced as DivEstv4f32.Fragment (twice) by NRaphv4f32.
   // NOTE(review): presumably a first per-element estimate of $rA / $rB -- confirm.
   82 def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
  83 // Newton-Raphson iteration
   // Code fragment: FMAv4f32 whose operands are, in order,
   //   FNMSv4f32(DivEstv4f32.Fragment, $rB, $rA), Interpv4f32.Fragment, and
   //   DivEstv4f32.Fragment.
   // Referenced by Epsilonv4f32 and by the v4f32 fdiv pattern.
   // NOTE(review): presumably FNMSv4f32 forms the residual
   // $rA - estimate * $rB and FMAv4f32 adds residual * reciprocal back onto
   // the estimate -- confirm operand semantics against the SPU ISA.
   84 def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
   85                                               (v4f32 VECREG:$rB),
   86                                               (v4f32 VECREG:$rA)),
   87                                    Interpv4f32.Fragment,
   88                                    DivEstv4f32.Fragment)>;
  89 // Epsilon addition
   // Code fragment: AIv4f32 of NRaphv4f32.Fragment with immediate 1.
   // Referenced (twice) by the v4f32 fdiv pattern.
   // NOTE(review): presumably an integer add on each element's bit pattern,
   // i.e. the refined quotient nudged up by one unit in the last place -- confirm.
   90 def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
  91
   // Anonymous selection pattern for `fdiv` on v4f32 VECREG operands.
   // SELBv4f32_cond chooses between NRaphv4f32.Fragment and
   // Epsilonv4f32.Fragment. Its condition operand is CGTIv4f32(..., -1)
   // applied to FNMSv4f32($rB, Epsilonv4f32.Fragment, $rA).
   // NOTE(review): presumably the epsilon-adjusted quotient is taken in each
   // element where $rA - $rB * adjusted_quotient compares greater than -1
   // (i.e. is not negative) -- confirm against the SPU ISA and the
   // SELBv4f32_cond definition.
   92 def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
   93           (SELBv4f32_cond NRaphv4f32.Fragment,
   94                         Epsilonv4f32.Fragment,
   95                         (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
   96                                               Epsilonv4f32.Fragment,
   97                                               (v4f32 VECREG:$rA)), -1))>;