1 //======--- SPUMathInst.td - Cell SPU math operations -*- tablegen -*---======//
3 // Cell SPU math operations
5 // This target description file contains instruction sequences for various
6 // math operations, such as vector multiplies, i32 multiply, etc., for the
7 // SPU's i32, i16, i8 and corresponding vector types.
9 // Any resemblance to libsimdmath or the Cell SDK simdmath library is
10 // purely and completely coincidental.
11 //===----------------------------------------------------------------------===//
13 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
14 // v16i8 multiply instruction sequence:
15 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// Selection pattern: a `mul` of two v16i8 VECREG operands is replaced by the
// instruction tree below rather than by a single multiply instruction.
// The visible tree forms two SELBv4i32 results. Each one combines an MPYv8i16
// product with a second MPYv8i16 product that is passed through SHLHIv8i16
// with immediate 8; the inputs to the products are the operands themselves or
// copies run through ROTMAHIv8i16 / ROTMAIv4i32_i32 with immediates 8 or 16.
// NOTE(review): the parentheses in this span do not balance (two more closing
// than opening) and the embedded line numbering skips 18-19, 23 and 25, so
// some operator lines appear to be missing from this copy. Restore them from
// the upstream file before relying on this pattern.
17 def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
20 (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
21 (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
22 (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
24 (ILAv4i32 0x0000ffff)),
26 (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
27 (ROTMAIv4i32_i32 VECREG:$rB, 16)),
28 (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
29 (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
30 (FSMBIv8i16 0x2222)), 16))>;
32 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
33 // v8i16 multiply instruction sequence:
34 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// Selection pattern: `mul` of two v8i16 VECREG operands.
// SELBv8i16 merges two products under the mask built by FSMBIv8i16 0xcccc:
//   1. MPYv8i16 rA, rB
//   2. MPYHHv8i16 rA, rB, passed through SHLIv4i32 with immediate 16
// NOTE(review): presumably MPYv8i16 multiplies the low halfword of each word
// and MPYHHv8i16 the high halfword, with the 0xcccc mask taking the upper two
// bytes of every word from the shifted product -- confirm against the SPU ISA.
36 def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
37 (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
38 (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
39 (FSMBIv8i16 0xcccc))>;
41 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
42 // v4i32, i32 multiply instruction sequence:
43 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// Selection pattern: `mul` of two v4i32 VECREG operands.
// The visible tree adds (Av4i32) the two cross terms MPYHv4i32 rA, rB and
// MPYHv4i32 rB, rA, and also lists MPYUv4i32 rA, rB as a further operand.
// NOTE(review): the `def` header that should precede `Pat<` and the operator
// that takes MPYUv4i32 as its second operand are not visible here (embedded
// numbering skips 45 and 47; one closing parenthesis has no opener).
// Presumably an outer Av4i32 adds the MPYU product to the cross-term sum --
// confirm against the upstream file.
46 Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
48 (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB),
49 (MPYHv4i32 VECREG:$rB, VECREG:$rA)),
50 (MPYUv4i32 VECREG:$rA, VECREG:$rB))>;
// Selection pattern: `mul` of two scalar R32C operands; same shape as the
// v4i32 pattern but using the r32 instruction variants.
// The visible tree adds (Ar32) MPYHr32 rA, rB and MPYHr32 rB, rA, and lists
// MPYUr32 rA, rB as a further operand.
// NOTE(review): the `def` header and the operator enclosing MPYUr32 are not
// visible (embedded numbering skips 52 and 54; one closing parenthesis has no
// opener). Presumably an outer Ar32 -- confirm against the upstream file.
53 Pat<(mul R32C:$rA, R32C:$rB),
55 (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
56 (MPYHr32 R32C:$rB, R32C:$rA)),
57 (MPYUr32 R32C:$rA, R32C:$rB))>;
59 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
60 // f32, v4f32 divide instruction sequence:
61 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
63 // Reciprocal estimate and interpolation
// Interpf32: reusable fragment applying FIf32 to $rB and FRESTf32 of $rB.
// It depends only on the divisor operand $rB.
64 def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
// DivEstf32: FMf32 of $rA and the Interpf32 fragment.
// NOTE(review): presumably the first quotient estimate, rA * (1/rB) -- confirm.
66 def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
67 // Newton-Raphson iteration
// NRaphf32: FMAf32 whose first operand is FNMSf32 of (DivEstf32, $rB, $rA).
// NOTE(review): this definition ends on a trailing comma -- the remaining
// FMAf32 operands and the closing `)>;` are not visible (embedded numbering
// skips 69-71). Restore them from the upstream file.
68 def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
// Epsilonf32: AIf32 of the NRaphf32 fragment with immediate 1.
// NOTE(review): presumably bumps the refined quotient's bit pattern by one
// to give the next representable value -- confirm.
72 def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
// Selection pattern: scalar `fdiv` of R32FP $rA by R32FP $rB.
// SELBf32_cond receives the NRaphf32 fragment and a condition computed by
// CGTIf32 of (FNMSf32 $rB, Epsilonf32, $rA) against the immediate -1.
// NOTE(review): the v4f32 pattern in this file passes its Epsilon fragment as
// the second SELB operand; here that operand is not visible (embedded
// numbering skips 76). Presumably `Epsilonf32.Fragment,` is missing -- restore
// it from the upstream file.
74 def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
75 (SELBf32_cond NRaphf32.Fragment,
77 (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
79 // Reciprocal estimate and interpolation
// Interpv4f32: vector counterpart of Interpf32 -- FIv4f32 applied to $rB and
// FRESTv4f32 of $rB, with the operand typed as v4f32.
80 def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
// DivEstv4f32: FMv4f32 of $rA and the Interpv4f32 fragment.
// NOTE(review): presumably the first per-lane quotient estimate -- confirm.
82 def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
83 // Newton-Raphson iteration
// NRaphv4f32: FMAv4f32 whose first operand is an FNMSv4f32 starting with
// DivEstv4f32, and whose last visible operand is DivEstv4f32 again.
// NOTE(review): the remaining FNMSv4f32 operands and the middle FMAv4f32
// operand are not visible (embedded numbering skips 85-87), leaving the
// parentheses unbalanced by one. Restore them from the upstream file.
84 def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
88 DivEstv4f32.Fragment)>;
// Epsilonv4f32: AIv4f32 of the NRaphv4f32 fragment with immediate 1.
// NOTE(review): presumably bumps each lane's bit pattern by one -- confirm.
90 def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
// Selection pattern: `fdiv` of v4f32 VECREG $rA by v4f32 VECREG $rB.
// SELBv4f32_cond takes three operands:
//   1. the NRaphv4f32 fragment,
//   2. the Epsilonv4f32 fragment,
//   3. a condition: CGTIv4f32 of (FNMSv4f32 $rB, Epsilonv4f32, $rA) against
//      the immediate -1.
// NOTE(review): presumably the condition tests whether the residual
// rA - rB * Epsilon is non-negative and, if so, selects the Epsilon result
// over the plain Newton-Raphson result -- confirm against the SPU ISA.
92 def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
93 (SELBv4f32_cond NRaphv4f32.Fragment,
94 Epsilonv4f32.Fragment,
95 (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
96 Epsilonv4f32.Fragment,
97 (v4f32 VECREG:$rA)), -1))>;