zpu: managed to compile program that writes constant to global variable
[llvm/zpu.git] / lib / Target / CellSPU / SPUMathInstr.td
blobed7129e33291b5d5cb98bd0895d6dbb69d435ad5
1 //======--- SPUMathInstr.td - Cell SPU math operations -*- tablegen -*--======//
2 //
3 //                     Cell SPU math operations
4 //
5 // This target description file contains instruction sequences for various
6 // math operations, such as vector multiplies, i32 multiply, etc., for the
7 // SPU's i32, i16, i8 and corresponding vector types.
8 //
9 // Any resemblance to libsimdmath or the Cell SDK simdmath library is
10 // purely and completely coincidental.
11 //===----------------------------------------------------------------------===//
13 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
14 // v16i8 multiply instruction sequence:
15 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// Pattern: lower `mul` on two v16i8 VECREG operands to 16-bit multiplies,
// shifts and byte selects. Bytes of each 32-bit word are numbered 0 (most
// significant) to 3 below.
//
// The notes assume the usual SPU semantics (confirm against the SPU ISA):
// MPY multiplies the low halfword of each word, so the low byte of its result
// is the 8-bit product of the operands' low bytes; SELB takes the bits of its
// second operand where the mask is set; FSMBI 0x2222 sets byte 2 of each word.
//
//   Low halfword  (ANDv4i32 ... 0x0000ffff):
//     byte 3 <- MPY($rA, $rB)
//     byte 2 <- MPY of both operands shifted right by 8 per halfword
//               (ROTMAHIv8i16), moved up one byte with SHLHIv8i16
//   High halfword (SHLIv4i32 ... 16):
//     byte 1 <- MPY of both operands shifted right by 16 per word
//     byte 0 <- MPY of both operands shifted right by 24 per word, moved up
//               one byte with SHLHIv8i16
//   ORv4i32 merges the two halfwords.
//
// FIX: the byte-0 term previously shifted the operands right by only 8 with
// the word-wide ROTMAIv4i32_i32, which leaves bytes 1,2 (not byte 0) in the
// low halfword, so byte 0 of every word received the byte-2 product. A
// word-wide shift of 24 brings byte 0 into the low byte, mirroring what the
// halfword-wide shift of 8 does for the low halfword.
17 def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
18           (ORv4i32
19            (ANDv4i32
20             (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
21                        (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
22                                              (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
23                        (FSMBIv8i16 0x2222)),
24             (ILAv4i32 0x0000ffff)),
25            (SHLIv4i32
26             (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
27                                  (ROTMAIv4i32_i32 VECREG:$rB, 16)),
28                        (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 24),
29                                              (ROTMAIv4i32_i32 VECREG:$rB, 24)), 8),
30                        (FSMBIv8i16 0x2222)), 16))>;
31                         
32 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
33 // v8i16 multiply instruction sequence:
34 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// Pattern: lower `mul` on two v8i16 VECREG operands.
// SELBv8i16 merges two candidates under the mask FSMBIv8i16 0xcccc:
//   - MPYv8i16($rA, $rB), and
//   - MPYHHv8i16($rA, $rB) shifted left by 16 with SHLIv4i32.
// NOTE(review): presumably MPY / MPYHH multiply the low / high halfword of
// each 32-bit word and 0xcccc marks the two upper bytes of each word, so every
// halfword lane ends up with the low 16 bits of its own product -- confirm
// against the SPU ISA.
36 def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
37           (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
38                      (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
39                      (FSMBIv8i16 0xcccc))>;
40                  
41 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
42 // v4i32, i32 multiply instruction sequence:
43 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// MPYv4i32: lower `mul` on two v4i32 VECREG operands to the sum of three
// partial products: MPYH($rA, $rB) + MPYH($rB, $rA) + MPYU($rA, $rB).
// NOTE(review): presumably MPYH yields (high halfword of first operand x low
// halfword of second) << 16 and MPYU the unsigned low x low product, which
// together give the low 32 bits of the full product -- confirm against the
// SPU ISA.
45 def MPYv4i32:
46   Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
47       (Av4i32
48         (v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)),
49                        (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
50         (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;
// MPYi32: scalar counterpart of the v4i32 multiply pattern. Lowers `mul` on
// two R32C operands to MPYHr32($rA, $rB) + MPYHr32($rB, $rA) +
// MPYUr32($rA, $rB), summed with Ar32.
// NOTE(review): presumably the two MPYH terms are the cross products shifted
// into the upper halfword and MPYU is the unsigned low x low product --
// confirm against the SPU ISA.
52 def MPYi32:
53   Pat<(mul R32C:$rA, R32C:$rB),
54       (Ar32
55         (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
56               (MPYHr32 R32C:$rB, R32C:$rA)),
57         (MPYUr32 R32C:$rA, R32C:$rB))>;
59 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
60 // f32, v4f32 divide instruction sequence:
61 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
63 // Reciprocal estimate and interpolation: FRESTf32 of the divisor $rB, fed
// together with $rB into FIf32. Reused below through Interpf32.Fragment.
// NOTE(review): presumably this yields an approximation of 1/$rB -- confirm
// against the SPU ISA description of frest/fi.
64 def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
65 // Division estimate: FMf32 of the dividend $rA with the Interpf32 fragment
// (presumably $rA * (1/$rB), a first approximation of the quotient).
66 def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
67 // Newton-Raphson iteration: FNMSf32(DivEst, $rB, $rA) is combined with the
// Interpf32 and DivEstf32 fragments through FMAf32.
// NOTE(review): presumably FNMS computes the residual $rA - DivEst * $rB and
// FMA computes residual * (1/$rB) + DivEst, i.e. one refinement step of the
// quotient -- confirm operand order against the SPU ISA.
68 def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
69                                Interpf32.Fragment,
70                                DivEstf32.Fragment)>;
71 // Epsilon addition: AIf32 adds the immediate 1 to the NRaphf32 result.
// NOTE(review): presumably an integer add on the float's bit pattern, i.e. a
// bump to the adjacent representable value -- confirm.
72 def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
// Pattern: lower f32 `fdiv` of $rA by $rB. SELBf32_cond chooses between the
// refined quotient (NRaphf32) and the same value with 1 added (Epsilonf32).
// The select condition is CGTIf32(FNMSf32($rB, Epsilon, $rA), -1).
// NOTE(review): presumably the FNMS term is the residual $rA - $rB * Epsilon
// and "greater than -1" as an integer compare means "sign bit clear", so the
// bumped quotient is taken only when it does not overshoot -- confirm against
// the SPU ISA and the SELBf32_cond definition.
74 def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
75           (SELBf32_cond NRaphf32.Fragment,
76                         Epsilonf32.Fragment,
77                         (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
79 // Reciprocal estimate and interpolation (v4f32): FRESTv4f32 of the divisor
// $rB, fed together with $rB into FIv4f32. Reused below through
// Interpv4f32.Fragment.
// NOTE(review): presumably a per-lane approximation of 1/$rB -- confirm
// against the SPU ISA description of frest/fi.
80 def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
81 // Division estimate (v4f32): FMv4f32 of the dividend $rA with the
// Interpv4f32 fragment (presumably $rA * (1/$rB) per lane).
82 def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
83 // Newton-Raphson iteration (v4f32): FNMSv4f32(DivEst, $rB, $rA) is combined
// with the Interpv4f32 and DivEstv4f32 fragments through FMAv4f32.
// NOTE(review): presumably residual = $rA - DivEst * $rB, then
// residual * (1/$rB) + DivEst, per lane -- confirm operand order against the
// SPU ISA.
84 def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
85                                               (v4f32 VECREG:$rB),
86                                               (v4f32 VECREG:$rA)),
87                                    Interpv4f32.Fragment,
88                                    DivEstv4f32.Fragment)>;
89 // Epsilon addition (v4f32): AIv4f32 adds the immediate 1 to the NRaphv4f32
// result.
// NOTE(review): presumably an integer add on each lane's bit pattern, i.e. a
// bump to the adjacent representable value -- confirm.
90 def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
// Pattern: lower v4f32 `fdiv` of $rA by $rB; vector counterpart of the f32
// divide pattern. SELBv4f32_cond chooses between the refined quotient
// (NRaphv4f32) and the same value with 1 added (Epsilonv4f32). The select
// condition is CGTIv4f32(FNMSv4f32($rB, Epsilon, $rA), -1).
// NOTE(review): presumably the FNMS term is the per-lane residual
// $rA - $rB * Epsilon and the integer "greater than -1" compare tests for a
// clear sign bit -- confirm against the SPU ISA and the SELBv4f32_cond
// definition.
92 def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
93           (SELBv4f32_cond NRaphv4f32.Fragment,
94                         Epsilonv4f32.Fragment,
95                         (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
96                                               Epsilonv4f32.Fragment,
97                                               (v4f32 VECREG:$rA)), -1))>;