Add a function for profiling to run at shutdown. Unlike the existing API, this
[llvm/stm8.git] / lib / Target / CellSPU / SPU64InstrInfo.td
blob5ef5716bd8cf7a6dc8111152700dfa1cf6e12c43
1 //====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====//
2 //
3 //                     Cell SPU 64-bit operations
4 //
5 //===----------------------------------------------------------------------===//
7 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
8 // 64-bit comparisons:
9 //
10 // 1. The instruction sequences for vector versus scalar differ by a
11 //    constant. In the scalar case, we're only interested in the
12 //    top two 32-bit slots, whereas we're interested in an exact
13 //    all-four-slot match in the vector case.
15 // 2. There are no "immediate" forms, since loading 64-bit constants
16 //    could be a constant pool load.
18 // 3. i64 setcc results are i32, which are subsequently converted to a FSM
19 //    mask when used in a select pattern.
21 // 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO)
22 //    [Note: this may be moot, since gb produces v4i32 or r32.]
24 // 5. The code sequences for r64 and v2i64 are probably overly conservative,
25 //    compared to the code that gcc produces.
27 // M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
28 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
30 // selb instruction definition for i64. Note that the selection mask ($rC)
31 // is a vector register (VECREG), produced by various forms of FSM:
32 def SELBr64_cond:
33   SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
34            [/* no pattern */]>;  // no pattern of its own; emitted by explicit Pat<> records such as the i64 select patterns
36 // Generic i64 select. The condition arrives in a 32-bit register holding a
37 // select mask pattern (i.e., a gather bits result); FSMr32 turns it into the
38 // vector mask operand that SELBr64_cond expects:
40 def : Pat<(select R32C:$rCond, R64C:$rTrue, R64C:$rFalse),
41           (SELBr64_cond R64C:$rFalse, R64C:$rTrue, (FSMr32 R32C:$rCond))>;
43 // select the negative condition: `cond` is the setcc being matched, `compare` is the fragment for the opposite test
44 class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
45   Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
46       (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;  // mask comes from the opposite test, so no explicit negation is emitted
48 // setcc the negative condition: compute the opposite test (`compare`) and complement it
49 class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
50   Pat<(cond R64C:$rA, R64C:$rB),
51       (XORIr32 compare.Fragment, -1)>;  // XOR with -1 flips every bit of the i32 result
53 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
54 // The i64 seteq fragment that does the scalar->vector conversion and
55 // comparison (operands are named $rA/$rB; users of .Fragment must bind the same names):
56 def CEQr64compare:
57     CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
58                                            (COPY_TO_REGCLASS R64C:$rB, VECREG))), 0xb)>;  // "> 0xb": presumably only the top two 32-bit slots must match (scalar case, note 1 above)
60 // The i64 seteq fragment that does the vector comparison
61 def CEQv2i64compare:
62     CodeFrag<(CEQIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0xf)>;  // "== 0xf": presumably all four slots must match (vector case, note 1 above)
64 // i64 seteq (equality): the setcc result is i32, which is converted to a
65 // vector FSM mask when used in a select pattern.
67 // v2i64 seteq (equality): the setcc result is v4i32
68 multiclass CompareEqual64 {  // each def below is suffixed onto the defm name (r64, v2i64, r64mask, v2i64mask)
69   // Plain old comparison, converts back to i32 scalar
70   def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>;
71   def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>;
73   // SELB mask from FSM: the comparison result run through FSMv4i32
74   def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
75                                (FSMv4i32 CEQr64compare.Fragment), R32C))>;
76   def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
77                                (FSMv4i32 CEQv2i64compare.Fragment), R32C))>;
78 }
80 defm I64EQ: CompareEqual64;  // defines I64EQr64, I64EQv2i64, I64EQr64mask, I64EQv2i64mask
82 def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
83 def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;
85 // i64 setne: the complement of seteq, for both setcc and select
86 def : I64SETCCNegCond<setne, I64EQr64>;
87 def : I64SELECTNegCond<setne, I64EQr64>;
89 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
90 // i64 setugt/setule:
91 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
93 def CLGTr64ugt:  // CLGTv4i32 on both i64 operands after moving them into vector registers
94     CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), 
95                         (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
97 def CLGTr64eq:  // CEQv4i32 on the same operands; used as the SELB mask below
98     CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), 
99                        (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
100 // Combine: SELB chooses between the raw CLGT result and its XSWD form, steered by the CEQ result.
101 def CLGTr64compare:
102     CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
103                         (XSWDv2i64 CLGTr64ugt.Fragment),
104                         CLGTr64eq.Fragment)>;  // presumably: where the words are equal, fall back to the extended low-word result
106 def CLGTv2i64ugt:  // vector form: operands are already in VECREG
107     CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>;
109 def CLGTv2i64eq:  // word-wise equality, used as the SELB mask below
110     CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
111 // Same SELB/XSWD combination as CLGTr64compare, built purely from the v2i64 fragments.
112 def CLGTv2i64compare:
113     CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment,
114                         (XSWDv2i64 CLGTv2i64ugt.Fragment),  // was CLGTr64ugt, which binds $rA/$rB as R64C
115                         CLGTv2i64eq.Fragment)>;
117 multiclass CompareLogicalGreaterThan64 {  // each def below is suffixed onto the defm name
118   // Plain old comparison, converts back to i32 scalar
119   def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>;
120   def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;  // vector result is passed through unchanged
122   // SELB mask from FSM: the comparison result run through FSMv4i32
123   def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
124                                (FSMv4i32 CLGTr64compare.Fragment), R32C))>;
125   def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
126                                (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>;
127 }
129 defm I64LGT: CompareLogicalGreaterThan64;  // defines I64LGTr64, I64LGTv2i64, I64LGTr64mask, I64LGTv2i64mask
131 def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
132 //def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
133 //          I64LGTv2i64.Fragment>;
135 // i64 setule: the complement of setugt, for both setcc and select
136 def : I64SETCCNegCond<setule, I64LGTr64>;
137 def : I64SELECTNegCond<setule, I64LGTr64>;
139 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
140 // i64 setuge/setult:
141 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
143 def CLGEr64compare:  // (ugt OR eq) per word, gathered, then tested with the scalar threshold 0xb as in CEQr64compare
144     CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CLGTr64ugt.Fragment,
145                                           CLGTr64eq.Fragment)), 0xb)>;
147 def CLGEv2i64compare:  // vector form: gathered bits must equal 0xf, as in CEQv2i64compare
148     CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment,
149                                           CLGTv2i64eq.Fragment)), 0xf)>;
151 multiclass CompareLogicalGreaterEqual64 {  // each def below is suffixed onto the defm name
152   // Plain old comparison, converts back to i32 scalar
153   def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>;
154   def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;  // vector result is passed through unchanged
156   // SELB mask from FSM: the comparison result run through FSMv4i32
157   def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
158                            (FSMv4i32 CLGEr64compare.Fragment), R32C))>;
159   def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
160                            (FSMv4i32 CLGEv2i64compare.Fragment),R32C))>;
161 }
163 defm I64LGE: CompareLogicalGreaterEqual64;  // defines I64LGEr64, I64LGEv2i64, I64LGEr64mask, I64LGEv2i64mask
165 def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>;
166 def : Pat<(v2i64 (setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
167           I64LGEv2i64.Fragment>;
168                   
170 // i64 setult: the complement of setuge, for both setcc and select
171 def : I64SETCCNegCond<setult, I64LGEr64>;
172 def : I64SELECTNegCond<setult, I64LGEr64>;
174 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
175 // i64 setgt/setle:
176 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
178 def CGTr64sgt:  // CGTv4i32 on both i64 operands after moving them into vector registers
179     CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), 
180                        (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
182 def CGTr64eq:  // same dag as CLGTr64eq; used as the SELB mask below
183     CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), 
184                        (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
185 // Combine: SELB chooses between the raw CGT result and its XSWD form, steered by the CEQ result.
186 def CGTr64compare:
187     CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
188                         (XSWDv2i64 CGTr64sgt.Fragment),
189                         CGTr64eq.Fragment)>;
191 def CGTv2i64sgt:  // vector form: operands are already in VECREG
192     CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>;
194 def CGTv2i64eq:  // word-wise equality, used as the SELB mask below
195     CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
196 // Same SELB/XSWD combination as CGTr64compare, built purely from the v2i64 fragments.
197 def CGTv2i64compare:
198     CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment,
199                         (XSWDv2i64 CGTv2i64sgt.Fragment),  // was CGTr64sgt, which binds $rA/$rB as R64C
200                         CGTv2i64eq.Fragment)>;
202 multiclass CompareGreaterThan64 {  // each def below is suffixed onto the defm name
203   // Plain old comparison, converts back to i32 scalar
204   def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>;
205   def v2i64: CodeFrag<CGTv2i64compare.Fragment>;  // vector result is passed through unchanged
207   // SELB mask from FSM: the comparison result run through FSMv4i32
208   def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
209                              (FSMv4i32 CGTr64compare.Fragment), R32C))>;
210   def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
211                                (FSMv4i32 CGTv2i64compare.Fragment), R32C))>;
212 }
214 defm I64GT: CompareGreaterThan64;  // signed CGT* fragments; was CompareLogicalGreaterThan64 (the CLGT* fragments also used for setugt)
216 def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
217 //def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
218 //                  I64GTv2i64.Fragment>;
220 // i64 setle: the complement of setgt, for both setcc and select
221 def : I64SETCCNegCond<setle, I64GTr64>;
222 def : I64SELECTNegCond<setle, I64GTr64>;
224 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
225 // i64 setge/setlt:
226 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
227     
228 def CGEr64compare:  // (sgt OR eq) per word, gathered, then tested with the scalar threshold 0xb as in CEQr64compare
229     CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CGTr64sgt.Fragment,
230                                           CGTr64eq.Fragment)), 0xb)>;
232 def CGEv2i64compare:  // vector form: gathered bits must equal 0xf, as in CEQv2i64compare
233     CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment,
234                                           CGTv2i64eq.Fragment)), 0xf)>;
236 multiclass CompareGreaterEqual64 {  // each def below is suffixed onto the defm name
237   // Plain old comparison, converts back to i32 scalar
238   def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>;
239   def v2i64: CodeFrag<CGEv2i64compare.Fragment>;  // vector result is passed through unchanged
241   // SELB mask from FSM: the comparison result run through FSMv4i32
242   def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEr64compare.Fragment),R32C))>;
243   def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEv2i64compare.Fragment),R32C))>;
244 }
246 defm I64GE: CompareGreaterEqual64;  // defines I64GEr64, I64GEv2i64, I64GEr64mask, I64GEv2i64mask
248 def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>;
249 def : Pat<(v2i64 (setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
250           I64GEv2i64.Fragment>;
252 // i64 setlt: the complement of setge, for both setcc and select
253 def : I64SETCCNegCond<setlt, I64GEr64>;
254 def : I64SELECTNegCond<setlt, I64GEr64>;
256 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
257 // v2i64, i64 add
258 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
260 class v2i64_add_cg<dag lhs, dag rhs>:  // CGv4i32 of the two operands (presumably the per-word carry-out)
261     CodeFrag<(CGv4i32 lhs, rhs)>;
263 class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:  // ADDX of the operands with cg shuffled by cg_mask as third input
264     CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;
266 class v2i64_add<dag lhs, dag rhs, dag cg_mask>:  // convenience form: derives cg from lhs/rhs via v2i64_add_cg
267     v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
269 def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),  // scalar i64: move to VECREG, add, move the result back to R64C
270            (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG),
271                                   (COPY_TO_REGCLASS R64C:$rB, VECREG),
272                                   (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
274 def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),  // v2i64: operands are used directly
275                     (v4i32 VECREG:$rCGmask)),
276            v2i64_add<(v2i64 VECREG:$rA),
277                      (v2i64 VECREG:$rB),
278                      (v4i32 VECREG:$rCGmask)>.Fragment>;
280 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
281 // v2i64, i64 subtraction
282 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
284 class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;  // BGv4i32 of the two operands (presumably the per-word borrow)
286 class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:  // SFX of the operands with bg shuffled by bg_mask as third input
287     CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
289 def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),  // scalar i64: move to VECREG, subtract, move the result back to R64C
290            (COPY_TO_REGCLASS 
291                v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG),
292                          (COPY_TO_REGCLASS R64C:$rB, VECREG),
293                          v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG),
294                                       (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment,
295                                   (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
297 def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),  // v2i64: operands are used directly
298                     (v4i32 VECREG:$rCGmask)),
299            v2i64_sub<(v2i64 VECREG:$rA),
300                      (v2i64 VECREG:$rB),
301                      v2i64_sub_bg<(v2i64 VECREG:$rA),
302                                   (v2i64 VECREG:$rB)>.Fragment,
303                      (v4i32 VECREG:$rCGmask)>.Fragment>;
305 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
306 // v2i64, i64 multiply
308 // Note: i64 multiply is simply the vector->scalar conversion of the
309 // full-on v2i64 multiply, since the entire vector has to be manipulated
310 // anyway.
311 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
313 class v2i64_mul_ahi64<dag src>:    // operand a: SELB against zero with byte mask 0x0f0f
314     CodeFrag<(SELBv4i32 src, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
316 class v2i64_mul_bhi64<dag src>:    // operand b: same 0x0f0f mask
317     CodeFrag<(SELBv4i32 src, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
319 class v2i64_mul_alo64<dag src>:    // operand a: complementary byte mask 0xf0f0
320     CodeFrag<(SELBv4i32 src, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
322 class v2i64_mul_blo64<dag src>:    // operand b: same 0xf0f0 mask
323     CodeFrag<(SELBv4i32 src, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
325 class v2i64_mul_ashlq2<dag src>:   // operand a through SHLQBYI with immediate 2
326     CodeFrag<(SHLQBYIv4i32 src, 0x2)>;
328 class v2i64_mul_ashlq4<dag src>:   // operand a through SHLQBYI with immediate 4
329     CodeFrag<(SHLQBYIv4i32 src, 0x4)>;
331 class v2i64_mul_bshlq2<dag src>:   // operand b through SHLQBYI with immediate 2
332     CodeFrag<(SHLQBYIv4i32 src, 0x2)>;
334 class v2i64_mul_bshlq4<dag src>:   // operand b through SHLQBYI with immediate 4
335     CodeFrag<(SHLQBYIv4i32 src, 0x4)>;
337 class v2i64_highprod<dag rA, dag rB>:  // sum (nested Av4i32) of seven MPYU/MPYH partial products of masked/shifted rA and rB
338     CodeFrag<(Av4i32
339                 (Av4i32
340                   (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment,     // a1 x b3
341                              v2i64_mul_ahi64<rA>.Fragment),
342                   (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment,      // a0 x b3
343                              v2i64_mul_bshlq4<rB>.Fragment)),
344                 (Av4i32
345                   (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,      // b-hi with shifted a (MPYH, operands b,a)
346                              v2i64_mul_ashlq4<rA>.Fragment),
347                   (Av4i32
348                       (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment, // same pair, MPYH with operands a,b
349                                  v2i64_mul_bhi64<rB>.Fragment),
350                     (Av4i32
351                       (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment, // same pair, MPYU
352                                  v2i64_mul_bhi64<rB>.Fragment),
353                       (Av4i32
354                         (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment, // both operands shifted by 2, MPYH
355                                    v2i64_mul_bshlq2<rB>.Fragment),
356                         (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment, // both operands shifted by 2, MPYU
357                                    v2i64_mul_bshlq2<rB>.Fragment))))))>;
359 class v2i64_mul_a3_b3<dag rA, dag rB>:  // MPYU of the 0xf0f0-masked forms of both operands
360     CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
361                         v2i64_mul_blo64<rB>.Fragment)>;
363 class v2i64_mul_a2_b3<dag rA, dag rB>:  // MPYHHU(alo64(rA), bshlq2(rB)), shifted by 2 and masked with 0xc3c3
364     CodeFrag<(SELBv4i32 (SHLQBYIv4i32
365                           (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
366                                        v2i64_mul_bshlq2<rB>.Fragment), 0x2),
367                         (ILv4i32 0),
368                         (FSMBIv4i32 0xc3c3))>;
370 class v2i64_mul_a3_b2<dag rA, dag rB>:  // mirror of a2_b3: MPYHHU(blo64(rB), ashlq2(rA)), shifted by 2 and masked with 0xc3c3
371     CodeFrag<(SELBv4i32 (SHLQBYIv4i32
372                           (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
373                                        v2i64_mul_ashlq2<rA>.Fragment), 0x2),
374                         (ILv4i32 0),
375                         (FSMBIv4i32 0xc3c3))>;
377 class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:  // (a3_b3 + a2_b3) + a3_b2, each sum done with the carry-propagating v2i64_add
378     v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
379                         v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
380               v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;
382 class v2i64_mul<dag rA, dag rB, dag rCGmask>:  // full product: lowsum + (highprod masked with 0x0f0f), again via v2i64_add
383     v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
384               (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
385                          (ILv4i32 0),
386                          (FSMBIv4i32 0x0f0f)),
387               rCGmask>;
389 def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),  // scalar i64: run the vector multiply, then move the result back to R64C
390           (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG),
391                                  (COPY_TO_REGCLASS R64C:$rB, VECREG),
392                                  (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
394 def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),  // v2i64: operands are used directly
395                     (v4i32 VECREG:$rCGmask)),
396           v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
397                     (v4i32 VECREG:$rCGmask)>.Fragment>;
399 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
400 // f64 comparisons
401 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
403 // selb instruction definition for f64. Unlike SELBr64_cond, the selection
404 // mask operand ($rC) is a 32-bit register and the def carries its own select pattern:
405 def SELBf64_cond:
406    SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
407             [(set R64FP:$rT,
408                   (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;  // $rB is select's second operand, $rA its third