4 /* For x86_64, do the following:
6 * - check that constants are used only in
8 * - ensure immediates always fit in 32b
9 * - expose machine register contraints
10 * on instructions like division.
11 * - implement fast locals (the streak of
12 * constant allocX in the first basic block)
13 * - recognize complex addressing modes
15 * Invariant: the use counts that are used
16 * in sel() must be sound. This
17 * is not so trivial, maybe the
18 * dce should be moved out...
21 static int amatch(Addr
*, Num
*, Ref
, Fn
*);
30 switch (fn
->con
[r
.val
].type
) {
32 /* we only support the 'small'
33 * code model of the ABI, this
34 * means that we can always
35 * address data with 32bits
39 val
= fn
->con
[r
.val
].bits
.i
;
40 return (val
< INT32_MIN
|| val
> INT32_MAX
);
42 die("invalid constant");
51 return fn
->tmp
[r
.val
].slot
;
55 hascon(Ref r
, Con
**pc
, Fn
*fn
)
59 *pc
= &fn
->con
[r
.val
];
62 *pc
= &fn
->mem
[r
.val
].offset
;
70 fixarg(Ref
*r
, int k
, Ins
*i
, Fn
*fn
)
80 op
= i
? i
->op
: Ocopy
;
81 if (KBASE(k
) == 1 && rtype(r0
) == RCon
) {
82 /* load floating points from memory
83 * slots, they can't be used as
87 vgrow(&fn
->mem
, ++fn
->nmem
);
88 memset(&a
, 0, sizeof a
);
89 a
.offset
.type
= CAddr
;
90 n
= stashbits(&fn
->con
[r0
.val
].bits
, KWIDE(k
) ? 8 : 4);
91 /* quote the name so that we do not
92 * add symbol prefixes on the apple
95 sprintf(buf
, "\"%sfp%d\"", T
.asloc
, n
);
96 a
.offset
.sym
.id
= intern(buf
);
97 fn
->mem
[fn
->nmem
-1] = a
;
99 else if (op
!= Ocopy
&& k
== Kl
&& noimm(r0
, fn
)) {
100 /* load constants that do not fit in
101 * a 32bit signed integer into a
104 r1
= newtmp("isel", Kl
, fn
);
105 emit(Ocopy
, Kl
, r1
, r0
, R
);
108 /* load fast locals' addresses into
109 * temporaries right before the
112 r1
= newtmp("isel", Kl
, fn
);
113 emit(Oaddr
, Kl
, r1
, SLOT(s
), R
);
115 else if (T
.apple
&& hascon(r0
, &c
, fn
)
116 && c
->type
== CAddr
&& c
->sym
.type
== SThr
) {
117 r1
= newtmp("isel", Kl
, fn
);
119 r2
= newtmp("isel", Kl
, fn
);
120 cc
= (Con
){.type
= CBits
};
121 cc
.bits
.i
= c
->bits
.i
;
122 r3
= newcon(&cc
, fn
);
123 emit(Oadd
, Kl
, r1
, r2
, r3
);
126 emit(Ocopy
, Kl
, r2
, TMP(RAX
), R
);
127 r2
= newtmp("isel", Kl
, fn
);
128 r3
= newtmp("isel", Kl
, fn
);
129 emit(Ocall
, 0, R
, r3
, CALL(17));
130 emit(Ocopy
, Kl
, TMP(RDI
), r2
, R
);
131 emit(Oload
, Kl
, r3
, r2
, R
);
134 r3
= newcon(&cc
, fn
);
135 emit(Oload
, Kl
, r2
, r3
, R
);
136 if (rtype(r0
) == RMem
) {
137 m
= &fn
->mem
[r0
.val
];
138 m
->offset
.type
= CUndef
;
143 else if (!(isstore(op
) && r
== &i
->arg
[1])
144 && !isload(op
) && op
!= Ocall
&& rtype(r0
) == RCon
145 && fn
->con
[r0
.val
].type
== CAddr
) {
146 /* apple as does not support 32-bit
147 * absolute addressing, use a rip-
148 * relative leaq instead
150 r1
= newtmp("isel", Kl
, fn
);
151 emit(Oaddr
, Kl
, r1
, r0
, R
);
153 else if (rtype(r0
) == RMem
) {
154 /* eliminate memory operands of
155 * the form $foo(%rip, ...)
157 m
= &fn
->mem
[r0
.val
];
159 if (m
->offset
.type
== CAddr
) {
160 r0
= newtmp("isel", Kl
, fn
);
161 emit(Oaddr
, Kl
, r0
, newcon(&m
->offset
, fn
), R
);
162 m
->offset
.type
= CUndef
;
170 seladdr(Ref
*r
, Num
*tn
, Fn
*fn
)
176 if (rtype(r0
) == RTmp
) {
177 memset(&a
, 0, sizeof a
);
178 if (!amatch(&a
, tn
, r0
, fn
))
181 if (a
.offset
.type
== CAddr
) {
182 /* apple as does not support
183 * $foo(%r0, %r1, M); try to
184 * rewrite it or bail out if
187 if (!req(a
.index
, R
) || rtype(a
.base
) != RTmp
)
196 vgrow(&fn
->mem
, ++fn
->nmem
);
197 fn
->mem
[fn
->nmem
-1] = a
;
198 chuse(a
.base
, +1, fn
);
199 chuse(a
.index
, +1, fn
);
200 *r
= MEM(fn
->nmem
-1);
205 cmpswap(Ref arg
[2], int op
)
215 return rtype(arg
[0]) == RCon
;
219 selcmp(Ref arg
[2], int k
, int swap
, Fn
*fn
)
229 emit(Oxcmp
, k
, R
, arg
[1], arg
[0]);
231 if (rtype(arg
[0]) == RCon
) {
233 icmp
->arg
[1] = newtmp("isel", k
, fn
);
234 emit(Ocopy
, k
, icmp
->arg
[1], arg
[0], R
);
235 fixarg(&curi
->arg
[0], k
, curi
, fn
);
237 fixarg(&icmp
->arg
[0], k
, icmp
, fn
);
238 fixarg(&icmp
->arg
[1], k
, icmp
, fn
);
242 sel(Ins i
, Num
*tn
, Fn
*fn
)
245 int x
, j
, k
, kc
, sh
, swap
;
248 if (rtype(i
.to
) == RTmp
)
249 if (!isreg(i
.to
) && !isreg(i
.arg
[0]) && !isreg(i
.arg
[1]))
250 if (fn
->tmp
[i
.to
.val
].nuse
== 0) {
251 chuse(i
.arg
[0], -1, fn
);
252 chuse(i
.arg
[1], -1, fn
);
264 if (i
.op
== Odiv
|| i
.op
== Oudiv
)
265 r0
= TMP(RAX
), r1
= TMP(RDX
);
267 r0
= TMP(RDX
), r1
= TMP(RAX
);
268 emit(Ocopy
, k
, i
.to
, r0
, R
);
269 emit(Ocopy
, k
, R
, r1
, R
);
270 if (rtype(i
.arg
[1]) == RCon
) {
271 /* immediates not allowed for
274 r0
= newtmp("isel", k
, fn
);
277 if (fn
->tmp
[r0
.val
].slot
!= -1)
278 err("unlikely argument %%%s in %s",
279 fn
->tmp
[r0
.val
].name
, optab
[i
.op
].name
);
280 if (i
.op
== Odiv
|| i
.op
== Orem
) {
281 emit(Oxidiv
, k
, R
, r0
, R
);
282 emit(Osign
, k
, TMP(RDX
), TMP(RAX
), R
);
284 emit(Oxdiv
, k
, R
, r0
, R
);
285 emit(Ocopy
, k
, TMP(RDX
), CON_Z
, R
);
287 emit(Ocopy
, k
, TMP(RAX
), i
.arg
[0], R
);
288 fixarg(&curi
->arg
[0], k
, curi
, fn
);
289 if (rtype(i
.arg
[1]) == RCon
)
290 emit(Ocopy
, k
, r0
, i
.arg
[1], R
);
296 if (rtype(r0
) == RCon
)
298 if (fn
->tmp
[r0
.val
].slot
!= -1)
299 err("unlikely argument %%%s in %s",
300 fn
->tmp
[r0
.val
].name
, optab
[i
.op
].name
);
302 emit(Ocopy
, Kw
, R
, TMP(RCX
), R
);
305 emit(Ocopy
, Kw
, TMP(RCX
), r0
, R
);
306 fixarg(&i1
->arg
[0], argcls(&i
, 0), i1
, fn
);
309 r0
= newtmp("utof", Kl
, fn
);
310 emit(Osltof
, k
, i
.to
, r0
, R
);
311 emit(Oextuw
, Kl
, r0
, i
.arg
[0], R
);
312 fixarg(&curi
->arg
[0], k
, curi
, fn
);
315 /* %mask =l and %arg.0, 1
316 * %isbig =l shr %arg.0, 63
317 * %divided =l shr %arg.0, %isbig
318 * %or =l or %mask, %divided
319 * %float =d sltof %or
320 * %cast =l cast %float
321 * %addend =l shl %isbig, 52
322 * %sum =l add %cast, %addend
323 * %result =d cast %sum
325 r0
= newtmp("utof", k
, fn
);
331 tmp
[j
] = newtmp("utof", Kl
, fn
);
333 tmp
[j
] = newtmp("utof", kc
, fn
);
334 emit(Ocast
, k
, i
.to
, tmp
[6], R
);
335 emit(Oadd
, kc
, tmp
[6], tmp
[4], tmp
[5]);
336 emit(Oshl
, kc
, tmp
[5], tmp
[1], getcon(sh
, fn
));
337 emit(Ocast
, kc
, tmp
[4], r0
, R
);
338 emit(Osltof
, k
, r0
, tmp
[3], R
);
339 emit(Oor
, Kl
, tmp
[3], tmp
[0], tmp
[2]);
340 emit(Oshr
, Kl
, tmp
[2], i
.arg
[0], tmp
[1]);
342 emit(Oshr
, Kl
, tmp
[1], i
.arg
[0], getcon(63, fn
));
343 fixarg(&curi
->arg
[0], Kl
, curi
, fn
);
344 emit(Oand
, Kl
, tmp
[0], i
.arg
[0], getcon(1, fn
));
345 fixarg(&curi
->arg
[0], Kl
, curi
, fn
);
350 tmp
[4] = getcon(0xdf000000, fn
);
355 tmp
[4] = getcon(0xc3e0000000000000, fn
);
358 r0
= newtmp("ftou", Kl
, fn
);
359 emit(Ocopy
, Kw
, i
.to
, r0
, R
);
364 /* %try0 =l {s,d}tosi %fp
365 * %mask =l sar %try0, 63
367 * mask is all ones if the first
368 * try was oob, all zeroes o.w.
370 * %fps ={s,d} sub %fp, (1<<63)
371 * %try1 =l {s,d}tosi %fps
373 * %tmp =l and %mask, %try1
374 * %res =l or %tmp, %try0
376 r0
= newtmp("ftou", kc
, fn
);
378 tmp
[j
] = newtmp("ftou", Kl
, fn
);
379 emit(Oor
, Kl
, i
.to
, tmp
[0], tmp
[3]);
380 emit(Oand
, Kl
, tmp
[3], tmp
[2], tmp
[1]);
381 emit(i
.op
, Kl
, tmp
[2], r0
, R
);
382 emit(Oadd
, kc
, r0
, tmp
[4], i
.arg
[0]);
383 i1
= curi
; /* fixarg() can change curi */
384 fixarg(&i1
->arg
[0], kc
, i1
, fn
);
385 fixarg(&i1
->arg
[1], kc
, i1
, fn
);
386 emit(Osar
, Kl
, tmp
[1], tmp
[0], getcon(63, fn
));
387 emit(i
.op
, Kl
, tmp
[0], i
.arg
[0], R
);
388 fixarg(&curi
->arg
[0], Kl
, curi
, fn
);
398 if (rtype(i
.arg
[0]) == RCon
) {
404 seladdr(&i
.arg
[1], tn
, fn
);
407 seladdr(&i
.arg
[0], tn
, fn
);
431 i1
= curi
; /* fixarg() can change curi */
432 fixarg(&i1
->arg
[0], argcls(&i
, 0), i1
, fn
);
433 fixarg(&i1
->arg
[1], argcls(&i
, 1), i1
, fn
);
438 salloc(i
.to
, i
.arg
[0], fn
);
445 if (iscmp(i
.op
, &kc
, &x
)) {
448 /* zf is set when operands are
449 * unordered, so we may have to
452 r0
= newtmp("isel", Kw
, fn
);
453 r1
= newtmp("isel", Kw
, fn
);
454 emit(Oand
, Kw
, i
.to
, r0
, r1
);
455 emit(Oflagfo
, k
, r1
, R
, R
);
459 r0
= newtmp("isel", Kw
, fn
);
460 r1
= newtmp("isel", Kw
, fn
);
461 emit(Oor
, Kw
, i
.to
, r0
, r1
);
462 emit(Oflagfuo
, k
, r1
, R
, R
);
466 swap
= cmpswap(i
.arg
, x
);
469 emit(Oflag
+x
, k
, i
.to
, R
, R
);
470 selcmp(i
.arg
, kc
, swap
, fn
);
473 die("unknown instruction %s", optab
[i
.op
].name
);
476 while (i0
>curi
&& --i0
) {
477 assert(rslot(i0
->arg
[0], fn
) == -1);
478 assert(rslot(i0
->arg
[1], fn
) == -1);
483 flagi(Ins
*i0
, Ins
*i
)
487 if (amd64_op
[i
->op
].zflag
)
489 if (amd64_op
[i
->op
].lflag
)
497 seljmp(Blk
*b
, Fn
*fn
)
504 if (b
->jmp
.type
== Jret0
505 || b
->jmp
.type
== Jjmp
506 || b
->jmp
.type
== Jhlt
)
508 assert(b
->jmp
.type
== Jjnz
);
512 assert(rtype(r
) == RTmp
);
513 if (b
->s1
== b
->s2
) {
519 fi
= flagi(b
->ins
, &b
->ins
[b
->nins
]);
520 if (!fi
|| !req(fi
->to
, r
)) {
521 selcmp((Ref
[2]){r
, CON_Z
}, Kw
, 0, fn
);
522 b
->jmp
.type
= Jjf
+ Cine
;
524 else if (iscmp(fi
->op
, &k
, &c
)
525 && c
!= NCmpI
+Cfeq
/* see sel() */
526 && c
!= NCmpI
+Cfne
) {
527 swap
= cmpswap(fi
->arg
, c
);
531 selcmp(fi
->arg
, k
, swap
, fn
);
532 *fi
= (Ins
){.op
= Onop
};
534 b
->jmp
.type
= Jjf
+ c
;
536 else if (fi
->op
== Oand
&& t
->nuse
== 1
537 && (rtype(fi
->arg
[0]) == RTmp
||
538 rtype(fi
->arg
[1]) == RTmp
)) {
541 b
->jmp
.type
= Jjf
+ Cine
;
542 if (rtype(fi
->arg
[1]) == RCon
) {
544 fi
->arg
[1] = fi
->arg
[0];
549 /* since flags are not tracked in liveness,
550 * the result of the flag-setting instruction
551 * has to be marked as live
554 emit(Ocopy
, Kw
, R
, r
, R
);
555 b
->jmp
.type
= Jjf
+ Cine
;
568 /* mgen generated code
570 * (with-vars (o b i s)
572 * (ob (add (con o) (tmp b)))
573 * (bis (add (tmp b) (mul (tmp i) (con s 1 2 4 8))))
574 * (ois (add (con o) (mul (tmp i) (con s 1 2 4 8))))
575 * (obis (add (con o) (tmp b) (mul (tmp i) (con s 1 2 4 8))))
576 * (bi1 (add (tmp b) (tmp i)))
577 * (obi1 (add (con o) (tmp b) (tmp i)))
582 opn(int op
, int l
, int r
)
584 static uchar Oaddtbl
[91] = {
593 11,11,5,8,9,5,12,9,5,
594 7,7,5,8,9,5,12,9,5,5,
595 11,11,5,8,9,5,12,9,5,5,5,
596 4,4,9,10,9,9,12,9,9,9,9,9,
597 7,7,5,8,9,5,12,9,5,5,5,9,5,
611 return Oaddtbl
[(l
+ l
*l
)/2 + r
];
618 refn(Ref r
, Num
*tn
, Con
*con
)
628 if (con
[r
.val
].type
!= CBits
)
630 n
= con
[r
.val
].bits
.i
;
631 if (n
== 8 || n
== 4 || n
== 2 || n
== 1)
639 static bits match
[13] = {
642 [6] = BIT(Pob
) | BIT(Pois
),
643 [7] = BIT(Pob
) | BIT(Pobi1
),
644 [8] = BIT(Pbi1
) | BIT(Pbis
),
645 [9] = BIT(Pbi1
) | BIT(Pobi1
),
646 [10] = BIT(Pbi1
) | BIT(Pbis
) | BIT(Pobi1
) | BIT(Pobis
),
647 [11] = BIT(Pob
) | BIT(Pobi1
) | BIT(Pobis
),
648 [12] = BIT(Pbi1
) | BIT(Pobi1
) | BIT(Pobis
),
651 static uchar
*matcher
[] = {
656 5,1,8,5,27,1,5,1,2,5,13,3,1,1,3,3,3,2,0,1,
663 5,3,9,9,10,33,12,35,45,1,5,3,11,9,7,9,4,9,
664 17,1,3,0,3,1,3,2,0,3,1,1,3,0,34,1,37,1,5,2,
665 5,7,2,7,8,37,29,1,3,0,1,32
668 5,2,10,7,11,19,49,1,1,3,3,3,2,1,3,0,3,1,0,
669 1,3,0,5,1,8,5,25,1,5,1,2,5,13,3,1,1,3,3,3,
670 2,0,1,3,3,3,2,26,1,51,1,5,1,6,5,9,1,3,0,51,
678 /* end of generated code */
681 anumber(Num
*tn
, Blk
*b
, Con
*con
)
686 for (i
=b
->ins
; i
<&b
->ins
[b
->nins
]; i
++) {
687 if (rtype(i
->to
) != RTmp
)
692 n
->nl
= refn(n
->l
, tn
, con
);
693 n
->nr
= refn(n
->r
, tn
, con
);
694 n
->n
= opn(i
->op
, n
->nl
, n
->nr
);
699 adisp(Con
*c
, Num
*tn
, Ref r
, Fn
*fn
, int s
)
705 assert(rtype(r
) == RTmp
);
706 n
= refn(r
, tn
, fn
->con
);
707 if (!(match
[n
] & BIT(Pob
)))
709 runmatch(matcher
[Pob
], tn
, r
, v
);
710 assert(rtype(v
[0]) == RCon
);
711 addcon(c
, &fn
->con
[v
[0].val
], s
);
718 amatch(Addr
*a
, Num
*tn
, Ref r
, Fn
*fn
)
720 static int pat
[] = {Pobis
, Pobi1
, Pbis
, Pois
, Pbi1
, -1};
721 Ref ro
, rb
, ri
, rs
, v
[4];
725 if (rtype(r
) != RTmp
)
728 n
= refn(r
, tn
, fn
->con
);
729 memset(v
, 0, sizeof v
);
730 for (p
=pat
; *p
>=0; p
++)
731 if (match
[n
] & BIT(*p
)) {
732 runmatch(matcher
[*p
], tn
, r
, v
);
738 memset(&co
, 0, sizeof co
);
740 rb
= adisp(&co
, tn
, v
[1], fn
, 1);
745 if (*p
< 0 && co
.type
!= CUndef
)
746 if (amatch(a
, tn
, rb
, fn
))
747 return addcon(&a
->offset
, &co
, 1);
749 assert(rtype(ro
) == RCon
);
750 c
= &fn
->con
[ro
.val
];
751 if (!addcon(&co
, c
, 1))
755 assert(rtype(rs
) == RCon
);
756 c
= &fn
->con
[rs
.val
];
757 assert(c
->type
== CBits
);
760 ri
= adisp(&co
, tn
, ri
, fn
, s
);
761 *a
= (Addr
){co
, rb
, ri
, s
};
763 if (rtype(ri
) == RTmp
)
764 if (fn
->tmp
[ri
.val
].slot
!= -1) {
766 || fn
->tmp
[rb
.val
].slot
!= -1)
771 if (!req(a
->base
, R
)) {
772 assert(rtype(a
->base
) == RTmp
);
773 s
= fn
->tmp
[a
->base
.val
].slot
;
780 /* instruction selection
781 * requires use counts (as given by parsing)
794 /* assign slots to fast allocs */
796 /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
797 for (al
=Oalloc
, n
=4; al
<=Oalloc1
; al
++, n
*=2)
798 for (i
=b
->ins
; i
<&b
->ins
[b
->nins
]; i
++)
800 if (rtype(i
->arg
[0]) != RCon
)
802 sz
= fn
->con
[i
->arg
[0].val
].bits
.i
;
803 if (sz
< 0 || sz
>= INT_MAX
-15)
804 err("invalid alloc size %"PRId64
, sz
);
805 sz
= (sz
+ n
-1) & -n
;
807 if (sz
> INT_MAX
- fn
->slot
)
808 die("alloc too large");
809 fn
->tmp
[i
->to
.val
].slot
= fn
->slot
;
811 fn
->salign
= 2 + al
- Oalloc
;
812 *i
= (Ins
){.op
= Onop
};
815 /* process basic blocks */
817 num
= emalloc(n
* sizeof num
[0]);
818 for (b
=fn
->start
; b
; b
=b
->link
) {
820 for (sb
=(Blk
*[3]){b
->s1
, b
->s2
, 0}; *sb
; sb
++)
821 for (p
=(*sb
)->phi
; p
; p
=p
->link
) {
822 for (a
=0; p
->blk
[a
] != b
; a
++)
823 assert(a
+1 < p
->narg
);
824 fixarg(&p
->arg
[a
], p
->cls
, 0, fn
);
826 memset(num
, 0, n
* sizeof num
[0]);
827 anumber(num
, b
, fn
->con
);
829 for (i
=&b
->ins
[b
->nins
]; i
!=b
->ins
;)
831 b
->nins
= &insb
[NIns
] - curi
;
832 idup(&b
->ins
, curi
, b
->nins
);
837 fprintf(stderr
, "\n> After instruction selection:\n");