Simplify int mul/udiv/urem of 2^N into shl/shr/and.
[qbe.git] / util.c
blob2e4b4cc57fb52ce590fcfe5e141cad77c7f8c50e
1 #include "all.h"
2 #include <stdarg.h>
4 typedef struct Bitset Bitset;
5 typedef struct Vec Vec;
6 typedef struct Bucket Bucket;
8 struct Vec {
9 ulong mag;
10 Pool pool;
11 size_t esz;
12 ulong cap;
13 union {
14 long long ll;
15 long double ld;
16 void *ptr;
17 } align[];
20 struct Bucket {
21 uint nstr;
22 char **str;
25 enum {
26 VMin = 2,
27 VMag = 0xcabba9e,
28 NPtr = 256,
29 IBits = 12,
30 IMask = (1<<IBits) - 1,
33 Typ *typ;
34 Ins insb[NIns], *curi;
36 static void *ptr[NPtr];
37 static void **pool = ptr;
38 static int nptr = 1;
40 static Bucket itbl[IMask+1]; /* string interning table */
42 uint32_t
43 hash(char *s)
45 uint32_t h;
47 for (h=0; *s; ++s)
48 h = *s + 17*h;
49 return h;
52 void
53 die_(char *file, char *s, ...)
55 va_list ap;
57 fprintf(stderr, "%s: dying: ", file);
58 va_start(ap, s);
59 vfprintf(stderr, s, ap);
60 va_end(ap);
61 fputc('\n', stderr);
62 abort();
65 void *
66 emalloc(size_t n)
68 void *p;
70 p = calloc(1, n);
71 if (!p)
72 die("emalloc, out of memory");
73 return p;
76 void *
77 alloc(size_t n)
79 void **pp;
81 if (n == 0)
82 return 0;
83 if (nptr >= NPtr) {
84 pp = emalloc(NPtr * sizeof(void *));
85 pp[0] = pool;
86 pool = pp;
87 nptr = 1;
89 return pool[nptr++] = emalloc(n);
92 void
93 freeall()
95 void **pp;
97 for (;;) {
98 for (pp = &pool[1]; pp < &pool[nptr]; pp++)
99 free(*pp);
100 pp = pool[0];
101 if (!pp)
102 break;
103 free(pool);
104 pool = pp;
105 nptr = NPtr;
107 nptr = 1;
110 void *
111 vnew(ulong len, size_t esz, Pool pool)
113 void *(*f)(size_t);
114 ulong cap;
115 Vec *v;
117 for (cap=VMin; cap<len; cap*=2)
119 f = pool == PHeap ? emalloc : alloc;
120 v = f(cap * esz + sizeof(Vec));
121 v->mag = VMag;
122 v->cap = cap;
123 v->esz = esz;
124 v->pool = pool;
125 return v + 1;
128 void
129 vfree(void *p)
131 Vec *v;
133 v = (Vec *)p - 1;
134 assert(v->mag == VMag);
135 if (v->pool == PHeap) {
136 v->mag = 0;
137 free(v);
141 void
142 vgrow(void *vp, ulong len)
144 Vec *v;
145 void *v1;
147 v = *(Vec **)vp - 1;
148 assert(v+1 && v->mag == VMag);
149 if (v->cap >= len)
150 return;
151 v1 = vnew(len, v->esz, v->pool);
152 memcpy(v1, v+1, v->cap * v->esz);
153 vfree(v+1);
154 *(Vec **)vp = v1;
157 void
158 strf(char str[NString], char *s, ...)
160 va_list ap;
162 va_start(ap, s);
163 vsnprintf(str, NString, s, ap);
164 va_end(ap);
167 uint32_t
168 intern(char *s)
170 Bucket *b;
171 uint32_t h;
172 uint i, n;
174 h = hash(s) & IMask;
175 b = &itbl[h];
176 n = b->nstr;
178 for (i=0; i<n; i++)
179 if (strcmp(s, b->str[i]) == 0)
180 return h + (i<<IBits);
182 if (n == 1<<(32-IBits))
183 die("interning table overflow");
184 if (n == 0)
185 b->str = vnew(1, sizeof b->str[0], PHeap);
186 else if ((n & (n-1)) == 0)
187 vgrow(&b->str, n+n);
189 b->str[n] = emalloc(strlen(s)+1);
190 b->nstr = n + 1;
191 strcpy(b->str[n], s);
192 return h + (n<<IBits);
195 char *
196 str(uint32_t id)
198 assert(id>>IBits < itbl[id&IMask].nstr);
199 return itbl[id&IMask].str[id>>IBits];
203 isreg(Ref r)
205 return rtype(r) == RTmp && r.val < Tmp0;
209 iscmp(int op, int *pk, int *pc)
211 if (Ocmpw <= op && op <= Ocmpw1) {
212 *pc = op - Ocmpw;
213 *pk = Kw;
215 else if (Ocmpl <= op && op <= Ocmpl1) {
216 *pc = op - Ocmpl;
217 *pk = Kl;
219 else if (Ocmps <= op && op <= Ocmps1) {
220 *pc = NCmpI + op - Ocmps;
221 *pk = Ks;
223 else if (Ocmpd <= op && op <= Ocmpd1) {
224 *pc = NCmpI + op - Ocmpd;
225 *pk = Kd;
227 else
228 return 0;
229 return 1;
233 argcls(Ins *i, int n)
235 return optab[i->op].argcls[n][i->cls];
238 void
239 emit(int op, int k, Ref to, Ref arg0, Ref arg1)
241 if (curi == insb)
242 die("emit, too many instructions");
243 *--curi = (Ins){
244 .op = op, .cls = k,
245 .to = to, .arg = {arg0, arg1}
249 void
250 emiti(Ins i)
252 emit(i.op, i.cls, i.to, i.arg[0], i.arg[1]);
255 void
256 idup(Ins **pd, Ins *s, ulong n)
258 *pd = alloc(n * sizeof(Ins));
259 if (n)
260 memcpy(*pd, s, n * sizeof(Ins));
263 Ins *
264 icpy(Ins *d, Ins *s, ulong n)
266 if (n)
267 memcpy(d, s, n * sizeof(Ins));
268 return d + n;
271 static int cmptab[][2] ={
272 /* negation swap */
273 [Ciule] = {Ciugt, Ciuge},
274 [Ciult] = {Ciuge, Ciugt},
275 [Ciugt] = {Ciule, Ciult},
276 [Ciuge] = {Ciult, Ciule},
277 [Cisle] = {Cisgt, Cisge},
278 [Cislt] = {Cisge, Cisgt},
279 [Cisgt] = {Cisle, Cislt},
280 [Cisge] = {Cislt, Cisle},
281 [Cieq] = {Cine, Cieq},
282 [Cine] = {Cieq, Cine},
283 [NCmpI+Cfle] = {NCmpI+Cfgt, NCmpI+Cfge},
284 [NCmpI+Cflt] = {NCmpI+Cfge, NCmpI+Cfgt},
285 [NCmpI+Cfgt] = {NCmpI+Cfle, NCmpI+Cflt},
286 [NCmpI+Cfge] = {NCmpI+Cflt, NCmpI+Cfle},
287 [NCmpI+Cfeq] = {NCmpI+Cfne, NCmpI+Cfeq},
288 [NCmpI+Cfne] = {NCmpI+Cfeq, NCmpI+Cfne},
289 [NCmpI+Cfo] = {NCmpI+Cfuo, NCmpI+Cfo},
290 [NCmpI+Cfuo] = {NCmpI+Cfo, NCmpI+Cfuo},
294 cmpneg(int c)
296 assert(0 <= c && c < NCmp);
297 return cmptab[c][0];
301 cmpop(int c)
303 assert(0 <= c && c < NCmp);
304 return cmptab[c][1];
308 clsmerge(short *pk, short k)
310 short k1;
312 k1 = *pk;
313 if (k1 == Kx) {
314 *pk = k;
315 return 0;
317 if ((k1 == Kw && k == Kl) || (k1 == Kl && k == Kw)) {
318 *pk = Kw;
319 return 0;
321 return k1 != k;
325 phicls(int t, Tmp *tmp)
327 int t1;
329 t1 = tmp[t].phi;
330 if (!t1)
331 return t;
332 t1 = phicls(t1, tmp);
333 tmp[t].phi = t1;
334 return t1;
338 newtmp(char *prfx, int k, Fn *fn)
340 static int n;
341 int t;
343 t = fn->ntmp++;
344 vgrow(&fn->tmp, fn->ntmp);
345 memset(&fn->tmp[t], 0, sizeof(Tmp));
346 if (prfx)
347 strf(fn->tmp[t].name, "%s.%d", prfx, ++n);
348 fn->tmp[t].cls = k;
349 fn->tmp[t].slot = -1;
350 fn->tmp[t].nuse = +1;
351 fn->tmp[t].ndef = +1;
352 return TMP(t);
355 void
356 chuse(Ref r, int du, Fn *fn)
358 if (rtype(r) == RTmp)
359 fn->tmp[r.val].nuse += du;
363 symeq(Sym s0, Sym s1)
365 return s0.type == s1.type && s0.id == s1.id;
369 newcon(Con *c0, Fn *fn)
371 Con *c1;
372 int i;
374 for (i=1; i<fn->ncon; i++) {
375 c1 = &fn->con[i];
376 if (c0->type == c1->type
377 && symeq(c0->sym, c1->sym)
378 && c0->bits.i == c1->bits.i)
379 return CON(i);
381 vgrow(&fn->con, ++fn->ncon);
382 fn->con[i] = *c0;
383 return CON(i);
387 getcon(int64_t val, Fn *fn)
389 int c;
391 for (c=1; c<fn->ncon; c++)
392 if (fn->con[c].type == CBits
393 && fn->con[c].bits.i == val)
394 return CON(c);
395 vgrow(&fn->con, ++fn->ncon);
396 fn->con[c] = (Con){.type = CBits, .bits.i = val};
397 return CON(c);
401 addcon(Con *c0, Con *c1, int m)
403 if (m != 1 && c1->type == CAddr)
404 return 0;
405 if (c0->type == CUndef) {
406 *c0 = *c1;
407 c0->bits.i *= m;
408 } else {
409 if (c1->type == CAddr) {
410 if (c0->type == CAddr)
411 return 0;
412 c0->type = CAddr;
413 c0->sym = c1->sym;
415 c0->bits.i += c1->bits.i * m;
417 return 1;
420 void
421 salloc(Ref rt, Ref rs, Fn *fn)
423 Ref r0, r1;
424 int64_t sz;
426 /* we need to make sure
427 * the stack remains aligned
428 * (rsp = 0) mod 16
430 fn->dynalloc = 1;
431 if (rtype(rs) == RCon) {
432 sz = fn->con[rs.val].bits.i;
433 if (sz < 0 || sz >= INT_MAX-15)
434 err("invalid alloc size %"PRId64, sz);
435 sz = (sz + 15) & -16;
436 emit(Osalloc, Kl, rt, getcon(sz, fn), R);
437 } else {
438 /* r0 = (r + 15) & -16 */
439 r0 = newtmp("isel", Kl, fn);
440 r1 = newtmp("isel", Kl, fn);
441 emit(Osalloc, Kl, rt, r0, R);
442 emit(Oand, Kl, r0, r1, getcon(-16, fn));
443 emit(Oadd, Kl, r1, rs, getcon(15, fn));
444 if (fn->tmp[rs.val].slot != -1)
445 err("unlikely alloc argument %%%s for %%%s",
446 fn->tmp[rs.val].name, fn->tmp[rt.val].name);
450 void
451 bsinit(BSet *bs, uint n)
453 n = (n + NBit-1) / NBit;
454 bs->nt = n;
455 bs->t = alloc(n * sizeof bs->t[0]);
458 MAKESURE(NBit_is_64, NBit == 64);
459 inline static uint
460 popcnt(bits b)
462 b = (b & 0x5555555555555555) + ((b>>1) & 0x5555555555555555);
463 b = (b & 0x3333333333333333) + ((b>>2) & 0x3333333333333333);
464 b = (b & 0x0f0f0f0f0f0f0f0f) + ((b>>4) & 0x0f0f0f0f0f0f0f0f);
465 b += (b>>8);
466 b += (b>>16);
467 b += (b>>32);
468 return b & 0xff;
471 inline static int
472 firstbit(bits b)
474 int n;
476 n = 0;
477 if (!(b & 0xffffffff)) {
478 n += 32;
479 b >>= 32;
481 if (!(b & 0xffff)) {
482 n += 16;
483 b >>= 16;
485 if (!(b & 0xff)) {
486 n += 8;
487 b >>= 8;
489 if (!(b & 0xf)) {
490 n += 4;
491 b >>= 4;
493 n += (char[16]){4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0}[b & 0xf];
494 return n;
497 uint
498 bscount(BSet *bs)
500 uint i, n;
502 n = 0;
503 for (i=0; i<bs->nt; i++)
504 n += popcnt(bs->t[i]);
505 return n;
508 static inline uint
509 bsmax(BSet *bs)
511 return bs->nt * NBit;
514 void
515 bsset(BSet *bs, uint elt)
517 assert(elt < bsmax(bs));
518 bs->t[elt/NBit] |= BIT(elt%NBit);
521 void
522 bsclr(BSet *bs, uint elt)
524 assert(elt < bsmax(bs));
525 bs->t[elt/NBit] &= ~BIT(elt%NBit);
528 #define BSOP(f, op) \
529 void \
530 f(BSet *a, BSet *b) \
532 uint i; \
534 assert(a->nt == b->nt); \
535 for (i=0; i<a->nt; i++) \
536 a->t[i] op b->t[i]; \
539 BSOP(bscopy, =)
540 BSOP(bsunion, |=)
541 BSOP(bsinter, &=)
542 BSOP(bsdiff, &= ~)
545 bsequal(BSet *a, BSet *b)
547 uint i;
549 assert(a->nt == b->nt);
550 for (i=0; i<a->nt; i++)
551 if (a->t[i] != b->t[i])
552 return 0;
553 return 1;
556 void
557 bszero(BSet *bs)
559 memset(bs->t, 0, bs->nt * sizeof bs->t[0]);
562 /* iterates on a bitset, use as follows
564 * for (i=0; bsiter(set, &i); i++)
565 * use(i);
569 bsiter(BSet *bs, int *elt)
571 bits b;
572 uint t, i;
574 i = *elt;
575 t = i/NBit;
576 if (t >= bs->nt)
577 return 0;
578 b = bs->t[t];
579 b &= ~(BIT(i%NBit) - 1);
580 while (!b) {
581 ++t;
582 if (t >= bs->nt)
583 return 0;
584 b = bs->t[t];
586 *elt = NBit*t + firstbit(b);
587 return 1;
590 void
591 dumpts(BSet *bs, Tmp *tmp, FILE *f)
593 int t;
595 fprintf(f, "[");
596 for (t=Tmp0; bsiter(bs, &t); t++)
597 fprintf(f, " %s", tmp[t].name);
598 fprintf(f, " ]\n");
601 void
602 runmatch(uchar *code, Num *tn, Ref ref, Ref *var)
604 Ref stkbuf[20], *stk;
605 uchar *s, *pc;
606 int bc, i;
607 int n, nl, nr;
609 assert(rtype(ref) == RTmp);
610 stk = stkbuf;
611 pc = code;
612 while ((bc = *pc))
613 switch (bc) {
614 case 1: /* pushsym */
615 case 2: /* push */
616 assert(stk < &stkbuf[20]);
617 assert(rtype(ref) == RTmp);
618 nl = tn[ref.val].nl;
619 nr = tn[ref.val].nr;
620 if (bc == 1 && nl > nr) {
621 *stk++ = tn[ref.val].l;
622 ref = tn[ref.val].r;
623 } else {
624 *stk++ = tn[ref.val].r;
625 ref = tn[ref.val].l;
627 pc++;
628 break;
629 case 3: /* set */
630 var[*++pc] = ref;
631 if (*(pc + 1) == 0)
632 return;
633 /* fall through */
634 case 4: /* pop */
635 assert(stk > &stkbuf[0]);
636 ref = *--stk;
637 pc++;
638 break;
639 case 5: /* switch */
640 assert(rtype(ref) == RTmp);
641 n = tn[ref.val].n;
642 s = pc + 1;
643 for (i=*s++; i>0; i--, s++)
644 if (n == *s++)
645 break;
646 pc += *s;
647 break;
648 default: /* jump */
649 assert(bc >= 10);
650 pc = code + (bc - 10);
651 break;