qbe has its own magic
[qbe.git] / rv64 / isel.c
blob8921a07ad4da116018bf136bc6d364a7d2196ea7
1 #include "all.h"
3 static int
4 memarg(Ref *r, int op, Ins *i)
6 if (isload(op) || op == Ocall)
7 return r == &i->arg[0];
8 if (isstore(op))
9 return r == &i->arg[1];
10 return 0;
13 static int
14 immarg(Ref *r, int op, Ins *i)
16 return rv64_op[op].imm && r == &i->arg[1];
19 static void
20 fixarg(Ref *r, int k, Ins *i, Fn *fn)
22 char buf[32];
23 Ref r0, r1;
24 int s, n, op;
25 Con *c;
27 r0 = r1 = *r;
28 op = i ? i->op : Ocopy;
29 switch (rtype(r0)) {
30 case RCon:
31 c = &fn->con[r0.val];
32 if (c->type == CAddr && memarg(r, op, i))
33 break;
34 if (c->type == CBits && immarg(r, op, i))
35 if (-2048 <= c->bits.i && c->bits.i < 2048)
36 break;
37 r1 = newtmp("isel", k, fn);
38 if (KBASE(k) == 1) {
39 /* load floating points from memory
40 * slots, they can't be used as
41 * immediates
43 assert(c->type == CBits);
44 n = stashbits(&c->bits, KWIDE(k) ? 8 : 4);
45 vgrow(&fn->con, ++fn->ncon);
46 c = &fn->con[fn->ncon-1];
47 sprintf(buf, "\"%sfp%d\"", T.asloc, n);
48 *c = (Con){.type = CAddr};
49 c->sym.id = intern(buf);
50 emit(Oload, k, r1, CON(c-fn->con), R);
51 break;
53 emit(Ocopy, k, r1, r0, R);
54 break;
55 case RTmp:
56 if (isreg(r0))
57 break;
58 s = fn->tmp[r0.val].slot;
59 if (s != -1) {
60 /* aggregate passed by value on
61 * stack, or fast local address,
62 * replace with slot if we can
64 if (memarg(r, op, i)) {
65 r1 = SLOT(s);
66 break;
68 r1 = newtmp("isel", k, fn);
69 emit(Oaddr, k, r1, SLOT(s), R);
70 break;
72 if (k == Kw && fn->tmp[r0.val].cls == Kl) {
73 /* TODO: this sign extension isn't needed
74 * for 32-bit arithmetic instructions
76 r1 = newtmp("isel", k, fn);
77 emit(Oextsw, Kl, r1, r0, R);
78 } else {
79 assert(k == fn->tmp[r0.val].cls);
81 break;
83 *r = r1;
86 static void
87 negate(Ref *pr, Fn *fn)
89 Ref r;
91 r = newtmp("isel", Kw, fn);
92 emit(Oxor, Kw, *pr, r, getcon(1, fn));
93 *pr = r;
96 static void
97 selcmp(Ins i, int k, int op, Fn *fn)
99 Ins *icmp;
100 Ref r, r0, r1;
101 int sign, swap, neg;
103 switch (op) {
104 case Cieq:
105 r = newtmp("isel", k, fn);
106 emit(Oreqz, i.cls, i.to, r, R);
107 emit(Oxor, k, r, i.arg[0], i.arg[1]);
108 icmp = curi;
109 fixarg(&icmp->arg[0], k, icmp, fn);
110 fixarg(&icmp->arg[1], k, icmp, fn);
111 return;
112 case Cine:
113 r = newtmp("isel", k, fn);
114 emit(Ornez, i.cls, i.to, r, R);
115 emit(Oxor, k, r, i.arg[0], i.arg[1]);
116 icmp = curi;
117 fixarg(&icmp->arg[0], k, icmp, fn);
118 fixarg(&icmp->arg[1], k, icmp, fn);
119 return;
120 case Cisge: sign = 1, swap = 0, neg = 1; break;
121 case Cisgt: sign = 1, swap = 1, neg = 0; break;
122 case Cisle: sign = 1, swap = 1, neg = 1; break;
123 case Cislt: sign = 1, swap = 0, neg = 0; break;
124 case Ciuge: sign = 0, swap = 0, neg = 1; break;
125 case Ciugt: sign = 0, swap = 1, neg = 0; break;
126 case Ciule: sign = 0, swap = 1, neg = 1; break;
127 case Ciult: sign = 0, swap = 0, neg = 0; break;
128 case NCmpI+Cfeq:
129 case NCmpI+Cfge:
130 case NCmpI+Cfgt:
131 case NCmpI+Cfle:
132 case NCmpI+Cflt:
133 swap = 0, neg = 0;
134 break;
135 case NCmpI+Cfuo:
136 negate(&i.to, fn);
137 /* fall through */
138 case NCmpI+Cfo:
139 r0 = newtmp("isel", i.cls, fn);
140 r1 = newtmp("isel", i.cls, fn);
141 emit(Oand, i.cls, i.to, r0, r1);
142 op = KWIDE(k) ? Oceqd : Oceqs;
143 emit(op, i.cls, r0, i.arg[0], i.arg[0]);
144 icmp = curi;
145 fixarg(&icmp->arg[0], k, icmp, fn);
146 fixarg(&icmp->arg[1], k, icmp, fn);
147 emit(op, i.cls, r1, i.arg[1], i.arg[1]);
148 icmp = curi;
149 fixarg(&icmp->arg[0], k, icmp, fn);
150 fixarg(&icmp->arg[1], k, icmp, fn);
151 return;
152 case NCmpI+Cfne:
153 swap = 0, neg = 1;
154 i.op = KWIDE(k) ? Oceqd : Oceqs;
155 break;
156 default:
157 assert(0 && "unknown comparison");
159 if (op < NCmpI)
160 i.op = sign ? Ocsltl : Ocultl;
161 if (swap) {
162 r = i.arg[0];
163 i.arg[0] = i.arg[1];
164 i.arg[1] = r;
166 if (neg)
167 negate(&i.to, fn);
168 emiti(i);
169 icmp = curi;
170 fixarg(&icmp->arg[0], k, icmp, fn);
171 fixarg(&icmp->arg[1], k, icmp, fn);
174 static void
175 sel(Ins i, Fn *fn)
177 Ins *i0;
178 int ck, cc;
180 if (INRANGE(i.op, Oalloc, Oalloc1)) {
181 i0 = curi - 1;
182 salloc(i.to, i.arg[0], fn);
183 fixarg(&i0->arg[0], Kl, i0, fn);
184 return;
186 if (iscmp(i.op, &ck, &cc)) {
187 selcmp(i, ck, cc, fn);
188 return;
190 if (i.op != Onop) {
191 emiti(i);
192 i0 = curi; /* fixarg() can change curi */
193 fixarg(&i0->arg[0], argcls(&i, 0), i0, fn);
194 fixarg(&i0->arg[1], argcls(&i, 1), i0, fn);
198 static void
199 seljmp(Blk *b, Fn *fn)
201 /* TODO: replace cmp+jnz with beq/bne/blt[u]/bge[u] */
202 if (b->jmp.type == Jjnz)
203 fixarg(&b->jmp.arg, Kw, 0, fn);
206 void
207 rv64_isel(Fn *fn)
209 Blk *b, **sb;
210 Ins *i;
211 Phi *p;
212 uint n;
213 int al;
214 int64_t sz;
216 /* assign slots to fast allocs */
217 b = fn->start;
218 /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
219 for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
220 for (i=b->ins; i<&b->ins[b->nins]; i++)
221 if (i->op == al) {
222 if (rtype(i->arg[0]) != RCon)
223 break;
224 sz = fn->con[i->arg[0].val].bits.i;
225 if (sz < 0 || sz >= INT_MAX-15)
226 err("invalid alloc size %"PRId64, sz);
227 sz = (sz + n-1) & -n;
228 sz /= 4;
229 if (sz > INT_MAX - fn->slot)
230 die("alloc too large");
231 fn->tmp[i->to.val].slot = fn->slot;
232 fn->slot += sz;
233 *i = (Ins){.op = Onop};
236 for (b=fn->start; b; b=b->link) {
237 curi = &insb[NIns];
238 for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
239 for (p=(*sb)->phi; p; p=p->link) {
240 for (n=0; p->blk[n] != b; n++)
241 assert(n+1 < p->narg);
242 fixarg(&p->arg[n], p->cls, 0, fn);
244 seljmp(b, fn);
245 for (i=&b->ins[b->nins]; i!=b->ins;)
246 sel(*--i, fn);
247 b->nins = &insb[NIns] - curi;
248 idup(&b->ins, curi, b->nins);
251 if (debug['I']) {
252 fprintf(stderr, "\n> After instruction selection:\n");
253 printfn(fn, stderr);