amd64/emit.c

   1 #include "all.h"
   2
   3
   4 typedef struct E E;
   5
   6 struct E {
   7         FILE *f;
   8         Fn *fn;
   9         int fp;
  10         uint64_t fsz;
  11         int nclob;
  12 };
  13
/* Comparison codes paired with the x86 condition-code
 * suffix that tests them.  X(code, suffix) is expanded
 * twice in this file: once to build the flag entries of
 * omap[] ("set" suffix) and once to build the jump
 * suffix table of amd64_emitfn() ("j" suffix).
 * Floating-point codes are offset by NCmpI so that both
 * families live in a single index space. */
#define CMP(X) \
        X(Ciule,      "be") \
        X(Ciult,      "b")  \
        X(Cisle,      "le") \
        X(Cislt,      "l")  \
        X(Cisgt,      "g")  \
        X(Cisge,      "ge") \
        X(Ciugt,      "a")  \
        X(Ciuge,      "ae") \
        X(Cieq,       "z")  \
        X(Cine,       "nz") \
        X(NCmpI+Cfle, "be") \
        X(NCmpI+Cflt, "b")  \
        X(NCmpI+Cfgt, "a")  \
        X(NCmpI+Cfge, "ae") \
        X(NCmpI+Cfeq, "z")  \
        X(NCmpI+Cfne, "nz") \
        X(NCmpI+Cfo,  "np") \
        X(NCmpI+Cfuo, "p")
  33
enum {
        /* operand sizes; they select the column of
         * rname[] in regtoa() */
        SLong = 0,  /* 64-bit register name */
        SWord = 1,  /* 32-bit register name */
        SShort = 2, /* 16-bit register name */
        SByte = 3,  /* 8-bit register name */

        /* pseudo-classes for the cls field of omap[]
         * entries, resolved by the lookup in emitins() */
        Ki = -1, /* matches Kw and Kl */
        Ka = -2, /* matches all classes */
};
  43
/* Instruction format strings:
 *
 * if the format string starts with -, the instruction
 * is assumed to be 3-address and is put in 2-address
 * mode using an extra mov if necessary
 *
 * if the format string starts with +, the same as the
 * above applies, but commutativity is also assumed
 *
 * a format string starting with neither is emitted as
 * written, without any preliminary copy
 *
 * %k  is used to set the class of the instruction,
 *     it'll expand to "l", "q", "ss", "sd", depending
 *     on the instruction class
 * %0  designates the first argument
 * %1  designates the second argument
 * %=  designates the result
 * %%  emits a literal percent sign
 *
 * if %k is not used, a prefix to 0, 1, or = must be
 * added, it can be:
 *   M - memory reference
 *   L - long  (64 bits)
 *   W - word  (32 bits)
 *   H - short (16 bits)
 *   B - byte  (8 bits)
 *   S - single precision float
 *   D - double precision float
 *
 * The table below is searched linearly by emitins():
 * the first entry with the instruction's opcode whose
 * cls equals the instruction class, or is Ki for an
 * integer class, or is Ka, is used.  Entry order
 * therefore matters when one opcode has several
 * entries.  The { NOp, 0, 0 } entry ends the table.
 */
static struct {
        short op;
        short cls;
        char *fmt;
} omap[] = {
        { Oadd,    Ka, "+add%k %1, %=" },
        { Osub,    Ka, "-sub%k %1, %=" },
        { Oand,    Ki, "+and%k %1, %=" },
        { Oor,     Ki, "+or%k %1, %=" },
        { Oxor,    Ki, "+xor%k %1, %=" },
        { Osar,    Ki, "-sar%k %B1, %=" },
        { Oshr,    Ki, "-shr%k %B1, %=" },
        { Oshl,    Ki, "-shl%k %B1, %=" },
        { Omul,    Ki, "+imul%k %1, %=" },
        { Omul,    Ks, "+mulss %1, %=" },
        { Omul,    Kd, "+mulsd %1, %=" },
        { Odiv,    Ka, "-div%k %1, %=" },
        { Ostorel, Ka, "movq %L0, %M1" },
        { Ostorew, Ka, "movl %W0, %M1" },
        { Ostoreh, Ka, "movw %H0, %M1" },
        { Ostoreb, Ka, "movb %B0, %M1" },
        { Ostores, Ka, "movss %S0, %M1" },
        { Ostored, Ka, "movsd %D0, %M1" },
        { Oload,   Ka, "mov%k %M0, %=" },
        { Oloadsw, Kl, "movslq %M0, %L=" },
        { Oloadsw, Kw, "movl %M0, %W=" },
        { Oloaduw, Ki, "movl %M0, %W=" },
        { Oloadsh, Ki, "movsw%k %M0, %=" },
        { Oloaduh, Ki, "movzw%k %M0, %=" },
        { Oloadsb, Ki, "movsb%k %M0, %=" },
        { Oloadub, Ki, "movzb%k %M0, %=" },
        { Oextsw,  Kl, "movslq %W0, %L=" },
        { Oextuw,  Kl, "movl %W0, %W=" },
        { Oextsh,  Ki, "movsw%k %H0, %=" },
        { Oextuh,  Ki, "movzw%k %H0, %=" },
        { Oextsb,  Ki, "movsb%k %B0, %=" },
        { Oextub,  Ki, "movzb%k %B0, %=" },

        { Oexts,   Kd, "cvtss2sd %0, %=" },
        { Otruncd, Ks, "cvtsd2ss %0, %=" },
        { Ostosi,  Ki, "cvttss2si%k %0, %=" },
        { Odtosi,  Ki, "cvttsd2si%k %0, %=" },
        { Oswtof,  Ka, "cvtsi2%k %W0, %=" },
        { Osltof,  Ka, "cvtsi2%k %L0, %=" },
        { Ocast,   Ki, "movq %D0, %L=" },
        { Ocast,   Ka, "movq %L0, %D=" },

        { Oaddr,   Ki, "lea%k %M0, %=" },
        { Oswap,   Ki, "xchg%k %0, %1" },
        { Osign,   Kl, "cqto" },
        { Osign,   Kw, "cltd" },
        { Oxdiv,   Ki, "div%k %0" },
        { Oxidiv,  Ki, "idiv%k %0" },
        { Oxcmp,   Ks, "ucomiss %S0, %S1" },
        { Oxcmp,   Kd, "ucomisd %D0, %D1" },
        { Oxcmp,   Ki, "cmp%k %0, %1" },
        { Oxtest,  Ki, "test%k %0, %1" },
/* one flag-materializing entry per comparison of CMP():
 * set the low byte from the condition, then zero-extend
 * it to the width of the result */
#define X(c, s) \
        { Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" },
        CMP(X)
#undef X
        { NOp, 0, 0 }
};
 133
/* Assembly names of the integer registers.  The first
 * index is the register id, the second is the operand
 * size (SLong, SWord, SShort, SByte); see regtoa(). */
static char *rname[][4] = {
        [RAX] = {"rax", "eax", "ax", "al"},
        [RBX] = {"rbx", "ebx", "bx", "bl"},
        [RCX] = {"rcx", "ecx", "cx", "cl"},
        [RDX] = {"rdx", "edx", "dx", "dl"},
        [RSI] = {"rsi", "esi", "si", "sil"},
        [RDI] = {"rdi", "edi", "di", "dil"},
        [RBP] = {"rbp", "ebp", "bp", "bpl"},
        [RSP] = {"rsp", "esp", "sp", "spl"},
        [R8 ] = {"r8" , "r8d", "r8w", "r8b"},
        [R9 ] = {"r9" , "r9d", "r9w", "r9b"},
        [R10] = {"r10", "r10d", "r10w", "r10b"},
        [R11] = {"r11", "r11d", "r11w", "r11b"},
        [R12] = {"r12", "r12d", "r12w", "r12b"},
        [R13] = {"r13", "r13d", "r13w", "r13b"},
        [R14] = {"r14", "r14d", "r14w", "r14b"},
        [R15] = {"r15", "r15d", "r15w", "r15b"},
};
 152
 153
 154 static int
 155 slot(Ref r, E *e)
 156 {
 157         int s;
 158
 159         s = rsval(r);
 160         assert(s <= e->fn->slot);
 161         /* specific to NAlign == 3 */
 162         if (s < 0) {
 163                 if (e->fp == RSP)
 164                         return 4*-s - 8 + e->fsz + e->nclob*8;
 165                 else
 166                         return 4*-s;
 167         }
 168         else if (e->fp == RSP)
 169                 return 4*s + e->nclob*8;
 170         else if (e->fn->vararg)
 171                 return -176 + -4 * (e->fn->slot - s);
 172         else
 173                 return -4 * (e->fn->slot - s);
 174 }
 175
 176 static void
 177 emitcon(Con *con, E *e)
 178 {
 179         char *p, *l;
 180
 181         switch (con->type) {
 182         case CAddr:
 183                 l = str(con->sym.id);
 184                 p = l[0] == '"' ? "" : T.assym;
 185                 if (con->sym.type == SThr) {
 186                         if (T.apple)
 187                                 fprintf(e->f, "%s%s@TLVP", p, l);
 188                         else
 189                                 fprintf(e->f, "%%fs:%s%s@tpoff", p, l);
 190                 } else
 191                         fprintf(e->f, "%s%s", p, l);
 192                 if (con->bits.i)
 193                         fprintf(e->f, "%+"PRId64, con->bits.i);
 194                 break;
 195         case CBits:
 196                 fprintf(e->f, "%"PRId64, con->bits.i);
 197                 break;
 198         default:
 199                 die("unreachable");
 200         }
 201 }
 202
 203 static char *
 204 regtoa(int reg, int sz)
 205 {
 206         static char buf[6];
 207
 208         assert(reg <= XMM15);
 209         if (reg >= XMM0) {
 210                 sprintf(buf, "xmm%d", reg-XMM0);
 211                 return buf;
 212         } else
 213                 return rname[reg][sz];
 214 }
 215
 216 static Ref
 217 getarg(char c, Ins *i)
 218 {
 219         switch (c) {
 220         case '0':
 221                 return i->arg[0];
 222         case '1':
 223                 return i->arg[1];
 224         case '=':
 225                 return i->to;
 226         default:
 227                 die("invalid arg letter %c", c);
 228         }
 229 }
 230
 231 static void emitins(Ins, E *);
 232
 233 static void
 234 emitcopy(Ref r1, Ref r2, int k, E *e)
 235 {
 236         Ins icp;
 237
 238         icp.op = Ocopy;
 239         icp.arg[0] = r2;
 240         icp.to = r1;
 241         icp.cls = k;
 242         emitins(icp, e);
 243 }
 244
/* Write the assembly text for instruction i to e->f,
 * following the format string s (the syntax is described
 * in the comment above omap[]).
 *
 * A leading '+' or '-' first puts the instruction in
 * 2-address form: for '+' the two arguments are swapped
 * when the second one is the result, then in both cases
 * the first argument is copied into the result with
 * emitcopy().  The output starts with a tab and ends
 * with a newline.
 *
 * Side effects besides the output: *i may be modified
 * (argument swap), and a memory operand whose base is a
 * stack slot is rewritten in place in e->fn->mem[]. */
static void
emitf(char *s, Ins *i, E *e)
{
        /* suffixes printed for %k, indexed by class */
        static char clstoa[][3] = {"l", "q", "ss", "sd"};
        char c;
        int sz;
        Ref ref;
        Mem *m;
        Con off;

        switch (*s) {
        case '+':
                /* commutative: make sure the result is not
                 * the second argument */
                if (req(i->arg[1], i->to)) {
                        ref = i->arg[0];
                        i->arg[0] = i->arg[1];
                        i->arg[1] = ref;
                }
                /* fall through */
        case '-':
                /* the copy below would clobber the second
                 * argument if it were the result */
                assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) &&
                        "cannot convert to 2-address");
                emitcopy(i->to, i->arg[0], i->cls, e);
                s++;
                break;
        }

        fputc('\t', e->f);
Next:
        /* copy literal text up to the next '%'; the end of
         * the format string finishes the line */
        while ((c = *s++) != '%')
                if (!c) {
                        fputc('\n', e->f);
                        return;
                } else
                        fputc(c, e->f);
        switch ((c = *s++)) {
        case '%':
                fputc('%', e->f);
                break;
        case 'k':
                fputs(clstoa[i->cls], e->f);
                break;
        case '0':
        case '1':
        case '=':
                /* no size prefix: the size follows the class
                 * of the instruction */
                sz = KWIDE(i->cls) ? SLong : SWord;
                /* back up so that Ref reads the operand
                 * letter again */
                s--;
                goto Ref;
        case 'D':
        case 'S':
                sz = SLong; /* does not matter for floats */
        Ref:
                /* c becomes the operand letter (0, 1 or =) */
                c = *s++;
                ref = getarg(c, i);
                switch (rtype(ref)) {
                case RTmp:
                        assert(isreg(ref));
                        fprintf(e->f, "%%%s", regtoa(ref.val, sz));
                        break;
                case RSlot:
                        /* displacement(frame register) */
                        fprintf(e->f, "%d(%%%s)",
                                slot(ref, e),
                                regtoa(e->fp, SLong)
                        );
                        break;
                case RMem:
                Mem:
                        m = &e->fn->mem[ref.val];
                        if (rtype(m->base) == RSlot) {
                                /* fold the slot displacement into
                                 * the offset and use the frame
                                 * register as base; the base is no
                                 * longer a slot afterwards, so this
                                 * happens once per Mem */
                                off.type = CBits;
                                off.bits.i = slot(m->base, e);
                                addcon(&m->offset, &off, 1);
                                m->base = TMP(e->fp);
                        }
                        if (m->offset.type != CUndef)
                                emitcon(&m->offset, e);
                        fputc('(', e->f);
                        if (!req(m->base, R))
                                fprintf(e->f, "%%%s",
                                        regtoa(m->base.val, SLong)
                                );
                        else if (m->offset.type == CAddr)
                                /* symbol without a base register */
                                fprintf(e->f, "%%rip");
                        if (!req(m->index, R))
                                fprintf(e->f, ", %%%s, %d",
                                        regtoa(m->index.val, SLong),
                                        m->scale
                                );
                        fputc(')', e->f);
                        break;
                case RCon:
                        /* immediate operand */
                        fputc('$', e->f);
                        emitcon(&e->fn->con[ref.val], e);
                        break;
                default:
                        die("unreachable");
                }
                break;
        case 'L':
                sz = SLong;
                goto Ref;
        case 'W':
                sz = SWord;
                goto Ref;
        case 'H':
                sz = SShort;
                goto Ref;
        case 'B':
                sz = SByte;
                goto Ref;
        case 'M':
                /* the operand is printed as a memory reference */
                c = *s++;
                ref = getarg(c, i);
                switch (rtype(ref)) {
                case RMem:
                        goto Mem;
                case RSlot:
                        fprintf(e->f, "%d(%%%s)",
                                slot(ref, e),
                                regtoa(e->fp, SLong)
                        );
                        break;
                case RCon:
                        off = e->fn->con[ref.val];
                        emitcon(&off, e);
                        /* symbols are addressed relative to %rip,
                         * except thread-local ones on non-Apple
                         * targets, which emitcon() prints with
                         * an %fs: prefix */
                        if (off.type == CAddr)
                        if (off.sym.type != SThr || T.apple)
                                fprintf(e->f, "(%%rip)");
                        break;
                case RTmp:
                        /* the register holds the address */
                        assert(isreg(ref));
                        fprintf(e->f, "(%%%s)", regtoa(ref.val, SLong));
                        break;
                default:
                        die("unreachable");
                }
                break;
        default:
                die("invalid format specifier %%%c", c);
        }
        goto Next;
}
 386
/* 16-byte constants, indexed by floating-point class,
 * that have only the sign bit of the low element set.
 * The Oneg case of emitins() passes them to stashbits()
 * and xors the result into the register to negate it. */
static void *negmask[4] = {
        [Ks] = (uint32_t[4]){ 0x80000000 },
        [Kd] = (uint64_t[2]){ 0x8000000000000000 },
};
 391
/* Emit the assembly for one instruction.  Opcodes with
 * no dedicated case, and the dedicated cases that end
 * with `goto Table`, are looked up in omap[] and printed
 * with emitf(); it is a fatal error if the table has no
 * matching entry.  The instruction is received by value,
 * so the adjustments made to i below stay local. */
static void
emitins(Ins i, E *e)
{
        Ref r;
        int64_t val;
        int o, t0;
        Ins ineg;
        Con *con;
        char *sym;

        switch (i.op) {
        default:
        Table:
                /* most instructions are just pulled out of
                 * the table omap[], some special cases are
                 * detailed below */
                for (o=0;; o++) {
                        /* this linear search should really be a binary
                         * search */
                        if (omap[o].op == NOp)
                                die("no match for %s(%c)",
                                        optab[i.op].name, "wlsd"[i.cls]);
                        if (omap[o].op == i.op)
                        if (omap[o].cls == i.cls
                        || (omap[o].cls == Ki && KBASE(i.cls) == 0)
                        || (omap[o].cls == Ka))
                                break;
                }
                emitf(omap[o].fmt, &i, e);
                break;
        case Onop:
                /* just do nothing for nops, they are inserted
                 * by some passes */
                break;
        case Omul:
                /* here, we try to use the 3-address form
                 * of multiplication when possible */
                if (rtype(i.arg[1]) == RCon) {
                        /* move the constant to the first argument */
                        r = i.arg[0];
                        i.arg[0] = i.arg[1];
                        i.arg[1] = r;
                }
                if (KBASE(i.cls) == 0 /* only available for ints */
                && rtype(i.arg[0]) == RCon
                && rtype(i.arg[1]) == RTmp) {
                        emitf("imul%k %0, %1, %=", &i, e);
                        break;
                }
                goto Table;
        case Osub:
                /* we have to use the negation trick to handle
                 * some 3-address subtractions */
                if (req(i.to, i.arg[1]) && !req(i.arg[0], i.to)) {
                        /* to = arg0 - to is emitted as
                         * to = -to; to += arg0 */
                        ineg = (Ins){Oneg, i.cls, i.to, {i.to}};
                        emitins(ineg, e);
                        emitf("add%k %0, %=", &i, e);
                        break;
                }
                goto Table;
        case Oneg:
                if (!req(i.to, i.arg[0]))
                        emitf("mov%k %0, %=", &i, e);
                if (KBASE(i.cls) == 0)
                        emitf("neg%k %=", &i, e);
                else
                        /* floats: flip the sign bit with the
                         * stashed negmask[] constant */
                        fprintf(e->f,
                                "\txorp%c %sfp%d(%%rip), %%%s\n",
                                "xxsd"[i.cls],
                                T.asloc,
                                stashbits(negmask[i.cls], 16),
                                regtoa(i.to.val, SLong)
                        );
                break;
        case Odiv:
                /* use xmm15 to adjust the instruction when the
                 * conversion to 2-address in emitf() would fail */
                if (req(i.to, i.arg[1])) {
                        /* save the divisor in xmm15, then load
                         * the dividend into the result */
                        i.arg[1] = TMP(XMM0+15);
                        emitf("mov%k %=, %1", &i, e);
                        emitf("mov%k %0, %=", &i, e);
                        i.arg[0] = i.to;
                }
                goto Table;
        case Ocopy:
                /* copies are used for many things; see my note
                 * to understand how to load big constants:
                 * https://c9x.me/notes/2015-09-19.html */
                assert(rtype(i.to) != RMem);
                /* nothing to emit without a source or a
                 * destination, or when they are the same */
                if (req(i.to, R) || req(i.arg[0], R))
                        break;
                if (req(i.to, i.arg[0]))
                        break;
                t0 = rtype(i.arg[0]);
                if (i.cls == Kl
                && t0 == RCon
                && e->fn->con[i.arg[0].val].type == CBits) {
                        val = e->fn->con[i.arg[0].val].bits.i;
                        /* a constant that fits in 32 unsigned
                         * bits is loaded with a 32-bit move;
                         * this relies on 32-bit register writes
                         * zero-extending on x86-64 */
                        if (isreg(i.to))
                        if (val >= 0 && val <= UINT32_MAX) {
                                emitf("movl %W0, %W=", &i, e);
                                break;
                        }
                        /* a constant outside the signed 32-bit
                         * range is stored to a slot as two
                         * 32-bit halves */
                        if (rtype(i.to) == RSlot)
                        if (val < INT32_MIN || val > INT32_MAX) {
                                emitf("movl %0, %=", &i, e);
                                emitf("movl %0>>32, 4+%=", &i, e);
                                break;
                        }
                }
                /* symbol addresses are loaded with lea */
                if (isreg(i.to)
                && t0 == RCon
                && e->fn->con[i.arg[0].val].type == CAddr) {
                        emitf("lea%k %M0, %=", &i, e);
                        break;
                }
                /* memory-to-slot copies go through xmm15 */
                if (rtype(i.to) == RSlot
                && (t0 == RSlot || t0 == RMem)) {
                        i.cls = KWIDE(i.cls) ? Kd : Ks;
                        i.arg[1] = TMP(XMM0+15);
                        emitf("mov%k %0, %1", &i, e);
                        emitf("mov%k %1, %=", &i, e);
                        break;
                }
                /* conveniently, the assembler knows if it
                 * should use movabsq when reading movq */
                emitf("mov%k %0, %=", &i, e);
                break;
        case Oaddr:
                if (!T.apple
                && rtype(i.arg[0]) == RCon
                && e->fn->con[i.arg[0].val].sym.type == SThr) {
                        /* derive the symbol address from the TCB
                         * address at offset 0 of %fs */
                        assert(isreg(i.to));
                        con = &e->fn->con[i.arg[0].val];
                        sym = str(con->sym.id);
                        emitf("movq %%fs:0, %L=", &i, e);
                        fprintf(e->f, "\tleaq %s%s@tpoff",
                                sym[0] == '"' ? "" : T.assym, sym);
                        if (con->bits.i)
                                fprintf(e->f, "%+"PRId64,
                                        con->bits.i);
                        fprintf(e->f, "(%%%s), %%%s\n",
                                regtoa(i.to.val, SLong),
                                regtoa(i.to.val, SLong));
                        break;
                }
                goto Table;
        case Ocall:
                /* calls simply have a weird syntax in AT&T
                 * assembly... */
                switch (rtype(i.arg[0])) {
                case RCon:
                        fprintf(e->f, "\tcallq ");
                        emitcon(&e->fn->con[i.arg[0].val], e);
                        fprintf(e->f, "\n");
                        break;
                case RTmp:
                        /* indirect call through a register */
                        emitf("callq *%L0", &i, e);
                        break;
                default:
                        die("invalid call argument");
                }
                break;
        case Osalloc:
                /* there is no good reason why this is here
                 * maybe we should split Osalloc in 2 different
                 * instructions depending on the result
                 */
                assert(e->fp == RBP);
                emitf("subq %L0, %%rsp", &i, e);
                /* the result, if any, is the new stack pointer */
                if (!req(i.to, R))
                        emitcopy(i.to, TMP(RSP), Kl, e);
                break;
        case Oswap:
                if (KBASE(i.cls) == 0)
                        goto Table;
                /* for floats, there is no swap instruction
                 * so we use xmm15 as a temporary
                 */
                emitcopy(TMP(XMM0+15), i.arg[0], i.cls, e);
                emitcopy(i.arg[0], i.arg[1], i.cls, e);
                emitcopy(i.arg[1], TMP(XMM0+15), i.cls, e);
                break;
        case Odbgloc:
                emitdbgloc(i.arg[0].val, i.arg[1].val, e->f);
                break;
        }
}
 581
 582 static void
 583 framesz(E *e)
 584 {
 585         uint64_t i, o, f;
 586
 587         /* specific to NAlign == 3 */
 588         o = 0;
 589         if (!e->fn->leaf) {
 590                 for (i=0, o=0; i<NCLR; i++)
 591                         o ^= e->fn->reg >> amd64_sysv_rclob[i];
 592                 o &= 1;
 593         }
 594         f = e->fn->slot;
 595         f = (f + 3) & -4;
 596         if (f > 0
 597         && e->fp == RSP
 598         && e->fn->salign == 4)
 599                 f += 2;
 600         e->fsz = 4*f + 8*o + 176*e->fn->vararg;
 601 }
 602
 603 void
 604 amd64_emitfn(Fn *fn, FILE *f)
 605 {
 606         static char *ctoa[] = {
 607         #define X(c, s) [c] = s,
 608                 CMP(X)
 609         #undef X
 610         };
 611         static int id0;
 612         Blk *b, *s;
 613         Ins *i, itmp;
 614         int *r, c, o, n, lbl;
 615         E *e;
 616
 617         e = &(E){.f = f, .fn = fn};
 618         emitfnlnk(fn->name, &fn->lnk, f);
 619         fputs("\tendbr64\n", f);
 620         if (!fn->leaf || fn->vararg || fn->dynalloc) {
 621                 e->fp = RBP;
 622                 fputs("\tpushq %rbp\n\tmovq %rsp, %rbp\n", f);
 623         } else
 624                 e->fp = RSP;
 625         framesz(e);
 626         if (e->fsz)
 627                 fprintf(f, "\tsubq $%"PRIu64", %%rsp\n", e->fsz);
 628         if (fn->vararg) {
 629                 o = -176;
 630                 for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8)
 631                         fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o);
 632                 for (n=0; n<8; ++n, o+=16)
 633                         fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o);
 634         }
 635         for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR]; r++)
 636                 if (fn->reg & BIT(*r)) {
 637                         itmp.arg[0] = TMP(*r);
 638                         emitf("pushq %L0", &itmp, e);
 639                         e->nclob++;
 640                 }
 641
 642         for (lbl=0, b=fn->start; b; b=b->link) {
 643                 if (lbl || b->npred > 1)
 644                         fprintf(f, "%sbb%d:\n", T.asloc, id0+b->id);
 645                 for (i=b->ins; i!=&b->ins[b->nins]; i++)
 646                         emitins(*i, e);
 647                 lbl = 1;
 648                 switch (b->jmp.type) {
 649                 case Jhlt:
 650                         fprintf(f, "\tud2\n");
 651                         break;
 652                 case Jret0:
 653                         if (fn->dynalloc)
 654                                 fprintf(f,
 655                                         "\tmovq %%rbp, %%rsp\n"
 656                                         "\tsubq $%"PRIu64", %%rsp\n",
 657                                         e->fsz + e->nclob * 8);
 658                         for (r=&amd64_sysv_rclob[NCLR]; r>amd64_sysv_rclob;)
 659                                 if (fn->reg & BIT(*--r)) {
 660                                         itmp.arg[0] = TMP(*r);
 661                                         emitf("popq %L0", &itmp, e);
 662                                 }
 663                         if (e->fp == RBP)
 664                                 fputs("\tleave\n", f);
 665                         else if (e->fsz)
 666                                 fprintf(f,
 667                                         "\taddq $%"PRIu64", %%rsp\n",
 668                                         e->fsz);
 669                         fputs("\tret\n", f);
 670                         break;
 671                 case Jjmp:
 672                 Jmp:
 673                         if (b->s1 != b->link)
 674                                 fprintf(f, "\tjmp %sbb%d\n",
 675                                         T.asloc, id0+b->s1->id);
 676                         else
 677                                 lbl = 0;
 678                         break;
 679                 default:
 680                         c = b->jmp.type - Jjf;
 681                         if (0 <= c && c <= NCmp) {
 682                                 if (b->link == b->s2) {
 683                                         s = b->s1;
 684                                         b->s1 = b->s2;
 685                                         b->s2 = s;
 686                                 } else
 687                                         c = cmpneg(c);
 688                                 fprintf(f, "\tj%s %sbb%d\n", ctoa[c],
 689                                         T.asloc, id0+b->s2->id);
 690                                 goto Jmp;
 691                         }
 692                         die("unhandled jump %d", b->jmp.type);
 693                 }
 694         }
 695         id0 += fn->nblk;
 696         if (!T.apple)
 697                 elf_emitfnfin(fn->name, f);
 698 }