40 Ki
= -1, /* matches Kw and Kl */
41 Ka
= -2, /* matches all classes */
44 /* Instruction format strings:
46 * if the format string starts with -, the instruction
47 * is assumed to be 3-address and is put in 2-address
48 * mode using an extra mov if necessary
50 * if the format string starts with +, the same as the
51 * above applies, but commutativity is also assumed
53 * %k is used to set the class of the instruction,
54 * it'll expand to "l", "q", "ss", "sd", depending
55 * on the instruction class
56 * %0 designates the first argument
57 * %1 designates the second argument
58 * %= designates the result
60 * if %k is not used, a prefix to 0, 1, or = must be
62 * M - memory reference
67 * S - single precision float
68 * D - double precision float
75 { Oadd
, Ka
, "+add%k %1, %=" },
76 { Osub
, Ka
, "-sub%k %1, %=" },
77 { Oand
, Ki
, "+and%k %1, %=" },
78 { Oor
, Ki
, "+or%k %1, %=" },
79 { Oxor
, Ki
, "+xor%k %1, %=" },
80 { Osar
, Ki
, "-sar%k %B1, %=" },
81 { Oshr
, Ki
, "-shr%k %B1, %=" },
82 { Oshl
, Ki
, "-shl%k %B1, %=" },
83 { Omul
, Ki
, "+imul%k %1, %=" },
84 { Omul
, Ks
, "+mulss %1, %=" },
85 { Omul
, Kd
, "+mulsd %1, %=" },
86 { Odiv
, Ka
, "-div%k %1, %=" },
87 { Ostorel
, Ka
, "movq %L0, %M1" },
88 { Ostorew
, Ka
, "movl %W0, %M1" },
89 { Ostoreh
, Ka
, "movw %H0, %M1" },
90 { Ostoreb
, Ka
, "movb %B0, %M1" },
91 { Ostores
, Ka
, "movss %S0, %M1" },
92 { Ostored
, Ka
, "movsd %D0, %M1" },
93 { Oload
, Ka
, "mov%k %M0, %=" },
94 { Oloadsw
, Kl
, "movslq %M0, %L=" },
95 { Oloadsw
, Kw
, "movl %M0, %W=" },
96 { Oloaduw
, Ki
, "movl %M0, %W=" },
97 { Oloadsh
, Ki
, "movsw%k %M0, %=" },
98 { Oloaduh
, Ki
, "movzw%k %M0, %=" },
99 { Oloadsb
, Ki
, "movsb%k %M0, %=" },
100 { Oloadub
, Ki
, "movzb%k %M0, %=" },
101 { Oextsw
, Kl
, "movslq %W0, %L=" },
102 { Oextuw
, Kl
, "movl %W0, %W=" },
103 { Oextsh
, Ki
, "movsw%k %H0, %=" },
104 { Oextuh
, Ki
, "movzw%k %H0, %=" },
105 { Oextsb
, Ki
, "movsb%k %B0, %=" },
106 { Oextub
, Ki
, "movzb%k %B0, %=" },
108 { Oexts
, Kd
, "cvtss2sd %0, %=" },
109 { Otruncd
, Ks
, "cvtsd2ss %0, %=" },
110 { Ostosi
, Ki
, "cvttss2si%k %0, %=" },
111 { Odtosi
, Ki
, "cvttsd2si%k %0, %=" },
112 { Oswtof
, Ka
, "cvtsi2%k %W0, %=" },
113 { Osltof
, Ka
, "cvtsi2%k %L0, %=" },
114 { Ocast
, Ki
, "movq %D0, %L=" },
115 { Ocast
, Ka
, "movq %L0, %D=" },
117 { Oaddr
, Ki
, "lea%k %M0, %=" },
118 { Oswap
, Ki
, "xchg%k %0, %1" },
119 { Osign
, Kl
, "cqto" },
120 { Osign
, Kw
, "cltd" },
121 { Oxdiv
, Ki
, "div%k %0" },
122 { Oxidiv
, Ki
, "idiv%k %0" },
123 { Oxcmp
, Ks
, "ucomiss %S0, %S1" },
124 { Oxcmp
, Kd
, "ucomisd %D0, %D1" },
125 { Oxcmp
, Ki
, "cmp%k %0, %1" },
126 { Oxtest
, Ki
, "test%k %0, %1" },
128 { Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" },
134 static char *rname
[][4] = {
135 [RAX
] = {"rax", "eax", "ax", "al"},
136 [RBX
] = {"rbx", "ebx", "bx", "bl"},
137 [RCX
] = {"rcx", "ecx", "cx", "cl"},
138 [RDX
] = {"rdx", "edx", "dx", "dl"},
139 [RSI
] = {"rsi", "esi", "si", "sil"},
140 [RDI
] = {"rdi", "edi", "di", "dil"},
141 [RBP
] = {"rbp", "ebp", "bp", "bpl"},
142 [RSP
] = {"rsp", "esp", "sp", "spl"},
143 [R8
] = {"r8" , "r8d", "r8w", "r8b"},
144 [R9
] = {"r9" , "r9d", "r9w", "r9b"},
145 [R10
] = {"r10", "r10d", "r10w", "r10b"},
146 [R11
] = {"r11", "r11d", "r11w", "r11b"},
147 [R12
] = {"r12", "r12d", "r12w", "r12b"},
148 [R13
] = {"r13", "r13d", "r13w", "r13b"},
149 [R14
] = {"r14", "r14d", "r14w", "r14b"},
150 [R15
] = {"r15", "r15d", "r15w", "r15b"},
160 assert(s
<= e
->fn
->slot
);
161 /* specific to NAlign == 3 */
164 return 4*-s
- 8 + e
->fsz
+ e
->nclob
*8;
168 else if (e
->fp
== RSP
)
169 return 4*s
+ e
->nclob
*8;
170 else if (e
->fn
->vararg
)
171 return -176 + -4 * (e
->fn
->slot
- s
);
173 return -4 * (e
->fn
->slot
- s
);
177 emitcon(Con
*con
, E
*e
)
183 l
= str(con
->sym
.id
);
184 p
= l
[0] == '"' ? "" : T
.assym
;
185 if (con
->sym
.type
== SThr
) {
187 fprintf(e
->f
, "%s%s@TLVP", p
, l
);
189 fprintf(e
->f
, "%%fs:%s%s@tpoff", p
, l
);
191 fprintf(e
->f
, "%s%s", p
, l
);
193 fprintf(e
->f
, "%+"PRId64
, con
->bits
.i
);
196 fprintf(e
->f
, "%"PRId64
, con
->bits
.i
);
204 regtoa(int reg
, int sz
)
208 assert(reg
<= XMM15
);
210 sprintf(buf
, "xmm%d", reg
-XMM0
);
213 return rname
[reg
][sz
];
217 getarg(char c
, Ins
*i
)
227 die("invalid arg letter %c", c
);
231 static void emitins(Ins
, E
*);
234 emitcopy(Ref r1
, Ref r2
, int k
, E
*e
)
246 emitf(char *s
, Ins
*i
, E
*e
)
248 static char clstoa
[][3] = {"l", "q", "ss", "sd"};
257 if (req(i
->arg
[1], i
->to
)) {
259 i
->arg
[0] = i
->arg
[1];
264 assert((!req(i
->arg
[1], i
->to
) || req(i
->arg
[0], i
->to
)) &&
265 "cannot convert to 2-address");
266 emitcopy(i
->to
, i
->arg
[0], i
->cls
, e
);
273 while ((c
= *s
++) != '%')
279 switch ((c
= *s
++)) {
284 fputs(clstoa
[i
->cls
], e
->f
);
289 sz
= KWIDE(i
->cls
) ? SLong
: SWord
;
294 sz
= SLong
; /* does not matter for floats */
298 switch (rtype(ref
)) {
301 fprintf(e
->f
, "%%%s", regtoa(ref
.val
, sz
));
304 fprintf(e
->f
, "%d(%%%s)",
311 m
= &e
->fn
->mem
[ref
.val
];
312 if (rtype(m
->base
) == RSlot
) {
314 off
.bits
.i
= slot(m
->base
, e
);
315 addcon(&m
->offset
, &off
, 1);
316 m
->base
= TMP(e
->fp
);
318 if (m
->offset
.type
!= CUndef
)
319 emitcon(&m
->offset
, e
);
321 if (!req(m
->base
, R
))
322 fprintf(e
->f
, "%%%s",
323 regtoa(m
->base
.val
, SLong
)
325 else if (m
->offset
.type
== CAddr
)
326 fprintf(e
->f
, "%%rip");
327 if (!req(m
->index
, R
))
328 fprintf(e
->f
, ", %%%s, %d",
329 regtoa(m
->index
.val
, SLong
),
336 emitcon(&e
->fn
->con
[ref
.val
], e
);
357 switch (rtype(ref
)) {
361 fprintf(e
->f
, "%d(%%%s)",
367 off
= e
->fn
->con
[ref
.val
];
369 if (off
.type
== CAddr
)
370 if (off
.sym
.type
!= SThr
|| T
.apple
)
371 fprintf(e
->f
, "(%%rip)");
375 fprintf(e
->f
, "(%%%s)", regtoa(ref
.val
, SLong
));
382 die("invalid format specifier %%%c", c
);
387 static void *negmask
[4] = {
388 [Ks
] = (uint32_t[4]){ 0x80000000 },
389 [Kd
] = (uint64_t[2]){ 0x8000000000000000 },
405 /* most instructions are just pulled out of
406 * the table omap[], some special cases are
409 /* this linear search should really be a binary
411 if (omap
[o
].op
== NOp
)
412 die("no match for %s(%c)",
413 optab
[i
.op
].name
, "wlsd"[i
.cls
]);
414 if (omap
[o
].op
== i
.op
)
415 if (omap
[o
].cls
== i
.cls
416 || (omap
[o
].cls
== Ki
&& KBASE(i
.cls
) == 0)
417 || (omap
[o
].cls
== Ka
))
420 emitf(omap
[o
].fmt
, &i
, e
);
423 /* just do nothing for nops, they are inserted
427 /* here, we try to use the 3-address form
428 * of multiplication when possible */
429 if (rtype(i
.arg
[1]) == RCon
) {
434 if (KBASE(i
.cls
) == 0 /* only available for ints */
435 && rtype(i
.arg
[0]) == RCon
436 && rtype(i
.arg
[1]) == RTmp
) {
437 emitf("imul%k %0, %1, %=", &i
, e
);
442 /* we have to use the negation trick to handle
443 * some 3-address subtractions */
444 if (req(i
.to
, i
.arg
[1]) && !req(i
.arg
[0], i
.to
)) {
445 ineg
= (Ins
){Oneg
, i
.cls
, i
.to
, {i
.to
}};
447 emitf("add%k %0, %=", &i
, e
);
452 if (!req(i
.to
, i
.arg
[0]))
453 emitf("mov%k %0, %=", &i
, e
);
454 if (KBASE(i
.cls
) == 0)
455 emitf("neg%k %=", &i
, e
);
458 "\txorp%c %sfp%d(%%rip), %%%s\n",
461 stashbits(negmask
[i
.cls
], 16),
462 regtoa(i
.to
.val
, SLong
)
466 /* use xmm15 to adjust the instruction when the
467 * conversion to 2-address in emitf() would fail */
468 if (req(i
.to
, i
.arg
[1])) {
469 i
.arg
[1] = TMP(XMM0
+15);
470 emitf("mov%k %=, %1", &i
, e
);
471 emitf("mov%k %0, %=", &i
, e
);
476 /* copies are used for many things; see my note
477 * to understand how to load big constants:
478 * https://c9x.me/notes/2015-09-19.html */
479 assert(rtype(i
.to
) != RMem
);
480 if (req(i
.to
, R
) || req(i
.arg
[0], R
))
482 if (req(i
.to
, i
.arg
[0]))
484 t0
= rtype(i
.arg
[0]);
487 && e
->fn
->con
[i
.arg
[0].val
].type
== CBits
) {
488 val
= e
->fn
->con
[i
.arg
[0].val
].bits
.i
;
490 if (val
>= 0 && val
<= UINT32_MAX
) {
491 emitf("movl %W0, %W=", &i
, e
);
494 if (rtype(i
.to
) == RSlot
)
495 if (val
< INT32_MIN
|| val
> INT32_MAX
) {
496 emitf("movl %0, %=", &i
, e
);
497 emitf("movl %0>>32, 4+%=", &i
, e
);
503 && e
->fn
->con
[i
.arg
[0].val
].type
== CAddr
) {
504 emitf("lea%k %M0, %=", &i
, e
);
507 if (rtype(i
.to
) == RSlot
508 && (t0
== RSlot
|| t0
== RMem
)) {
509 i
.cls
= KWIDE(i
.cls
) ? Kd
: Ks
;
510 i
.arg
[1] = TMP(XMM0
+15);
511 emitf("mov%k %0, %1", &i
, e
);
512 emitf("mov%k %1, %=", &i
, e
);
515 /* conveniently, the assembler knows if it
516 * should use movabsq when reading movq */
517 emitf("mov%k %0, %=", &i
, e
);
521 && rtype(i
.arg
[0]) == RCon
522 && e
->fn
->con
[i
.arg
[0].val
].sym
.type
== SThr
) {
523 /* derive the symbol address from the TCB
524 * address at offset 0 of %fs */
526 con
= &e
->fn
->con
[i
.arg
[0].val
];
527 sym
= str(con
->sym
.id
);
528 emitf("movq %%fs:0, %L=", &i
, e
);
529 fprintf(e
->f
, "\tleaq %s%s@tpoff",
530 sym
[0] == '"' ? "" : T
.assym
, sym
);
532 fprintf(e
->f
, "%+"PRId64
,
534 fprintf(e
->f
, "(%%%s), %%%s\n",
535 regtoa(i
.to
.val
, SLong
),
536 regtoa(i
.to
.val
, SLong
));
541 /* calls simply have a weird syntax in AT&T
543 switch (rtype(i
.arg
[0])) {
545 fprintf(e
->f
, "\tcallq ");
546 emitcon(&e
->fn
->con
[i
.arg
[0].val
], e
);
550 emitf("callq *%L0", &i
, e
);
553 die("invalid call argument");
557 /* there is no good reason why this is here;
558 * maybe we should split Osalloc into 2 different
559 * instructions depending on the result
561 assert(e
->fp
== RBP
);
562 emitf("subq %L0, %%rsp", &i
, e
);
564 emitcopy(i
.to
, TMP(RSP
), Kl
, e
);
567 if (KBASE(i
.cls
) == 0)
569 /* for floats, there is no swap instruction
570 * so we use xmm15 as a temporary
572 emitcopy(TMP(XMM0
+15), i
.arg
[0], i
.cls
, e
);
573 emitcopy(i
.arg
[0], i
.arg
[1], i
.cls
, e
);
574 emitcopy(i
.arg
[1], TMP(XMM0
+15), i
.cls
, e
);
577 emitdbgloc(i
.arg
[0].val
, i
.arg
[1].val
, e
->f
);
587 /* specific to NAlign == 3 */
590 for (i
=0, o
=0; i
<NCLR
; i
++)
591 o
^= e
->fn
->reg
>> amd64_sysv_rclob
[i
];
598 && e
->fn
->salign
== 4)
600 e
->fsz
= 4*f
+ 8*o
+ 176*e
->fn
->vararg
;
604 amd64_emitfn(Fn
*fn
, FILE *f
)
606 static char *ctoa
[] = {
607 #define X(c, s) [c] = s,
614 int *r
, c
, o
, n
, lbl
;
617 e
= &(E
){.f
= f
, .fn
= fn
};
618 emitfnlnk(fn
->name
, &fn
->lnk
, f
);
619 fputs("\tendbr64\n", f
);
620 if (!fn
->leaf
|| fn
->vararg
|| fn
->dynalloc
) {
622 fputs("\tpushq %rbp\n\tmovq %rsp, %rbp\n", f
);
627 fprintf(f
, "\tsubq $%"PRIu64
", %%rsp\n", e
->fsz
);
630 for (r
=amd64_sysv_rsave
; r
<&amd64_sysv_rsave
[6]; r
++, o
+=8)
631 fprintf(f
, "\tmovq %%%s, %d(%%rbp)\n", rname
[*r
][0], o
);
632 for (n
=0; n
<8; ++n
, o
+=16)
633 fprintf(f
, "\tmovaps %%xmm%d, %d(%%rbp)\n", n
, o
);
635 for (r
=amd64_sysv_rclob
; r
<&amd64_sysv_rclob
[NCLR
]; r
++)
636 if (fn
->reg
& BIT(*r
)) {
637 itmp
.arg
[0] = TMP(*r
);
638 emitf("pushq %L0", &itmp
, e
);
642 for (lbl
=0, b
=fn
->start
; b
; b
=b
->link
) {
643 if (lbl
|| b
->npred
> 1)
644 fprintf(f
, "%sbb%d:\n", T
.asloc
, id0
+b
->id
);
645 for (i
=b
->ins
; i
!=&b
->ins
[b
->nins
]; i
++)
648 switch (b
->jmp
.type
) {
650 fprintf(f
, "\tud2\n");
655 "\tmovq %%rbp, %%rsp\n"
656 "\tsubq $%"PRIu64
", %%rsp\n",
657 e
->fsz
+ e
->nclob
* 8);
658 for (r
=&amd64_sysv_rclob
[NCLR
]; r
>amd64_sysv_rclob
;)
659 if (fn
->reg
& BIT(*--r
)) {
660 itmp
.arg
[0] = TMP(*r
);
661 emitf("popq %L0", &itmp
, e
);
664 fputs("\tleave\n", f
);
667 "\taddq $%"PRIu64
", %%rsp\n",
673 if (b
->s1
!= b
->link
)
674 fprintf(f
, "\tjmp %sbb%d\n",
675 T
.asloc
, id0
+b
->s1
->id
);
680 c
= b
->jmp
.type
- Jjf
;
681 if (0 <= c
&& c
<= NCmp
) {
682 if (b
->link
== b
->s2
) {
688 fprintf(f
, "\tj%s %sbb%d\n", ctoa
[c
],
689 T
.asloc
, id0
+b
->s2
->id
);
692 die("unhandled jump %d", b
->jmp
.type
);
697 elf_emitfnfin(fn
->name
, f
);