30 Ki
= -1, /* matches Kw and Kl */
31 Ka
= -2, /* matches all classes */
34 /* Instruction format strings:
36 * if the format string starts with -, the instruction
37 * is assumed to be 3-address and is put in 2-address
38 * mode using an extra mov if necessary
40 * if the format string starts with +, the same as the
41 * above applies, but commutativity is also assumed
43 * %k is used to set the class of the instruction,
44 * it'll expand to "l", "q", "ss", "sd", depending
45 * on the instruction class
46 * %0 designates the first argument
47 * %1 designates the second argument
48 * %= designates the result
50 * if %k is not used, a prefix to 0, 1, or = must be
52 * M - memory reference
57 * S - single precision float
58 * D - double precision float
65 { Oadd
, Ka
, "+add%k %1, %=" },
66 { Osub
, Ka
, "-sub%k %1, %=" },
67 { Oand
, Ki
, "+and%k %1, %=" },
68 { Oor
, Ki
, "+or%k %1, %=" },
69 { Oxor
, Ki
, "+xor%k %1, %=" },
70 { Osar
, Ki
, "-sar%k %B1, %=" },
71 { Oshr
, Ki
, "-shr%k %B1, %=" },
72 { Oshl
, Ki
, "-shl%k %B1, %=" },
73 { Omul
, Ki
, "+imul%k %1, %=" },
74 { Omul
, Ks
, "+mulss %1, %=" },
75 { Omul
, Kd
, "+mulsd %1, %=" },
76 { Odiv
, Ka
, "-div%k %1, %=" },
77 { Ostorel
, Ka
, "movq %L0, %M1" },
78 { Ostorew
, Ka
, "movl %W0, %M1" },
79 { Ostoreh
, Ka
, "movw %H0, %M1" },
80 { Ostoreb
, Ka
, "movb %B0, %M1" },
81 { Ostores
, Ka
, "movss %S0, %M1" },
82 { Ostored
, Ka
, "movsd %D0, %M1" },
83 { Oload
, Ka
, "mov%k %M0, %=" },
84 { Oloadsw
, Kl
, "movslq %M0, %L=" },
85 { Oloadsw
, Kw
, "movl %M0, %W=" },
86 { Oloaduw
, Ki
, "movl %M0, %W=" },
87 { Oloadsh
, Ki
, "movsw%k %M0, %=" },
88 { Oloaduh
, Ki
, "movzw%k %M0, %=" },
89 { Oloadsb
, Ki
, "movsb%k %M0, %=" },
90 { Oloadub
, Ki
, "movzb%k %M0, %=" },
91 { Oextsw
, Kl
, "movslq %W0, %L=" },
92 { Oextuw
, Kl
, "movl %W0, %W=" },
93 { Oextsh
, Ki
, "movsw%k %H0, %=" },
94 { Oextuh
, Ki
, "movzw%k %H0, %=" },
95 { Oextsb
, Ki
, "movsb%k %B0, %=" },
96 { Oextub
, Ki
, "movzb%k %B0, %=" },
98 { Oexts
, Kd
, "cvtss2sd %0, %=" },
99 { Otruncd
, Ks
, "cvtsd2ss %0, %=" },
100 { Ostosi
, Ki
, "cvttss2si%k %0, %=" },
101 { Odtosi
, Ki
, "cvttsd2si%k %0, %=" },
102 { Oswtof
, Ka
, "cvtsi2%k %W0, %=" },
103 { Osltof
, Ka
, "cvtsi2%k %L0, %=" },
104 { Ocast
, Ki
, "movq %D0, %L=" },
105 { Ocast
, Ka
, "movq %L0, %D=" },
107 { Oaddr
, Ki
, "lea%k %M0, %=" },
108 { Oswap
, Ki
, "xchg%k %0, %1" },
109 { Osign
, Kl
, "cqto" },
110 { Osign
, Kw
, "cltd" },
111 { Oxdiv
, Ki
, "div%k %0" },
112 { Oxidiv
, Ki
, "idiv%k %0" },
113 { Oxcmp
, Ks
, "ucomiss %S0, %S1" },
114 { Oxcmp
, Kd
, "ucomisd %D0, %D1" },
115 { Oxcmp
, Ki
, "cmp%k %0, %1" },
116 { Oxtest
, Ki
, "test%k %0, %1" },
118 { Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" },
124 static char *rname
[][4] = {
125 [RAX
] = {"rax", "eax", "ax", "al"},
126 [RBX
] = {"rbx", "ebx", "bx", "bl"},
127 [RCX
] = {"rcx", "ecx", "cx", "cl"},
128 [RDX
] = {"rdx", "edx", "dx", "dl"},
129 [RSI
] = {"rsi", "esi", "si", "sil"},
130 [RDI
] = {"rdi", "edi", "di", "dil"},
131 [RBP
] = {"rbp", "ebp", "bp", "bpl"},
132 [RSP
] = {"rsp", "esp", "sp", "spl"},
133 [R8
] = {"r8" , "r8d", "r8w", "r8b"},
134 [R9
] = {"r9" , "r9d", "r9w", "r9b"},
135 [R10
] = {"r10", "r10d", "r10w", "r10b"},
136 [R11
] = {"r11", "r11d", "r11w", "r11b"},
137 [R12
] = {"r12", "r12d", "r12w", "r12b"},
138 [R13
] = {"r13", "r13d", "r13w", "r13b"},
139 [R14
] = {"r14", "r14d", "r14w", "r14b"},
140 [R15
] = {"r15", "r15d", "r15w", "r15b"},
150 assert(s
<= fn
->slot
);
151 /* specific to NAlign == 3 */
155 return -176 + -4 * (fn
->slot
- s
);
157 return -4 * (fn
->slot
- s
);
161 emitcon(Con
*con
, FILE *f
)
167 l
= str(con
->sym
.id
);
168 p
= l
[0] == '"' ? "" : T
.assym
;
169 if (con
->sym
.type
== SThr
) {
171 fprintf(f
, "%s%s@TLVP", p
, l
);
173 fprintf(f
, "%%fs:%s%s@tpoff", p
, l
);
175 fprintf(f
, "%s%s", p
, l
);
177 fprintf(f
, "%+"PRId64
, con
->bits
.i
);
180 fprintf(f
, "%"PRId64
, con
->bits
.i
);
188 regtoa(int reg
, int sz
)
192 assert(reg
<= XMM15
);
194 sprintf(buf
, "xmm%d", reg
-XMM0
);
197 return rname
[reg
][sz
];
201 getarg(char c
, Ins
*i
)
211 die("invalid arg letter %c", c
);
215 static void emitins(Ins
, Fn
*, FILE *);
218 emitcopy(Ref r1
, Ref r2
, int k
, Fn
*fn
, FILE *f
)
230 emitf(char *s
, Ins
*i
, Fn
*fn
, FILE *f
)
232 static char clstoa
[][3] = {"l", "q", "ss", "sd"};
241 if (req(i
->arg
[1], i
->to
)) {
243 i
->arg
[0] = i
->arg
[1];
248 assert((!req(i
->arg
[1], i
->to
) || req(i
->arg
[0], i
->to
)) &&
249 "cannot convert to 2-address");
250 emitcopy(i
->to
, i
->arg
[0], i
->cls
, fn
, f
);
257 while ((c
= *s
++) != '%')
263 switch ((c
= *s
++)) {
268 fputs(clstoa
[i
->cls
], f
);
273 sz
= KWIDE(i
->cls
) ? SLong
: SWord
;
278 sz
= SLong
; /* does not matter for floats */
282 switch (rtype(ref
)) {
285 fprintf(f
, "%%%s", regtoa(ref
.val
, sz
));
288 fprintf(f
, "%d(%%rbp)", slot(ref
, fn
));
292 m
= &fn
->mem
[ref
.val
];
293 if (rtype(m
->base
) == RSlot
) {
295 off
.bits
.i
= slot(m
->base
, fn
);
296 addcon(&m
->offset
, &off
, 1);
299 if (m
->offset
.type
!= CUndef
)
300 emitcon(&m
->offset
, f
);
302 if (!req(m
->base
, R
))
303 fprintf(f
, "%%%s", regtoa(m
->base
.val
, SLong
));
304 else if (m
->offset
.type
== CAddr
)
306 if (!req(m
->index
, R
))
307 fprintf(f
, ", %%%s, %d",
308 regtoa(m
->index
.val
, SLong
),
315 emitcon(&fn
->con
[ref
.val
], f
);
336 switch (rtype(ref
)) {
340 fprintf(f
, "%d(%%rbp)", slot(ref
, fn
));
343 off
= fn
->con
[ref
.val
];
345 if (off
.type
== CAddr
)
346 if (off
.sym
.type
!= SThr
|| T
.apple
)
347 fprintf(f
, "(%%rip)");
351 fprintf(f
, "(%%%s)", regtoa(ref
.val
, SLong
));
358 die("invalid format specifier %%%c", c
);
363 static void *negmask
[4] = {
364 [Ks
] = (uint32_t[4]){ 0x80000000 },
365 [Kd
] = (uint64_t[2]){ 0x8000000000000000 },
369 emitins(Ins i
, Fn
*fn
, FILE *f
)
381 /* most instructions are just pulled out of
382 * the table omap[], some special cases are
385 /* this linear search should really be a binary
387 if (omap
[o
].op
== NOp
)
388 die("no match for %s(%c)",
389 optab
[i
.op
].name
, "wlsd"[i
.cls
]);
390 if (omap
[o
].op
== i
.op
)
391 if (omap
[o
].cls
== i
.cls
392 || (omap
[o
].cls
== Ki
&& KBASE(i
.cls
) == 0)
393 || (omap
[o
].cls
== Ka
))
396 emitf(omap
[o
].fmt
, &i
, fn
, f
);
399 /* just do nothing for nops, they are inserted
403 /* here, we try to use the 3-address form
404 * of multiplication when possible */
405 if (rtype(i
.arg
[1]) == RCon
) {
410 if (KBASE(i
.cls
) == 0 /* only available for ints */
411 && rtype(i
.arg
[0]) == RCon
412 && rtype(i
.arg
[1]) == RTmp
) {
413 emitf("imul%k %0, %1, %=", &i
, fn
, f
);
418 /* we have to use the negation trick to handle
419 * some 3-address subtractions */
420 if (req(i
.to
, i
.arg
[1]) && !req(i
.arg
[0], i
.to
)) {
421 ineg
= (Ins
){Oneg
, i
.cls
, i
.to
, {i
.to
}};
422 emitins(ineg
, fn
, f
);
423 emitf("add%k %0, %=", &i
, fn
, f
);
428 if (!req(i
.to
, i
.arg
[0]))
429 emitf("mov%k %0, %=", &i
, fn
, f
);
430 if (KBASE(i
.cls
) == 0)
431 emitf("neg%k %=", &i
, fn
, f
);
434 "\txorp%c %sfp%d(%%rip), %%%s\n",
437 stashbits(negmask
[i
.cls
], 16),
438 regtoa(i
.to
.val
, SLong
)
442 /* use xmm15 to adjust the instruction when the
443 * conversion to 2-address in emitf() would fail */
444 if (req(i
.to
, i
.arg
[1])) {
445 i
.arg
[1] = TMP(XMM0
+15);
446 emitf("mov%k %=, %1", &i
, fn
, f
);
447 emitf("mov%k %0, %=", &i
, fn
, f
);
452 /* copies are used for many things; see my note
453 * to understand how to load big constants:
454 * https://c9x.me/notes/2015-09-19.html */
455 assert(rtype(i
.to
) != RMem
);
456 if (req(i
.to
, R
) || req(i
.arg
[0], R
))
458 if (req(i
.to
, i
.arg
[0]))
460 t0
= rtype(i
.arg
[0]);
463 && fn
->con
[i
.arg
[0].val
].type
== CBits
) {
464 val
= fn
->con
[i
.arg
[0].val
].bits
.i
;
466 if (val
>= 0 && val
<= UINT32_MAX
) {
467 emitf("movl %W0, %W=", &i
, fn
, f
);
470 if (rtype(i
.to
) == RSlot
)
471 if (val
< INT32_MIN
|| val
> INT32_MAX
) {
472 emitf("movl %0, %=", &i
, fn
, f
);
473 emitf("movl %0>>32, 4+%=", &i
, fn
, f
);
479 && fn
->con
[i
.arg
[0].val
].type
== CAddr
) {
480 emitf("lea%k %M0, %=", &i
, fn
, f
);
483 if (rtype(i
.to
) == RSlot
484 && (t0
== RSlot
|| t0
== RMem
)) {
485 i
.cls
= KWIDE(i
.cls
) ? Kd
: Ks
;
486 i
.arg
[1] = TMP(XMM0
+15);
487 emitf("mov%k %0, %1", &i
, fn
, f
);
488 emitf("mov%k %1, %=", &i
, fn
, f
);
491 /* conveniently, the assembler knows if it
492 * should use movabsq when reading movq */
493 emitf("mov%k %0, %=", &i
, fn
, f
);
497 && rtype(i
.arg
[0]) == RCon
498 && fn
->con
[i
.arg
[0].val
].sym
.type
== SThr
) {
499 /* derive the symbol address from the TCB
500 * address at offset 0 of %fs */
502 con
= &fn
->con
[i
.arg
[0].val
];
503 sym
= str(con
->sym
.id
);
504 emitf("movq %%fs:0, %L=", &i
, fn
, f
);
505 fprintf(f
, "\tleaq %s%s@tpoff",
506 sym
[0] == '"' ? "" : T
.assym
, sym
);
508 fprintf(f
, "%+"PRId64
, con
->bits
.i
);
509 fprintf(f
, "(%%%s), %%%s\n",
510 regtoa(i
.to
.val
, SLong
),
511 regtoa(i
.to
.val
, SLong
));
516 /* calls simply have a weird syntax in AT&T
518 switch (rtype(i
.arg
[0])) {
520 fprintf(f
, "\tcallq ");
521 emitcon(&fn
->con
[i
.arg
[0].val
], f
);
525 emitf("callq *%L0", &i
, fn
, f
);
528 die("invalid call argument");
532 /* there is no good reason why this is here;
533 * maybe we should split Osalloc into 2 different
534 * instructions depending on the result
536 emitf("subq %L0, %%rsp", &i
, fn
, f
);
538 emitcopy(i
.to
, TMP(RSP
), Kl
, fn
, f
);
541 if (KBASE(i
.cls
) == 0)
543 /* for floats, there is no swap instruction
544 * so we use xmm15 as a temporary
546 emitcopy(TMP(XMM0
+15), i
.arg
[0], i
.cls
, fn
, f
);
547 emitcopy(i
.arg
[0], i
.arg
[1], i
.cls
, fn
, f
);
548 emitcopy(i
.arg
[1], TMP(XMM0
+15), i
.cls
, fn
, f
);
551 emitdbgloc(i
.arg
[0].val
, i
.arg
[1].val
, f
);
561 /* specific to NAlign == 3 */
562 for (i
=0, o
=0; i
<NCLR
; i
++)
563 o
^= 1 & (fn
->reg
>> amd64_sysv_rclob
[i
]);
566 return 4*f
+ 8*o
+ 176*fn
->vararg
;
570 amd64_emitfn(Fn
*fn
, FILE *f
)
572 static char *ctoa
[] = {
573 #define X(c, s) [c] = s,
580 int *r
, c
, o
, n
, lbl
;
583 emitfnlnk(fn
->name
, &fn
->lnk
, f
);
584 fputs("\tendbr64\n\tpushq %rbp\n\tmovq %rsp, %rbp\n", f
);
587 fprintf(f
, "\tsubq $%"PRIu64
", %%rsp\n", fs
);
590 for (r
=amd64_sysv_rsave
; r
<&amd64_sysv_rsave
[6]; r
++, o
+=8)
591 fprintf(f
, "\tmovq %%%s, %d(%%rbp)\n", rname
[*r
][0], o
);
592 for (n
=0; n
<8; ++n
, o
+=16)
593 fprintf(f
, "\tmovaps %%xmm%d, %d(%%rbp)\n", n
, o
);
595 for (r
=amd64_sysv_rclob
; r
<&amd64_sysv_rclob
[NCLR
]; r
++)
596 if (fn
->reg
& BIT(*r
)) {
597 itmp
.arg
[0] = TMP(*r
);
598 emitf("pushq %L0", &itmp
, fn
, f
);
602 for (lbl
=0, b
=fn
->start
; b
; b
=b
->link
) {
603 if (lbl
|| b
->npred
> 1)
604 fprintf(f
, "%sbb%d:\n", T
.asloc
, id0
+b
->id
);
605 for (i
=b
->ins
; i
!=&b
->ins
[b
->nins
]; i
++)
608 switch (b
->jmp
.type
) {
610 fprintf(f
, "\tud2\n");
615 "\tmovq %%rbp, %%rsp\n"
616 "\tsubq $%"PRIu64
", %%rsp\n",
619 for (r
=&amd64_sysv_rclob
[NCLR
]; r
>amd64_sysv_rclob
;)
620 if (fn
->reg
& BIT(*--r
)) {
621 itmp
.arg
[0] = TMP(*r
);
622 emitf("popq %L0", &itmp
, fn
, f
);
631 if (b
->s1
!= b
->link
)
632 fprintf(f
, "\tjmp %sbb%d\n",
633 T
.asloc
, id0
+b
->s1
->id
);
638 c
= b
->jmp
.type
- Jjf
;
639 if (0 <= c
&& c
<= NCmp
) {
640 if (b
->link
== b
->s2
) {
646 fprintf(f
, "\tj%s %sbb%d\n", ctoa
[c
],
647 T
.asloc
, id0
+b
->s2
->id
);
650 die("unhandled jump %d", b
->jmp
.type
);
655 elf_emitfnfin(fn
->name
, f
);