30 Ki
= -1, /* matches Kw and Kl */
31 Ka
= -2, /* matches all classes */
34 /* Instruction format strings:
36 * if the format string starts with -, the instruction
37 * is assumed to be 3-address and is put in 2-address
38 * mode using an extra mov if necessary
40 * if the format string starts with +, the same as the
41 * above applies, but commutativity is also assumed
43 * %k is used to set the class of the instruction,
44 * it'll expand to "l", "q", "ss", "sd", depending
45 * on the instruction class
46 * %0 designates the first argument
47 * %1 designates the second argument
48 * %= designates the result
50 * if %k is not used, a prefix to 0, 1, or = must be
52 * M - memory reference
57 * S - single precision float
58 * D - double precision float
65 { Oadd
, Ka
, "+add%k %1, %=" },
66 { Osub
, Ka
, "-sub%k %1, %=" },
67 { Oand
, Ki
, "+and%k %1, %=" },
68 { Oor
, Ki
, "+or%k %1, %=" },
69 { Oxor
, Ki
, "+xor%k %1, %=" },
70 { Osar
, Ki
, "-sar%k %B1, %=" },
71 { Oshr
, Ki
, "-shr%k %B1, %=" },
72 { Oshl
, Ki
, "-shl%k %B1, %=" },
73 { Omul
, Ki
, "+imul%k %1, %=" },
74 { Omul
, Ks
, "+mulss %1, %=" },
75 { Omul
, Kd
, "+mulsd %1, %=" },
76 { Odiv
, Ka
, "-div%k %1, %=" },
77 { Ostorel
, Ka
, "movq %L0, %M1" },
78 { Ostorew
, Ka
, "movl %W0, %M1" },
79 { Ostoreh
, Ka
, "movw %H0, %M1" },
80 { Ostoreb
, Ka
, "movb %B0, %M1" },
81 { Ostores
, Ka
, "movss %S0, %M1" },
82 { Ostored
, Ka
, "movsd %D0, %M1" },
83 { Oload
, Ka
, "mov%k %M0, %=" },
84 { Oloadsw
, Kl
, "movslq %M0, %L=" },
85 { Oloadsw
, Kw
, "movl %M0, %W=" },
86 { Oloaduw
, Ki
, "movl %M0, %W=" },
87 { Oloadsh
, Ki
, "movsw%k %M0, %=" },
88 { Oloaduh
, Ki
, "movzw%k %M0, %=" },
89 { Oloadsb
, Ki
, "movsb%k %M0, %=" },
90 { Oloadub
, Ki
, "movzb%k %M0, %=" },
91 { Oextsw
, Kl
, "movslq %W0, %L=" },
92 { Oextuw
, Kl
, "movl %W0, %W=" },
93 { Oextsh
, Ki
, "movsw%k %H0, %=" },
94 { Oextuh
, Ki
, "movzw%k %H0, %=" },
95 { Oextsb
, Ki
, "movsb%k %B0, %=" },
96 { Oextub
, Ki
, "movzb%k %B0, %=" },
98 { Oexts
, Kd
, "cvtss2sd %0, %=" },
99 { Otruncd
, Ks
, "cvtsd2ss %0, %=" },
100 { Ostosi
, Ki
, "cvttss2si%k %0, %=" },
101 { Odtosi
, Ki
, "cvttsd2si%k %0, %=" },
102 { Oswtof
, Ka
, "cvtsi2%k %W0, %=" },
103 { Osltof
, Ka
, "cvtsi2%k %L0, %=" },
104 { Ocast
, Ki
, "movq %D0, %L=" },
105 { Ocast
, Ka
, "movq %L0, %D=" },
107 { Oaddr
, Ki
, "lea%k %M0, %=" },
108 { Oswap
, Ki
, "xchg%k %0, %1" },
109 { Osign
, Kl
, "cqto" },
110 { Osign
, Kw
, "cltd" },
111 { Oxdiv
, Ki
, "div%k %0" },
112 { Oxidiv
, Ki
, "idiv%k %0" },
113 { Oxcmp
, Ks
, "comiss %S0, %S1" },
114 { Oxcmp
, Kd
, "comisd %D0, %D1" },
115 { Oxcmp
, Ki
, "cmp%k %0, %1" },
116 { Oxtest
, Ki
, "test%k %0, %1" },
118 { Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" },
124 static char *rname
[][4] = {
125 [RAX
] = {"rax", "eax", "ax", "al"},
126 [RBX
] = {"rbx", "ebx", "bx", "bl"},
127 [RCX
] = {"rcx", "ecx", "cx", "cl"},
128 [RDX
] = {"rdx", "edx", "dx", "dl"},
129 [RSI
] = {"rsi", "esi", "si", "sil"},
130 [RDI
] = {"rdi", "edi", "di", "dil"},
131 [RBP
] = {"rbp", "ebp", "bp", "bpl"},
132 [RSP
] = {"rsp", "esp", "sp", "spl"},
133 [R8
] = {"r8" , "r8d", "r8w", "r8b"},
134 [R9
] = {"r9" , "r9d", "r9w", "r9b"},
135 [R10
] = {"r10", "r10d", "r10w", "r10b"},
136 [R11
] = {"r11", "r11d", "r11w", "r11b"},
137 [R12
] = {"r12", "r12d", "r12w", "r12b"},
138 [R13
] = {"r13", "r13d", "r13w", "r13b"},
139 [R14
] = {"r14", "r14d", "r14w", "r14b"},
140 [R15
] = {"r15", "r15d", "r15w", "r15b"},
147 struct { int i
:29; } x
;
149 /* sign extend s using a bitfield */
151 assert(x
.i
<= fn
->slot
);
152 /* specific to NAlign == 3 */
156 return -176 + -4 * (fn
->slot
- x
.i
);
158 return -4 * (fn
->slot
- x
.i
);
162 emitcon(Con
*con
, FILE *f
)
168 p
= con
->local
? gasloc
: gassym
;
169 fprintf(f
, "%s%s", p
, str(con
->label
));
171 fprintf(f
, "%+"PRId64
, con
->bits
.i
);
174 fprintf(f
, "%"PRId64
, con
->bits
.i
);
182 regtoa(int reg
, int sz
)
187 sprintf(buf
, "xmm%d", reg
-XMM0
);
190 return rname
[reg
][sz
];
194 getarg(char c
, Ins
*i
)
204 die("invalid arg letter %c", c
);
208 static void emitins(Ins
, Fn
*, FILE *);
211 emitcopy(Ref r1
, Ref r2
, int k
, Fn
*fn
, FILE *f
)
223 emitf(char *s
, Ins
*i
, Fn
*fn
, FILE *f
)
225 static char clstoa
[][3] = {"l", "q", "ss", "sd"};
234 if (req(i
->arg
[1], i
->to
)) {
236 i
->arg
[0] = i
->arg
[1];
241 assert((!req(i
->arg
[1], i
->to
) || req(i
->arg
[0], i
->to
)) &&
242 "cannot convert to 2-address");
243 emitcopy(i
->to
, i
->arg
[0], i
->cls
, fn
, f
);
250 while ((c
= *s
++) != '%')
256 switch ((c
= *s
++)) {
261 fputs(clstoa
[i
->cls
], f
);
266 sz
= KWIDE(i
->cls
) ? SLong
: SWord
;
271 sz
= SLong
; /* does not matter for floats */
275 switch (rtype(ref
)) {
278 fprintf(f
, "%%%s", regtoa(ref
.val
, sz
));
281 fprintf(f
, "%d(%%rbp)", slot(ref
.val
, fn
));
285 m
= &fn
->mem
[ref
.val
];
286 if (rtype(m
->base
) == RSlot
) {
288 off
.bits
.i
= slot(m
->base
.val
, fn
);
289 addcon(&m
->offset
, &off
);
292 if (m
->offset
.type
!= CUndef
)
293 emitcon(&m
->offset
, f
);
298 fprintf(f
, "%%%s", regtoa(m
->base
.val
, SLong
));
299 if (!req(m
->index
, R
))
300 fprintf(f
, ", %%%s, %d",
301 regtoa(m
->index
.val
, SLong
),
308 emitcon(&fn
->con
[ref
.val
], f
);
329 switch (rtype(ref
)) {
333 fprintf(f
, "%d(%%rbp)", slot(ref
.val
, fn
));
336 emitcon(&fn
->con
[ref
.val
], f
);
337 fprintf(f
, "(%%rip)");
341 fprintf(f
, "(%%%s)", regtoa(ref
.val
, SLong
));
348 die("invalid format specifier %%%c", c
);
353 static void *negmask
[4] = {
354 [Ks
] = (uint32_t[4]){ 0x80000000 },
355 [Kd
] = (uint64_t[2]){ 0x8000000000000000 },
359 emitins(Ins i
, Fn
*fn
, FILE *f
)
368 /* most instructions are just pulled out of
369 * the table omap[], some special cases are
372 /* this linear search should really be a binary
374 if (omap
[o
].op
== NOp
)
375 die("no match for %s(%d)",
376 optab
[i
.op
].name
, "wlsd"[i
.cls
]);
377 if (omap
[o
].op
== i
.op
)
378 if (omap
[o
].cls
== i
.cls
379 || (omap
[o
].cls
== Ki
&& KBASE(i
.cls
) == 0)
380 || (omap
[o
].cls
== Ka
))
383 emitf(omap
[o
].asm, &i
, fn
, f
);
386 /* just do nothing for nops, they are inserted
390 /* here, we try to use the 3-address form
391 * of multiplication when possible */
392 if (rtype(i
.arg
[1]) == RCon
) {
397 if (KBASE(i
.cls
) == 0 /* only available for ints */
398 && rtype(i
.arg
[0]) == RCon
399 && rtype(i
.arg
[1]) == RTmp
) {
400 emitf("imul%k %0, %1, %=", &i
, fn
, f
);
405 /* we have to use the negation trick to handle
406 * some 3-address subtractions */
407 if (req(i
.to
, i
.arg
[1])) {
408 if (KBASE(i
.cls
) == 0)
409 emitf("neg%k %=", &i
, fn
, f
);
412 "\txorp%c %sfp%d(%%rip), %%%s\n",
415 gasstash(negmask
[i
.cls
], 16),
416 regtoa(i
.to
.val
, SLong
)
418 emitf("add%k %0, %=", &i
, fn
, f
);
423 /* use xmm15 to adjust the instruction when the
424 * conversion to 2-address in emitf() would fail */
425 if (req(i
.to
, i
.arg
[1])) {
426 i
.arg
[1] = TMP(XMM0
+15);
427 emitf("mov%k %=, %1", &i
, fn
, f
);
428 emitf("mov%k %0, %=", &i
, fn
, f
);
433 /* make sure we don't emit useless copies,
434 * also, we can use a trick to load 64-bit
435 * registers, it's detailed in my note below
436 * http://c9x.me/art/notes.html?09/19/2015 */
437 t0
= rtype(i
.arg
[0]);
438 if (req(i
.to
, R
) || req(i
.arg
[0], R
))
443 && fn
->con
[i
.arg
[0].val
].type
== CBits
444 && (val
= fn
->con
[i
.arg
[0].val
].bits
.i
) >= 0
445 && val
<= UINT32_MAX
) {
446 emitf("movl %W0, %W=", &i
, fn
, f
);
447 } else if (isreg(i
.to
)
449 && fn
->con
[i
.arg
[0].val
].type
== CAddr
) {
450 emitf("lea%k %M0, %=", &i
, fn
, f
);
451 } else if (rtype(i
.to
) == RSlot
452 && (t0
== RSlot
|| t0
== RMem
)) {
453 i
.cls
= KWIDE(i
.cls
) ? Kd
: Ks
;
454 i
.arg
[1] = TMP(XMM0
+15);
455 emitf("mov%k %0, %1", &i
, fn
, f
);
456 emitf("mov%k %1, %=", &i
, fn
, f
);
458 } else if (!req(i
.arg
[0], i
.to
))
459 emitf("mov%k %0, %=", &i
, fn
, f
);
462 /* calls simply have a weird syntax in AT&T
464 switch (rtype(i
.arg
[0])) {
466 fprintf(f
, "\tcallq ");
467 emitcon(&fn
->con
[i
.arg
[0].val
], f
);
471 emitf("callq *%L0", &i
, fn
, f
);
474 die("invalid call argument");
478 /* there is no good reason why this is here
479 * maybe we should split Osalloc in 2 different
480 * instructions depending on the result
482 emitf("subq %L0, %%rsp", &i
, fn
, f
);
484 emitcopy(i
.to
, TMP(RSP
), Kl
, fn
, f
);
487 if (KBASE(i
.cls
) == 0)
489 /* for floats, there is no swap instruction
490 * so we use xmm15 as a temporary
492 emitcopy(TMP(XMM0
+15), i
.arg
[0], i
.cls
, fn
, f
);
493 emitcopy(i
.arg
[0], i
.arg
[1], i
.cls
, fn
, f
);
494 emitcopy(i
.arg
[1], TMP(XMM0
+15), i
.cls
, fn
, f
);
504 /* specific to NAlign == 3 */
505 for (i
=0, o
=0; i
<NCLR
; i
++)
506 o
^= 1 & (fn
->reg
>> amd64_sysv_rclob
[i
]);
509 return 4*f
+ 8*o
+ 176*fn
->vararg
;
513 amd64_emitfn(Fn
*fn
, FILE *f
)
515 static char *ctoa
[] = {
516 #define X(c, s) [c] = s,
523 int *r
, c
, o
, n
, lbl
;
526 fprintf(f
, ".text\n");
528 fprintf(f
, ".globl %s%s\n", gassym
, fn
->name
);
532 "\tmovq %%rsp, %%rbp\n",
537 fprintf(f
, "\tsub $%"PRIu64
", %%rsp\n", fs
);
540 for (r
=amd64_sysv_rsave
; r
<&amd64_sysv_rsave
[6]; r
++, o
+=8)
541 fprintf(f
, "\tmovq %%%s, %d(%%rbp)\n", rname
[*r
][0], o
);
542 for (n
=0; n
<8; ++n
, o
+=16)
543 fprintf(f
, "\tmovaps %%xmm%d, %d(%%rbp)\n", n
, o
);
545 for (r
=amd64_sysv_rclob
; r
<&amd64_sysv_rclob
[NCLR
]; r
++)
546 if (fn
->reg
& BIT(*r
)) {
547 itmp
.arg
[0] = TMP(*r
);
548 emitf("pushq %L0", &itmp
, fn
, f
);
552 for (lbl
=0, b
=fn
->start
; b
; b
=b
->link
) {
553 if (lbl
|| b
->npred
> 1)
554 fprintf(f
, "%sbb%d:\n", gasloc
, id0
+b
->id
);
555 for (i
=b
->ins
; i
!=&b
->ins
[b
->nins
]; i
++)
558 switch (b
->jmp
.type
) {
562 "\tmovq %%rbp, %%rsp\n"
563 "\tsubq $%"PRIu64
", %%rsp\n",
566 for (r
=&amd64_sysv_rclob
[NCLR
]; r
>amd64_sysv_rclob
;)
567 if (fn
->reg
& BIT(*--r
)) {
568 itmp
.arg
[0] = TMP(*r
);
569 emitf("popq %L0", &itmp
, fn
, f
);
578 if (b
->s1
!= b
->link
)
579 fprintf(f
, "\tjmp %sbb%d\n",
580 gasloc
, id0
+b
->s1
->id
);
585 c
= b
->jmp
.type
- Jjf
;
586 if (0 <= c
&& c
<= NCmp
) {
587 if (b
->link
== b
->s2
) {
593 fprintf(f
, "\tj%s %sbb%d\n", ctoa
[c
],
594 gasloc
, id0
+b
->s2
->id
);
597 die("unhandled jump %d", b
->jmp
.type
);