3 char *locprefix
, *symprefix
;
11 Ki
= -1, /* matches Kw and Kl */
12 Ka
= -2, /* matches all classes */
15 /* Instruction format strings:
17 * if the format string starts with -, the instruction
18 * is assumed to be 3-address and is put in 2-address
19 * mode using an extra mov if necessary
21 * if the format string starts with +, the same as the
22 * above applies, but commutativity is also assumed
24 * %k is used to set the class of the instruction,
25 * it'll expand to "l", "q", "ss", "sd", depending
26 * on the instruction class
27 * %0 designates the first argument
28 * %1 designates the second argument
29 * %= designates the result
31 * if %k is not used, a prefix to 0, 1, or = must be
33 * M - memory reference
38 * S - single precision float
39 * D - double precision float
46 { OAdd
, Ka
, "+add%k %1, %=" },
47 { OSub
, Ka
, "-sub%k %1, %=" },
48 { OAnd
, Ki
, "+and%k %1, %=" },
49 { OOr
, Ki
, "+or%k %1, %=" },
50 { OXor
, Ki
, "+xor%k %1, %=" },
51 { OSar
, Ki
, "-sar%k %B1, %=" },
52 { OShr
, Ki
, "-shr%k %B1, %=" },
53 { OShl
, Ki
, "-shl%k %B1, %=" },
54 { OMul
, Ki
, "+imul%k %1, %=" },
55 { OMul
, Ks
, "+mulss %1, %=" }, /* fixme */
56 { OMul
, Kd
, "+mulsd %1, %=" },
57 { ODiv
, Ka
, "-div%k %1, %=" },
58 { OStorel
, Ka
, "movq %L0, %M1" },
59 { OStorew
, Ka
, "movl %W0, %M1" },
60 { OStoreh
, Ka
, "movw %H0, %M1" },
61 { OStoreb
, Ka
, "movb %B0, %M1" },
62 { OStores
, Ka
, "movss %S0, %M1" },
63 { OStored
, Ka
, "movsd %D0, %M1" },
64 { OLoad
, Ka
, "mov%k %M0, %=" },
65 { OLoadsw
, Kl
, "movslq %M0, %L=" },
66 { OLoadsw
, Kw
, "movl %M0, %W=" },
67 { OLoaduw
, Ki
, "movl %M0, %W=" },
68 { OLoadsh
, Ki
, "movsw%k %M0, %=" },
69 { OLoaduh
, Ki
, "movzw%k %M0, %=" },
70 { OLoadsb
, Ki
, "movsb%k %M0, %=" },
71 { OLoadub
, Ki
, "movzb%k %M0, %=" },
72 { OExtsw
, Kl
, "movslq %W0, %L=" },
73 { OExtuw
, Kl
, "movl %W0, %W=" },
74 { OExtsh
, Ki
, "movsw%k %H0, %=" },
75 { OExtuh
, Ki
, "movzw%k %H0, %=" },
76 { OExtsb
, Ki
, "movsb%k %B0, %=" },
77 { OExtub
, Ki
, "movzb%k %B0, %=" },
79 { OExts
, Kd
, "cvtss2sd %0, %=" }, /* see if factorization is possible */
80 { OTruncd
, Ks
, "cvttsd2ss %0, %=" },
81 { OFtosi
, Kw
, "cvttss2si %0, %=" },
82 { OFtosi
, Kl
, "cvttsd2si %0, %=" },
83 { OSitof
, Ks
, "cvtsi2ss %W0, %=" },
84 { OSitof
, Kd
, "cvtsi2sd %L0, %=" },
85 { OCast
, Ki
, "movq %D0, %L=" },
86 { OCast
, Ka
, "movq %L0, %D=" },
88 { OAddr
, Ki
, "lea%k %M0, %=" },
89 { OSwap
, Ki
, "xchg%k %0, %1" },
90 { OSign
, Kl
, "cqto" },
91 { OSign
, Kw
, "cltd" },
92 { OXDiv
, Ki
, "div%k %0" },
93 { OXIDiv
, Ki
, "idiv%k %0" },
94 { OXCmp
, Ks
, "comiss %S0, %S1" }, /* fixme, Kf */
95 { OXCmp
, Kd
, "comisd %D0, %D1" },
96 { OXCmp
, Ki
, "cmp%k %0, %1" },
97 { OXTest
, Ki
, "test%k %0, %1" },
98 { OXSet
+ICule
, Ki
, "setbe %B=\n\tmovzb%k %B=, %=" },
99 { OXSet
+ICult
, Ki
, "setb %B=\n\tmovzb%k %B=, %=" },
100 { OXSet
+ICsle
, Ki
, "setle %B=\n\tmovzb%k %B=, %=" },
101 { OXSet
+ICslt
, Ki
, "setl %B=\n\tmovzb%k %B=, %=" },
102 { OXSet
+ICsgt
, Ki
, "setg %B=\n\tmovzb%k %B=, %=" },
103 { OXSet
+ICsge
, Ki
, "setge %B=\n\tmovzb%k %B=, %=" },
104 { OXSet
+ICugt
, Ki
, "seta %B=\n\tmovzb%k %B=, %=" },
105 { OXSet
+ICuge
, Ki
, "setae %B=\n\tmovzb%k %B=, %=" },
106 { OXSet
+ICeq
, Ki
, "setz %B=\n\tmovzb%k %B=, %=" },
107 { OXSet
+ICne
, Ki
, "setnz %B=\n\tmovzb%k %B=, %=" },
108 { OXSet
+ICXnp
, Ki
, "setnp %B=\n\tmovsb%k %B=, %=" },
109 { OXSet
+ICXp
, Ki
, "setp %B=\n\tmovsb%k %B=, %=" },
113 static char *rname
[][4] = {
114 [RAX
] = {"rax", "eax", "ax", "al"},
115 [RBX
] = {"rbx", "ebx", "bx", "bl"},
116 [RCX
] = {"rcx", "ecx", "cx", "cl"},
117 [RDX
] = {"rdx", "edx", "dx", "dl"},
118 [RSI
] = {"rsi", "esi", "si", "sil"},
119 [RDI
] = {"rdi", "edi", "di", "dil"},
120 [RBP
] = {"rbp", "ebp", "bp", "bpl"},
121 [RSP
] = {"rsp", "esp", "sp", "spl"},
122 [R8
] = {"r8" , "r8d", "r8w", "r8b"},
123 [R9
] = {"r9" , "r9d", "r9w", "r9b"},
124 [R10
] = {"r10", "r10d", "r10w", "r10b"},
125 [R11
] = {"r11", "r11d", "r11w", "r11b"},
126 [R12
] = {"r12", "r12d", "r12w", "r12b"},
127 [R13
] = {"r13", "r13d", "r13w", "r13b"},
128 [R14
] = {"r14", "r14d", "r14w", "r14b"},
129 [R15
] = {"r15", "r15d", "r15w", "r15b"},
136 struct { int i
:29; } x
;
138 /* sign extend s using a bitfield */
140 /* specific to NAlign == 3 */
144 assert(fn
->slot
>= x
.i
);
145 return -4 * (fn
->slot
- x
.i
);
150 emitcon(Con
*con
, FILE *f
)
155 fprintf(f
, "%s%s", locprefix
, con
->label
);
157 fprintf(f
, "%s%s", symprefix
, con
->label
);
159 fprintf(f
, "%+"PRId64
, con
->bits
.i
);
162 fprintf(f
, "%"PRId64
, con
->bits
.i
);
170 regtoa(int reg
, int sz
)
175 sprintf(buf
, "xmm%d", reg
-XMM0
);
178 return rname
[reg
][sz
];
182 getarg(char c
, Ins
*i
)
192 die("invalid arg letter %c", c
);
196 static void emitins(Ins
, Fn
*, FILE *);
199 emitcopy(Ref r1
, Ref r2
, int k
, Fn
*fn
, FILE *f
)
211 emitf(char *s
, Ins
*i
, Fn
*fn
, FILE *f
)
213 static char clstoa
[][3] = {"l", "q", "ss", "sd"};
222 if (req(i
->arg
[1], i
->to
)) {
224 i
->arg
[0] = i
->arg
[1];
229 assert((!req(i
->arg
[1], i
->to
) || req(i
->arg
[0], i
->to
)) &&
230 "cannot convert to 2-address");
231 emitcopy(i
->to
, i
->arg
[0], i
->cls
, fn
, f
);
238 while ((c
= *s
++) != '%')
244 switch ((c
= *s
++)) {
249 fputs(clstoa
[i
->cls
], f
);
254 sz
= KWIDE(i
->cls
) ? SLong
: SWord
;
259 sz
= SLong
; /* does not matter for floats */
263 switch (rtype(ref
)) {
266 fprintf(f
, "%%%s", regtoa(ref
.val
, sz
));
269 fprintf(f
, "%d(%%rbp)", slot(ref
.val
, fn
));
273 m
= &fn
->mem
[ref
.val
];
274 if (rtype(m
->base
) == RSlot
) {
276 off
.bits
.i
= slot(m
->base
.val
, fn
);
277 addcon(&m
->offset
, &off
);
280 if (m
->offset
.type
!= CUndef
)
281 emitcon(&m
->offset
, f
);
286 fprintf(f
, "%%%s", regtoa(m
->base
.val
, SLong
));
287 if (!req(m
->index
, R
))
288 fprintf(f
, ", %%%s, %d",
289 regtoa(m
->index
.val
, SLong
),
296 emitcon(&fn
->con
[ref
.val
], f
);
317 switch (rtype(ref
)) {
321 fprintf(f
, "%d(%%rbp)", slot(ref
.val
, fn
));
324 emitcon(&fn
->con
[ref
.val
], f
);
325 fprintf(f
, "(%%rip)");
329 fprintf(f
, "(%%%s)", regtoa(ref
.val
, SLong
));
336 die("invalid format specifier %%%c", c
);
342 emitins(Ins i
, Fn
*fn
, FILE *f
)
351 /* most instructions are just pulled out of
352 * the table omap[], some special cases are
355 /* this linear search should really be a binary
357 if (omap
[o
].op
== NOp
)
358 die("no match for %s(%d)", opdesc
[i
.op
].name
, i
.cls
);
359 if (omap
[o
].op
== i
.op
)
360 if (omap
[o
].cls
== i
.cls
361 || (omap
[o
].cls
== Ki
&& KBASE(i
.cls
) == 0)
362 || (omap
[o
].cls
== Ka
))
365 emitf(omap
[o
].asm, &i
, fn
, f
);
368 /* just do nothing for nops, they are inserted
372 /* here, we try to use the 3-address form
373 * of multiplication when possible */
374 if (rtype(i
.arg
[1]) == RCon
) {
379 if (KBASE(i
.cls
) == 0 /* only available for ints */
380 && rtype(i
.arg
[0]) == RCon
381 && rtype(i
.arg
[1]) == RTmp
) {
382 emitf("imul%k %0, %1, %=", &i
, fn
, f
);
387 /* we have to use the negation trick to handle
388 * some 3-address subtractions */
389 if (req(i
.to
, i
.arg
[1])) {
390 emitf("neg%k %=", &i
, fn
, f
);
391 emitf("add%k %0, %=", &i
, fn
, f
);
396 /* make sure we don't emit useless copies,
397 * also, we can use a trick to load 64-bit
398 * registers, it's detailed in my note below
399 * http://c9x.me/art/notes.html?09/19/2015 */
400 if (req(i
.to
, R
) || req(i
.arg
[0], R
))
403 && rtype(i
.arg
[0]) == RCon
405 && fn
->con
[i
.arg
[0].val
].type
== CBits
406 && (val
= fn
->con
[i
.arg
[0].val
].bits
.i
) >= 0
407 && val
<= UINT32_MAX
) {
408 emitf("movl %W0, %W=", &i
, fn
, f
);
409 } else if (isreg(i
.to
)
410 && rtype(i
.arg
[0]) == RCon
411 && fn
->con
[i
.arg
[0].val
].type
== CAddr
) {
412 emitf("lea%k %M0, %=", &i
, fn
, f
);
413 } else if (!req(i
.arg
[0], i
.to
))
414 emitf("mov%k %0, %=", &i
, fn
, f
);
417 /* calls simply have a weird syntax in AT&T
419 switch (rtype(i
.arg
[0])) {
421 fprintf(f
, "\tcallq ");
422 emitcon(&fn
->con
[i
.arg
[0].val
], f
);
426 emitf("callq *%L0", &i
, fn
, f
);
429 die("invalid call argument");
433 /* there is no good reason why this is here
434 * maybe we should split OSAlloc in 2 different
435 * instructions depending on the result
437 emitf("subq %L0, %%rsp", &i
, fn
, f
);
439 emitcopy(i
.to
, TMP(RSP
), Kl
, fn
, f
);
442 if (KBASE(i
.cls
) == 0)
444 /* for floats, there is no swap instruction
445 * so we use xmm15 as a temporary
447 emitcopy(TMP(XMM0
+15), i
.arg
[0], i
.cls
, fn
, f
);
448 emitcopy(i
.arg
[0], i
.arg
[1], i
.cls
, fn
, f
);
449 emitcopy(i
.arg
[1], TMP(XMM0
+15), i
.cls
, fn
, f
);
458 default: die("invalid int comparison %d", cmp
);
459 case ICule
: return ICugt
;
460 case ICult
: return ICuge
;
461 case ICsle
: return ICsgt
;
462 case ICslt
: return ICsge
;
463 case ICsgt
: return ICsle
;
464 case ICsge
: return ICslt
;
465 case ICugt
: return ICule
;
466 case ICuge
: return ICult
;
467 case ICeq
: return ICne
;
468 case ICne
: return ICeq
;
469 case ICXnp
: return ICXp
;
470 case ICXp
: return ICXnp
;
479 /* specific to NAlign == 3 */
480 for (i
=0, o
=0; i
<NRClob
; i
++)
481 o
^= 1 & (fn
->reg
>> rclob
[i
]);
488 emitfn(Fn
*fn
, FILE *f
)
490 static char *ctoa
[] = {
509 fprintf(f
, ".text\n");
511 fprintf(f
, ".globl %s%s\n", symprefix
, fn
->name
);
515 "\tmov %%rsp, %%rbp\n",
520 fprintf(f
, "\tsub $%d, %%rsp\n", fs
);
521 for (r
=rclob
; r
-rclob
< NRClob
; r
++)
522 if (fn
->reg
& BIT(*r
)) {
523 itmp
.arg
[0] = TMP(*r
);
524 emitf("pushq %L0", &itmp
, fn
, f
);
527 for (b
=fn
->start
; b
; b
=b
->link
) {
528 fprintf(f
, "%sbb%d: /* %s */\n", locprefix
, id0
+b
->id
, b
->name
);
529 for (i
=b
->ins
; i
!=&b
->ins
[b
->nins
]; i
++)
531 switch (b
->jmp
.type
) {
533 for (r
=&rclob
[NRClob
]; r
>rclob
;)
534 if (fn
->reg
& BIT(*--r
)) {
535 itmp
.arg
[0] = TMP(*r
);
536 emitf("popq %L0", &itmp
, fn
, f
);
545 if (b
->s1
!= b
->link
)
546 fprintf(f
, "\tjmp %sbb%d /* %s */\n",
547 locprefix
, id0
+b
->s1
->id
, b
->s1
->name
);
550 c
= b
->jmp
.type
- JXJc
;
551 if (0 <= c
&& c
<= NXICmp
) {
552 if (b
->link
== b
->s2
) {
558 fprintf(f
, "\tj%s %sbb%d /* %s */\n", ctoa
[c
],
559 locprefix
, id0
+b
->s2
->id
, b
->s2
->name
);
562 die("unhandled jump %d", b
->jmp
.type
);
569 emitdat(Dat
*d
, FILE *f
)
572 static char *dtoa
[] = {
583 fprintf(f
, ".data\n");
589 fprintf(f
, ".align 8\n");
591 fprintf(f
, ".globl %s%s\n", symprefix
, d
->u
.str
);
592 fprintf(f
, "%s%s:\n", symprefix
, d
->u
.str
);
595 fprintf(f
, "\t.fill %"PRId64
",1,0\n", d
->u
.num
);
598 if (d
->type
== DAlign
)
603 err("strings only supported for 'b' currently");
604 fprintf(f
, "\t.ascii \"%s\"\n", d
->u
.str
);
607 fprintf(f
, "%s %s%+"PRId64
"\n",
608 dtoa
[d
->type
], d
->u
.ref
.nam
,
612 fprintf(f
, "%s %"PRId64
"\n",
613 dtoa
[d
->type
], d
->u
.num
);
619 typedef struct FBits FBits
;
634 stashfp(int64_t n
, int w
)
639 /* does a dumb de-dup of fp constants
640 * this should be the linker's job */
641 for (pb
=&stash
, i
=0; (b
=*pb
); pb
=&b
->link
, i
++)
642 if (n
== b
->bits
.n
&& w
== b
->wide
)
644 b
= emalloc(sizeof *b
);
660 fprintf(f
, "/* floating point constants */\n");
661 fprintf(f
, ".data\n.align 8\n");
662 for (b
=stash
, i
=0; b
; b
=b
->link
, i
++)
668 locprefix
, i
, b
->bits
.n
,
671 for (b
=stash
, i
=0; b
; b
=b
->link
, i
++)
677 locprefix
, i
, b
->bits
.n
& 0xffffffff,