3 char *locprefix
, *symprefix
;
11 Ki
= -1, /* matches Kw and Kl */
12 Ka
= -2, /* matches all classes */
15 /* Instruction format strings:
17 * if the format string starts with -, the instruction
18 * is assumed to be 3-address and is put in 2-address
19 * mode using an extra mov if necessary
21 * if the format string starts with +, the same as the
22 * above applies, but commutativity is also assumed
24 * %k is used to set the class of the instruction,
25 * it'll expand to "l", "q", "ss", "sd", depending
26 * on the instruction class
27 * %0 designates the first argument
28 * %1 designates the second argument
29 * %= designates the result
31 * if %k is not used, a prefix to 0, 1, or = must be
33 * M - memory reference
38 * S - single precision float
39 * D - double precision float
46 { Oadd
, Ka
, "+add%k %1, %=" },
47 { Osub
, Ka
, "-sub%k %1, %=" },
48 { Oand
, Ki
, "+and%k %1, %=" },
49 { Oor
, Ki
, "+or%k %1, %=" },
50 { Oxor
, Ki
, "+xor%k %1, %=" },
51 { Osar
, Ki
, "-sar%k %B1, %=" },
52 { Oshr
, Ki
, "-shr%k %B1, %=" },
53 { Oshl
, Ki
, "-shl%k %B1, %=" },
54 { Omul
, Ki
, "+imul%k %1, %=" },
55 { Omul
, Ks
, "+mulss %1, %=" }, /* fixme */
56 { Omul
, Kd
, "+mulsd %1, %=" },
57 { Odiv
, Ka
, "-div%k %1, %=" },
58 { Ostorel
, Ka
, "movq %L0, %M1" },
59 { Ostorew
, Ka
, "movl %W0, %M1" },
60 { Ostoreh
, Ka
, "movw %H0, %M1" },
61 { Ostoreb
, Ka
, "movb %B0, %M1" },
62 { Ostores
, Ka
, "movss %S0, %M1" },
63 { Ostored
, Ka
, "movsd %D0, %M1" },
64 { Oload
, Ka
, "mov%k %M0, %=" },
65 { Oloadsw
, Kl
, "movslq %M0, %L=" },
66 { Oloadsw
, Kw
, "movl %M0, %W=" },
67 { Oloaduw
, Ki
, "movl %M0, %W=" },
68 { Oloadsh
, Ki
, "movsw%k %M0, %=" },
69 { Oloaduh
, Ki
, "movzw%k %M0, %=" },
70 { Oloadsb
, Ki
, "movsb%k %M0, %=" },
71 { Oloadub
, Ki
, "movzb%k %M0, %=" },
72 { Oextsw
, Kl
, "movslq %W0, %L=" },
73 { Oextuw
, Kl
, "movl %W0, %W=" },
74 { Oextsh
, Ki
, "movsw%k %H0, %=" },
75 { Oextuh
, Ki
, "movzw%k %H0, %=" },
76 { Oextsb
, Ki
, "movsb%k %B0, %=" },
77 { Oextub
, Ki
, "movzb%k %B0, %=" },
79 { Oexts
, Kd
, "cvtss2sd %0, %=" }, /* see if factorization is possible */
80 { Otruncd
, Ks
, "cvttsd2ss %0, %=" },
81 { Ostosi
, Ki
, "cvttss2si%k %0, %=" },
82 { Odtosi
, Ki
, "cvttsd2si%k %0, %=" },
83 { Oswtof
, Ka
, "cvtsi2%k %W0, %=" },
84 { Osltof
, Ka
, "cvtsi2%k %L0, %=" },
85 { Ocast
, Ki
, "movq %D0, %L=" },
86 { Ocast
, Ka
, "movq %L0, %D=" },
88 { Oaddr
, Ki
, "lea%k %M0, %=" },
89 { Oswap
, Ki
, "xchg%k %0, %1" },
90 { Osign
, Kl
, "cqto" },
91 { Osign
, Kw
, "cltd" },
92 { Oxdiv
, Ki
, "div%k %0" },
93 { Oxidiv
, Ki
, "idiv%k %0" },
94 { Oxcmp
, Ks
, "comiss %S0, %S1" }, /* fixme, Kf */
95 { Oxcmp
, Kd
, "comisd %D0, %D1" },
96 { Oxcmp
, Ki
, "cmp%k %0, %1" },
97 { Oxtest
, Ki
, "test%k %0, %1" },
98 { Oxset
+ICule
, Ki
, "setbe %B=\n\tmovzb%k %B=, %=" },
99 { Oxset
+ICult
, Ki
, "setb %B=\n\tmovzb%k %B=, %=" },
100 { Oxset
+ICsle
, Ki
, "setle %B=\n\tmovzb%k %B=, %=" },
101 { Oxset
+ICslt
, Ki
, "setl %B=\n\tmovzb%k %B=, %=" },
102 { Oxset
+ICsgt
, Ki
, "setg %B=\n\tmovzb%k %B=, %=" },
103 { Oxset
+ICsge
, Ki
, "setge %B=\n\tmovzb%k %B=, %=" },
104 { Oxset
+ICugt
, Ki
, "seta %B=\n\tmovzb%k %B=, %=" },
105 { Oxset
+ICuge
, Ki
, "setae %B=\n\tmovzb%k %B=, %=" },
106 { Oxset
+ICeq
, Ki
, "setz %B=\n\tmovzb%k %B=, %=" },
107 { Oxset
+ICne
, Ki
, "setnz %B=\n\tmovzb%k %B=, %=" },
108 { Oxset
+ICxnp
, Ki
, "setnp %B=\n\tmovsb%k %B=, %=" },
109 { Oxset
+ICxp
, Ki
, "setp %B=\n\tmovsb%k %B=, %=" },
113 static char *rname
[][4] = {
114 [RAX
] = {"rax", "eax", "ax", "al"},
115 [RBX
] = {"rbx", "ebx", "bx", "bl"},
116 [RCX
] = {"rcx", "ecx", "cx", "cl"},
117 [RDX
] = {"rdx", "edx", "dx", "dl"},
118 [RSI
] = {"rsi", "esi", "si", "sil"},
119 [RDI
] = {"rdi", "edi", "di", "dil"},
120 [RBP
] = {"rbp", "ebp", "bp", "bpl"},
121 [RSP
] = {"rsp", "esp", "sp", "spl"},
122 [R8
] = {"r8" , "r8d", "r8w", "r8b"},
123 [R9
] = {"r9" , "r9d", "r9w", "r9b"},
124 [R10
] = {"r10", "r10d", "r10w", "r10b"},
125 [R11
] = {"r11", "r11d", "r11w", "r11b"},
126 [R12
] = {"r12", "r12d", "r12w", "r12b"},
127 [R13
] = {"r13", "r13d", "r13w", "r13b"},
128 [R14
] = {"r14", "r14d", "r14w", "r14b"},
129 [R15
] = {"r15", "r15d", "r15w", "r15b"},
136 struct { int i
:29; } x
;
138 /* sign extend s using a bitfield */
140 assert(x
.i
<= fn
->slot
);
141 /* specific to NAlign == 3 */
145 return -176 + -4 * (fn
->slot
- x
.i
);
147 return -4 * (fn
->slot
- x
.i
);
151 emitcon(Con
*con
, FILE *f
)
156 fprintf(f
, "%s%s", locprefix
, con
->label
);
158 fprintf(f
, "%s%s", symprefix
, con
->label
);
160 fprintf(f
, "%+"PRId64
, con
->bits
.i
);
163 fprintf(f
, "%"PRId64
, con
->bits
.i
);
171 regtoa(int reg
, int sz
)
176 sprintf(buf
, "xmm%d", reg
-XMM0
);
179 return rname
[reg
][sz
];
183 getarg(char c
, Ins
*i
)
193 die("invalid arg letter %c", c
);
197 static void emitins(Ins
, Fn
*, FILE *);
200 emitcopy(Ref r1
, Ref r2
, int k
, Fn
*fn
, FILE *f
)
212 emitf(char *s
, Ins
*i
, Fn
*fn
, FILE *f
)
214 static char clstoa
[][3] = {"l", "q", "ss", "sd"};
223 if (req(i
->arg
[1], i
->to
)) {
225 i
->arg
[0] = i
->arg
[1];
230 assert((!req(i
->arg
[1], i
->to
) || req(i
->arg
[0], i
->to
)) &&
231 "cannot convert to 2-address");
232 emitcopy(i
->to
, i
->arg
[0], i
->cls
, fn
, f
);
239 while ((c
= *s
++) != '%')
245 switch ((c
= *s
++)) {
250 fputs(clstoa
[i
->cls
], f
);
255 sz
= KWIDE(i
->cls
) ? SLong
: SWord
;
260 sz
= SLong
; /* does not matter for floats */
264 switch (rtype(ref
)) {
267 fprintf(f
, "%%%s", regtoa(ref
.val
, sz
));
270 fprintf(f
, "%d(%%rbp)", slot(ref
.val
, fn
));
274 m
= &fn
->mem
[ref
.val
];
275 if (rtype(m
->base
) == RSlot
) {
277 off
.bits
.i
= slot(m
->base
.val
, fn
);
278 addcon(&m
->offset
, &off
);
281 if (m
->offset
.type
!= CUndef
)
282 emitcon(&m
->offset
, f
);
287 fprintf(f
, "%%%s", regtoa(m
->base
.val
, SLong
));
288 if (!req(m
->index
, R
))
289 fprintf(f
, ", %%%s, %d",
290 regtoa(m
->index
.val
, SLong
),
297 emitcon(&fn
->con
[ref
.val
], f
);
318 switch (rtype(ref
)) {
322 fprintf(f
, "%d(%%rbp)", slot(ref
.val
, fn
));
325 emitcon(&fn
->con
[ref
.val
], f
);
326 fprintf(f
, "(%%rip)");
330 fprintf(f
, "(%%%s)", regtoa(ref
.val
, SLong
));
337 die("invalid format specifier %%%c", c
);
343 emitins(Ins i
, Fn
*fn
, FILE *f
)
352 /* most instructions are just pulled out of
353 * the table omap[], some special cases are
356 /* this linear search should really be a binary
358 if (omap
[o
].op
== NOp
)
359 die("no match for %s(%d)", opdesc
[i
.op
].name
, i
.cls
);
360 if (omap
[o
].op
== i
.op
)
361 if (omap
[o
].cls
== i
.cls
362 || (omap
[o
].cls
== Ki
&& KBASE(i
.cls
) == 0)
363 || (omap
[o
].cls
== Ka
))
366 emitf(omap
[o
].asm, &i
, fn
, f
);
369 /* just do nothing for nops, they are inserted
373 /* here, we try to use the 3-address form
374 * of multiplication when possible */
375 if (rtype(i
.arg
[1]) == RCon
) {
380 if (KBASE(i
.cls
) == 0 /* only available for ints */
381 && rtype(i
.arg
[0]) == RCon
382 && rtype(i
.arg
[1]) == RTmp
) {
383 emitf("imul%k %0, %1, %=", &i
, fn
, f
);
388 /* we have to use the negation trick to handle
389 * some 3-address subtractions */
390 if (req(i
.to
, i
.arg
[1])) {
391 emitf("neg%k %=", &i
, fn
, f
);
392 emitf("add%k %0, %=", &i
, fn
, f
);
397 /* make sure we don't emit useless copies,
398 * also, we can use a trick to load 64-bits
399 * registers, it's detailed in my note below
400 * http://c9x.me/art/notes.html?09/19/2015 */
401 if (req(i
.to
, R
) || req(i
.arg
[0], R
))
404 && rtype(i
.arg
[0]) == RCon
406 && fn
->con
[i
.arg
[0].val
].type
== CBits
407 && (val
= fn
->con
[i
.arg
[0].val
].bits
.i
) >= 0
408 && val
<= UINT32_MAX
) {
409 emitf("movl %W0, %W=", &i
, fn
, f
);
410 } else if (isreg(i
.to
)
411 && rtype(i
.arg
[0]) == RCon
412 && fn
->con
[i
.arg
[0].val
].type
== CAddr
) {
413 emitf("lea%k %M0, %=", &i
, fn
, f
);
414 } else if (!req(i
.arg
[0], i
.to
))
415 emitf("mov%k %0, %=", &i
, fn
, f
);
418 /* calls simply have a weird syntax in AT&T
420 switch (rtype(i
.arg
[0])) {
422 fprintf(f
, "\tcallq ");
423 emitcon(&fn
->con
[i
.arg
[0].val
], f
);
427 emitf("callq *%L0", &i
, fn
, f
);
430 die("invalid call argument");
434 /* there is no good reason why this is here
435 * maybe we should split Osalloc in 2 different
436 * instructions depending on the result
438 emitf("subq %L0, %%rsp", &i
, fn
, f
);
440 emitcopy(i
.to
, TMP(RSP
), Kl
, fn
, f
);
443 if (KBASE(i
.cls
) == 0)
445 /* for floats, there is no swap instruction
446 * so we use xmm15 as a temporary
448 emitcopy(TMP(XMM0
+15), i
.arg
[0], i
.cls
, fn
, f
);
449 emitcopy(i
.arg
[0], i
.arg
[1], i
.cls
, fn
, f
);
450 emitcopy(i
.arg
[1], TMP(XMM0
+15), i
.cls
, fn
, f
);
459 default: die("invalid int comparison %d", cmp
);
460 case ICule
: return ICugt
;
461 case ICult
: return ICuge
;
462 case ICsle
: return ICsgt
;
463 case ICslt
: return ICsge
;
464 case ICsgt
: return ICsle
;
465 case ICsge
: return ICslt
;
466 case ICugt
: return ICule
;
467 case ICuge
: return ICult
;
468 case ICeq
: return ICne
;
469 case ICne
: return ICeq
;
470 case ICxnp
: return ICxp
;
471 case ICxp
: return ICxnp
;
480 /* specific to NAlign == 3 */
481 for (i
=0, o
=0; i
<NRClob
; i
++)
482 o
^= 1 & (fn
->reg
>> rclob
[i
]);
485 return 4*f
+ 8*o
+ 176*fn
->vararg
;
489 emitfn(Fn
*fn
, FILE *f
)
491 static char *ctoa
[] = {
510 fprintf(f
, ".text\n");
512 fprintf(f
, ".globl %s%s\n", symprefix
, fn
->name
);
516 "\tmovq %%rsp, %%rbp\n",
521 fprintf(f
, "\tsub $%d, %%rsp\n", fs
);
524 for (r
=rsave
; r
-rsave
<6; ++r
, o
+=8)
525 fprintf(f
, "\tmovq %%%s, %d(%%rbp)\n", rname
[*r
][0], o
);
526 for (n
=0; n
<8; ++n
, o
+=16)
527 fprintf(f
, "\tmovaps %%xmm%d, %d(%%rbp)\n", n
, o
);
529 for (r
=rclob
; r
-rclob
< NRClob
; r
++)
530 if (fn
->reg
& BIT(*r
)) {
531 itmp
.arg
[0] = TMP(*r
);
532 emitf("pushq %L0", &itmp
, fn
, f
);
535 for (b
=fn
->start
; b
; b
=b
->link
) {
536 fprintf(f
, "%sbb%d: /* %s */\n", locprefix
, id0
+b
->id
, b
->name
);
537 for (i
=b
->ins
; i
!=&b
->ins
[b
->nins
]; i
++)
539 switch (b
->jmp
.type
) {
541 for (r
=&rclob
[NRClob
]; r
>rclob
;)
542 if (fn
->reg
& BIT(*--r
)) {
543 itmp
.arg
[0] = TMP(*r
);
544 emitf("popq %L0", &itmp
, fn
, f
);
553 if (b
->s1
!= b
->link
)
554 fprintf(f
, "\tjmp %sbb%d /* %s */\n",
555 locprefix
, id0
+b
->s1
->id
, b
->s1
->name
);
558 c
= b
->jmp
.type
- Jxjc
;
559 if (0 <= c
&& c
<= NXICmp
) {
560 if (b
->link
== b
->s2
) {
566 fprintf(f
, "\tj%s %sbb%d /* %s */\n", ctoa
[c
],
567 locprefix
, id0
+b
->s2
->id
, b
->s2
->name
);
570 die("unhandled jump %d", b
->jmp
.type
);
577 emitdat(Dat
*d
, FILE *f
)
580 static char *dtoa
[] = {
591 fprintf(f
, ".data\n");
597 fprintf(f
, ".align 8\n");
599 fprintf(f
, ".globl %s%s\n", symprefix
, d
->u
.str
);
600 fprintf(f
, "%s%s:\n", symprefix
, d
->u
.str
);
603 fprintf(f
, "\t.fill %"PRId64
",1,0\n", d
->u
.num
);
606 if (d
->type
== DAlign
)
611 err("strings only supported for 'b' currently");
612 fprintf(f
, "\t.ascii \"%s\"\n", d
->u
.str
);
615 fprintf(f
, "%s %s%+"PRId64
"\n",
616 dtoa
[d
->type
], d
->u
.ref
.nam
,
620 fprintf(f
, "%s %"PRId64
"\n",
621 dtoa
[d
->type
], d
->u
.num
);
627 typedef struct FBits FBits
;
642 stashfp(int64_t n
, int w
)
647 /* does a dumb de-dup of fp constants
648 * this should be the linker's job */
649 for (pb
=&stash
, i
=0; (b
=*pb
); pb
=&b
->link
, i
++)
650 if (n
== b
->bits
.n
&& w
== b
->wide
)
652 b
= emalloc(sizeof *b
);
668 fprintf(f
, "/* floating point constants */\n");
669 fprintf(f
, ".data\n.align 8\n");
670 for (b
=stash
, i
=0; b
; b
=b
->link
, i
++)
676 locprefix
, i
, b
->bits
.n
,
679 for (b
=stash
, i
=0; b
; b
=b
->link
, i
++)
685 locprefix
, i
, b
->bits
.n
& 0xffffffff,