use rot:imm op2 for "add r0, #0xcc000000"
[neatas.git] / neatas.c
blob92d56c994cc51aac0cd7d7dbd466bd0daa270756
1 /*
2 * neatas - a small arm assembler
4 * Copyright (C) 2011 Ali Gholami Rudi
6 * This program is released under GNU GPL version 2.
7 */
8 #include <ctype.h>
9 #include <fcntl.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <unistd.h>
14 #include <sys/types.h>
15 #include <sys/stat.h>
16 #include "out.h"
#define BUFSIZE		(1 << 14)
#define TOKLEN		128

#define DELIMS		",:{}[]#=-+ \t\n/!^"
/* pack the first two/three characters of a mnemonic into one int */
#define TOK2(a)		((a)[0] << 16 | (a)[1] << 8)
#define TOK3(a)		((a)[0] << 16 | (a)[1] << 8 | (a)[2])

static char buf[BUFSIZE];	/* source text, NUL-terminated */
static int cur;			/* read position in buf */

static char cs[BUFSIZE];	/* generated code section */
static int cslen;		/* number of bytes used in cs */

/*
 * Append one instruction/data word to the code section.
 *
 * The first four bytes of the host representation of i are copied,
 * exactly as before; the only addition is the check that the word
 * still fits into cs.
 */
static void gen(unsigned long i)
{
	if (cslen + 4 > BUFSIZE) {
		fprintf(stderr, "neatas: code section overflow\n");
		exit(1);
	}
	memcpy(cs + cslen, &i, 4);
	cslen += 4;
}
37 static int tok_read(char *s)
39 while (1) {
40 while (isspace(buf[cur]))
41 cur++;
42 if (buf[cur] == '/' && buf[cur + 1] == '*') {
43 while (buf[cur] && (buf[cur] != '*' || buf[cur + 1] != '/'))
44 cur++;
45 continue;
47 if (buf[cur] == ';' || buf[cur] == '@') {
48 while (buf[cur] && buf[cur] != '\n')
49 cur++;
50 continue;
52 break;
54 if (!strchr(DELIMS, buf[cur])) {
55 while (!strchr(DELIMS, buf[cur]))
56 *s++ = buf[cur++];
57 *s = '\0';
58 return 0;
60 s[0] = buf[cur++];
61 s[1] = '\0';
62 return s[0] != 0;
65 static char tok[TOKLEN];
66 static char tokc[TOKLEN];
67 static int tok_next;
69 /* next token in lower-case */
70 static char *tok_get(void)
72 char *s = tokc;
73 char *d = tok;
74 if (!tok_next) {
75 tok_read(tokc);
76 while (*s)
77 *d++ = tolower(*s++);
78 *d = '\0';
80 tok_next = 0;
81 return tok;
84 /* next token in original case */
85 static char *tok_case(void)
87 tok_get();
88 return tokc;
91 /* have a look at the next token */
92 static char *tok_see(void)
94 if (!tok_next)
95 tok_get();
96 tok_next = 1;
97 return tok;
static char *digs = "0123456789abcdef";

/*
 * Parse a number: optional sign, optional "0x" prefix, then digits.
 *
 * Parsing stops at the first character that is not a digit of the
 * selected base; the original subtracted a NULL strchr() result here.
 * If bits is less than 32, the result is truncated to that many
 * low-order bits.
 */
static long num(char *s, int bits)
{
	int base = 10;
	int neg = 0;
	long n = 0;
	if (*s == '-' || *s == '+') {
		neg = *s == '-';
		s++;
	}
	if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
		base = 16;
		s += 2;
	}
	for (; *s; s++) {
		char *p = strchr(digs, tolower((unsigned char) *s));
		if (!p || p - digs >= base)
			break;
		n = n * base + (p - digs);
	}
	if (neg)
		n = -n;
	return bits < 32 ? n & ((1ul << bits) - 1) : n;
}
#define NLOCALS		1024
#define NEXTERNS	1024
#define NAMELEN		32

static char locals[NLOCALS][NAMELEN];	/* local label names */
/* code offsets of the local labels; int, since offsets exceed the range of char */
static int loffs[NLOCALS];
static int nlocals;
static char externs[NEXTERNS][NAMELEN];	/* names declared with .extern */
static int nexterns;
static char globals[NEXTERNS][NAMELEN];	/* names declared with .global */
static int nglobals;
138 static void label_extern(char *name)
140 int idx = nexterns++;
141 strcpy(externs[idx], name);
144 static void label_global(char *name)
146 int idx = nglobals++;
147 strcpy(globals[idx], name);
150 static void label_local(char *name)
152 int idx = nlocals++;
153 strcpy(locals[idx], name);
154 loffs[idx] = cslen;
155 out_sym(locals[idx], OUT_CS, loffs[idx], 0);
158 static int label_isextern(char *name)
160 int i;
161 for (i = 0; i < nexterns; i++)
162 if (!strcmp(name, externs[i]))
163 return 1;
164 return 0;
167 static int label_offset(char *name)
169 int i;
170 for (i = 0; i < nlocals; i++)
171 if (!strcmp(name, locals[i]))
172 return loffs[i];
173 return 0;
176 static void label_write(void)
178 int i;
179 for (i = 0; i < nglobals; i++)
180 out_sym(globals[i], OUT_GLOB | OUT_CS,
181 label_offset(globals[i]), 0);
184 #define NRELOCS 1024
186 /* absolute relocations */
187 static char absns[NRELOCS][NAMELEN]; /* symbol name */
188 static long absos[NRELOCS]; /* relocation location */
189 static int nabs;
190 /* relative relocations */
191 static char relns[NRELOCS][NAMELEN]; /* symbol name */
192 static long relos[NRELOCS]; /* relocation location */
193 static long relas[NRELOCS]; /* relocation addend */
194 static long relbs[NRELOCS]; /* relocation bits: ldrh=8, 12=ldr, 24=bl */
195 static int nrel;
197 static void reloc_rel(char *name, long off, int bits)
199 int idx = nrel++;
200 strcpy(relns[idx], name);
201 relos[idx] = cslen;
202 relas[idx] = off;
203 relbs[idx] = bits;
206 static void reloc_abs(char *name)
208 int idx = nabs++;
209 strcpy(absns[idx], name);
210 absos[idx] = cslen;
#define CSBEG_NAME		"__neatas_cs"

/*
 * fill immediate value for bl instruction
 *
 * The byte offset imm is added to the word offset already stored in
 * the low 24 bits of *dst.  All other bits of *dst are preserved,
 * including those above bit 31 when long is wider than 32 bits (the
 * original mask 0xff000000 cleared them).  The arithmetic is unsigned
 * to avoid shifting a signed value left.
 */
static void bl_imm(long *dst, long imm)
{
	unsigned long word = *dst;
	unsigned long off = ((word << 2) + (unsigned long) imm) >> 2;
	*dst = (long) ((word & ~0x00fffffful) | (off & 0x00fffffful));
}
/*
 * fill immediate value for ldr instruction
 *
 * half selects the split 4+4 bit offset of the halfword/signed
 * transfers; otherwise imm is added to the 12-bit offset field.
 * Bits outside the offset field are preserved, including those above
 * bit 31 when long is wider than 32 bits.
 */
static void ldr_imm(long *dst, long imm, int half)
{
	/* a negative offset flips the u-bit and is stored as a magnitude */
	if (imm < 0) {
		*dst ^= (1 << 23);
		imm = -imm;
	}
	if (!half)
		*dst = (*dst & ~0xfffl) | ((*dst + imm) & 0x00000fff);
	else
		*dst = (*dst & ~0xf0fl) |
			(imm & 0x0f) | ((imm & 0xf0) << 4);
}
237 static void reloc_write(void)
239 int i;
240 out_sym(CSBEG_NAME, OUT_CS, 0, 0);
241 for (i = 0; i < nabs; i++) {
242 if (label_isextern(absns[i])) {
243 out_rel(absns[i], OUT_CS, absos[i]);
244 } else {
245 long off = label_offset(absns[i]);
246 out_rel(CSBEG_NAME, OUT_CS, absos[i]);
247 *(long *) (cs + absos[i]) += off;
250 for (i = 0; i < nrel; i++) {
251 long *dst = (void *) cs + relos[i];
252 long off;
253 if (label_isextern(relns[i])) {
254 out_rel(relns[i], OUT_CS | OUT_REL24, relos[i]);
255 bl_imm(dst, relas[i] - 8);
256 continue;
258 off = relas[i] + label_offset(relns[i]) - relos[i] - 8;
259 /* bl instruction */
260 if (relbs[i] == 24)
261 bl_imm(dst, off);
262 else
263 ldr_imm(dst, off, relbs[i] == 8);
267 #define NDATS 1024
269 /* data pool */
270 static long dat_offs[NDATS]; /* data immediate value */
271 static long dat_locs[NDATS]; /* address of pointing ldr */
272 static char dat_names[NDATS][NAMELEN]; /* relocation data symbol name */
273 static int ndats;
275 static void pool_num(long num)
277 int idx = ndats++;
278 dat_offs[idx] = num;
279 dat_locs[idx] = cslen;
282 static void pool_reloc(char *name, long off)
284 int idx = ndats++;
285 dat_offs[idx] = off;
286 dat_locs[idx] = cslen;
287 strcpy(dat_names[idx], name);
290 static void pool_write(void)
292 int i;
293 for (i = 0; i < ndats; i++) {
294 if (dat_names[i]) {
295 long *loc = (void *) cs + dat_locs[i];
296 int off = cslen - dat_locs[i] - 8;
297 reloc_abs(dat_names[i]);
298 /* ldrh needs special care */
299 if (*loc & (1 << 26))
300 *loc = (*loc & 0xfffff000) | (off & 0x00000fff);
301 else
302 *loc = (*loc & 0xfffff0f0) | (off & 0x0f) |
303 ((off & 0xf0) << 4);
305 gen(dat_offs[i]);
/* data processing mnemonics; the index is the opcode field */
static char *dpops[] = {
	"and", "eor", "sub", "rsb",
	"add", "adc", "sbc", "rsc",
	"tst", "teq", "cmp", "cmn",
	"orr", "mov", "bic", "mvn",
};

/* condition suffixes; the index is the condition field */
static char *conds[] = {
	"eq", "ne", "cs", "cc",
	"mi", "pl", "vs", "vc",
	"hi", "ls", "ge", "lt",
	"gt", "le", "al", "nv",
};
/* register aliases; the index is the register number */
static char *regs[] = {
	"a1", "a2", "a3", "a4",
	"v1", "v2", "v3", "v4",
	"v5", "v6", "v7", "v8",
	"ip", "sp", "lr", "pc",
};

/*
 * Register number for a register name: "fp" is 11, an alias is matched
 * on its first two characters, and "r" is followed by a decimal number.
 * Returns -1 for anything else.
 */
static int get_reg(char *s)
{
	int r;
	if (s[0] == 'f' && s[1] == 'p')
		return 11;
	for (r = 0; r < 16; r++)
		if (s[0] == regs[r][0] && s[1] == regs[r][1])
			return r;
	return s[0] == 'r' ? atoi(s + 1) : -1;
}
337 static void fill_buf(int fd)
339 int len = 0;
340 int nr;
341 while ((nr = read(fd, buf + len, sizeof(buf) - len - 1)) > 0)
342 len += nr;
343 buf[len] = '\0';
/* if the next token is s, consume it and return 0; otherwise return 1 */
static int tok_jmp(char *s)
{
	if (strcmp(s, tok_see()) != 0)
		return 1;
	tok_get();
	return 0;
}
/* consume the next token; exit with a syntax error unless it is s */
static void tok_expect(char *s)
{
	if (strcmp(s, tok_get()) == 0)
		return;
	fprintf(stderr, "syntax error\n");
	exit(1);
}
363 static int get_cond(char *s)
365 int i;
366 if (s[0] == 'h' && s[1] == 's')
367 return 2;
368 if (s[0] == 'l' && s[1] == 'o')
369 return 3;
370 for (i = 0; i < 16; i++)
371 if (TOK2(s) == TOK2(conds[i]))
372 return i;
373 return -1;
376 static int add_op(char *s)
378 int i;
379 for (i = 0; i < 16; i++)
380 if (TOK3(s) == TOK3(dpops[i]))
381 return i;
382 return -1;
/* shift type field for a shift name; anything unknown counts as lsl (0) */
static int shiftmode(char *s)
{
	static char *const modes[] = {"lsl", "lsr", "asr", "ror"};
	int i;
	for (i = 0; i < 4; i++)
		if (!strncmp(s, modes[i], 3))
			return i;
	return 0;
}
/*
 * Parse the optional ", offset" part of a word/byte transfer and
 * return the I, U and offset bits for it.
 *
 * The offset is either "#imm12" or a register with an optional
 * ", shift #amount".  The shift amount field is five bits wide, so
 * the amount is read with five bits (eight bits spilled into the
 * neighbouring fields).
 */
static int ldr_word(void)
{
	int sm = 0;
	int rm;
	int shifts = 0;
	int u = 1;
	if (tok_jmp(","))
		return 0;
	if (!tok_jmp("#")) {
		/* tok_jmp() returns 0 when the minus sign is present */
		u = tok_jmp("-");
		return (u << 23) | num(tok_get(), 12);
	}
	if (!tok_jmp("-"))
		u = 0;
	rm = get_reg(tok_get());
	if (!tok_jmp(",")) {
		sm = shiftmode(tok_get());
		tok_expect("#");
		shifts = num(tok_get(), 5);
	}
	return (1 << 25) | (u << 23) | (shifts << 7) | (sm << 5) | rm;
}
/*
 * Parse the optional ", offset" part of a halfword/signed transfer and
 * return its offset bits together with the S (s) and H (h) bits.
 */
static int ldr_half(int s, int h)
{
	int bits = 0x90 | (s << 6) | (h << 5);
	int up, imm;
	/* no offset: immediate form with a zero offset */
	if (tok_jmp(","))
		return bits | (1 << 22);
	if (!tok_jmp("#")) {
		up = tok_jmp("-");
		imm = num(tok_get(), 8);
		bits |= (1 << 22) | (up << 23);
		/* the 8-bit offset is split into two nibbles */
		return bits | (imm & 0x0f) | ((imm & 0xf0) << 4);
	}
	up = tok_jmp("-");
	bits |= up << 23;
	return bits | get_reg(tok_get());
}
/* sum the "+n" and "-n" terms that follow a symbol name */
static long ldr_off(void)
{
	long total = 0;
	for (;;) {
		if (tok_jmp("-") == 0)
			total -= num(tok_get(), 32);
		else if (tok_jmp("+") == 0)
			total += num(tok_get(), 32);
		else
			return total;
	}
}
454 * single data transfer:
455 * +------------------------------------------+
456 * |COND|01|I|P|U|B|W|L| Rn | Rd | offset |
457 * +------------------------------------------+
459 * I: immediate/offset
460 * P: post/pre indexing
461 * U: down/up
462 * B: byte/word
463 * W: writeback
464 * L: store/load
465 * Rn: base register
466 * Rd: source/destination register
468 * I=1 offset=| immediate |
469 * I=0 offset=| shift | Rm |
471 * halfword and signed data transfer
472 * +----------------------------------------------+
473 * |COND|000|P|U|0|W|L| Rn | Rd |0000|1|S|H|1| Rm |
474 * +----------------------------------------------+
476 * +----------------------------------------------+
477 * |COND|000|P|U|1|W|L| Rn | Rd |off1|1|S|H|1|off2|
478 * +----------------------------------------------+
480 * S: signed
481 * H: halfword
483 static int ldr(char *cmd)
485 int l = 0;
486 int rd, rn;
487 int cond;
488 int w = 0;
489 int sign = 0;
490 int byte = 0;
491 int half = 0;
492 int o;
493 if (TOK3(cmd) != TOK3("ldr") && TOK3(cmd) != TOK3("str"))
494 return 1;
495 if (TOK3(cmd) == TOK3("ldr"))
496 l = 1;
497 cond = get_cond(cmd + 3);
498 cmd += cond < 0 ? 2 : 5;
499 if (cond < 0)
500 cond = 14;
501 while (*++cmd) {
502 if (*cmd == 't')
503 w = 1;
504 if (*cmd == 'b')
505 byte = 1;
506 if (*cmd == 'h')
507 half = 1;
508 if (*cmd == 's')
509 sign = 1;
511 rd = get_reg(tok_get());
512 tok_expect(",");
513 o = (cond << 28) | (l << 20) | (rd << 12) | (half << 5) | (sign << 6);
514 if (half || sign)
515 o |= 0x90;
516 else
517 o |= (1 << 26);
518 if (tok_jmp("[")) {
519 char sym[NAMELEN];
520 rn = 15;
521 if (!tok_jmp("=")) {
522 strcpy(sym, tok_case());
523 pool_reloc(sym, ldr_off());
524 } else {
525 strcpy(sym, tok_case());
526 reloc_rel(sym, ldr_off(), (half || sign) ? 8 : 12);
528 if (half || sign)
529 o |= (1 << 22);
530 else
531 o |= (1 << 26);
532 gen(o | (1 << 23) | (1 << 24) | (rn << 16));
533 return 0;
535 rn = get_reg(tok_get());
536 o |= (rn << 16);
537 if (!tok_jmp("]")) {
538 gen(o | (w << 21) | ((half || sign) ? ldr_half(sign, half) :
539 ldr_word()));
540 return 0;
542 o |= (1 << 24) | ((half || sign) ? ldr_half(sign, half) : ldr_word());
543 tok_expect("]");
544 if (!tok_jmp("!"))
545 o |= (1 << 21);
546 gen(o);
547 return 0;
/* parse "{r1, r2-r3, ...}" and return the register list as a bit mask */
static int ldm_regs(void)
{
	int mask = 0;
	tok_expect("{");
	do {
		int lo = get_reg(tok_get());
		int hi = lo;
		int r;
		/* "lo-hi" names a range of registers */
		if (!tok_jmp("-"))
			hi = get_reg(tok_get());
		for (r = lo; r <= hi; r++)
			mask |= (1 << r);
	} while (!tok_jmp(","));
	tok_expect("}");
	return mask;
}
/*
 * P and U bits for a block transfer addressing mode suffix.
 *
 * s is "ia", "ib", "da" or "db", or one of the stack forms "fd", "ed",
 * "fa", "ea", whose meaning depends on l (nonzero for loads).  For the
 * i/d forms U comes from the first letter (increment) and P from the
 * second (before); the previous version had the two exchanged.  An
 * empty suffix selects increment after.
 */
static int ldm_type(char *s, int l)
{
	int p = 0;
	int u = 0;
	if (!*s) {
		u = 1;
	} else if (*s == 'i' || *s == 'd') {
		u = s[0] == 'i';
		p = s[1] == 'b';
	} else {
		p = s[0] == (l ? 'e' : 'f');
		u = s[1] == (l ? 'd' : 'a');
	}
	return (p << 24) | (u << 23);
}
584 * block data transfer
585 * +----------------------------------------+
586 * |COND|100|P|U|S|W|L| Rn | reg list |
587 * +----------------------------------------+
589 * P: post/pre indexing
590 * U: down/up
591 * S: PSR/user bit
592 * W: write back
593 * L: load/store
594 * Rn: base register
/*
 * Assemble ldm/stm.  Returns 1 if cmd is neither, and 0 after
 * generating the instruction.
 */
static int ldm(char *cmd)
{
	int load = TOK3(cmd) == TOK3("ldm");
	int base, cond, wback, psr;
	int op = 4 << 25;
	if (!load && TOK3(cmd) != TOK3("stm"))
		return 1;
	cond = get_cond(cmd + 3);
	/* the addressing mode follows the optional condition */
	op |= ldm_type(cond < 0 ? cmd + 3 : cmd + 5, load);
	if (cond < 0)
		cond = 14;
	base = get_reg(tok_get());
	/* "!" after the base register asks for write back */
	wback = !tok_jmp("!");
	tok_expect(",");
	op |= ldm_regs();
	/* "^" after the register list sets the S bit */
	psr = !tok_jmp("^");
	gen(op | (cond << 28) | (psr << 22) | (wback << 21) |
		(load << 20) | (base << 16));
	return 0;
}
/*
 * Encode n as the rot:imm8 immediate form of operand2.
 *
 * The field stands for imm8 rotated right by twice the rotation, so
 * every even rotation of n is tried until one fits in eight bits.
 * Values the previous version handled correctly keep their encoding;
 * wrap-around values such as 0xf000000f are now found as well.  A
 * value with no encoding is reported instead of being truncated.
 */
static int add_encimm(unsigned n)
{
	unsigned long v = n & 0xfffffffful;
	int i;
	for (i = 0; i < 16; i++) {
		int sh = i << 1;
		/* v rotated right by sh bits within 32 bits */
		unsigned long r = sh ?
			((v >> sh) | (v << (32 - sh))) & 0xfffffffful : v;
		if (r < 0x100)
			return (int) r | (((16 - i) & 0x0f) << 8);
	}
	fprintf(stderr, "neatas: immediate 0x%lx cannot be encoded\n", v);
	exit(1);
}
/*
 * Parse operand2 of a data processing instruction: "#imm", "rm",
 * "rm, shift #amount" or "rm, shift rs".
 *
 * The immediate shift amount field is five bits wide; it used to be
 * read with four bits, which lost amounts of 16 and above.
 */
static int add_op2(void)
{
	int sm, rm;
	if (!tok_jmp("#"))
		return (1 << 25) | add_encimm(num(tok_get(), 32));
	rm = get_reg(tok_get());
	if (tok_jmp(","))
		return rm;
	sm = shiftmode(tok_get());
	if (!tok_jmp("#"))
		return (num(tok_get(), 5) << 7) | (sm << 5) | (rm << 0);
	/* bit 4 selects a shift amount held in a register */
	return (get_reg(tok_get()) << 8) | (sm << 5) | (1 << 4) | (rm << 0);
}
644 * data processing:
645 * +---------------------------------------+
646 * |COND|00|I| op |S| Rn | Rd | operand2 |
647 * +---------------------------------------+
649 * S: set condition code
650 * Rn: first operand
651 * Rd: destination operand
653 * I=0 operand2=| shift | Rm |
654 * I=1 operand2=|rota| imm |
/*
 * Assemble a data processing instruction.
 *
 * cmd is the lower-case mnemonic: one of dpops[], an optional
 * two-letter condition, then an optional "s".  Returns 1 if cmd is
 * not a data processing mnemonic and 0 after generating the word.
 *
 * With a condition present the "s" is at index 5; index 6 used to be
 * read, so "addeqs" lost its S bit.
 */
static int add(char *cmd)
{
	int op, cond;
	int rd = 0, rn = 0;
	int nops = 2;
	int s = 0;
	op = add_op(cmd);
	if (op < 0)
		return 1;
	cond = get_cond(cmd + 3);
	s = cmd[cond < 0 ? 3 : 5] == 's';
	/* mov and mvn take no first operand */
	if (op == 13 || op == 15)
		nops = 1;
	/* tst, teq, cmp and cmn always set the flags */
	if ((op & 0x0c) == 0x08)
		s = 1;
	if (cond < 0)
		cond = 14;
	/* ... and have no destination register */
	if ((op & 0xc) != 0x8) {
		rd = get_reg(tok_get());
		tok_expect(",");
	}
	if (nops > 1) {
		rn = get_reg(tok_get());
		tok_expect(",");
	}
	gen((cond << 28) | (s << 20) | (op << 21) | (rn << 16) | (rd << 12) | add_op2());
	return 0;
}
686 * multiply
687 * +----------------------------------------+
688 * |COND|000000|A|S| Rd | Rn | Rs |1001| Rm |
689 * +----------------------------------------+
691 * Rd: destination
692 * A: accumulate
693 * S: set condition codes
695 * I=0 operand2=| shift | Rm |
696 * I=1 operand2=|rota| imm |
/*
 * Assemble mul and mla.
 *
 * cmd is the lower-case mnemonic: "mul" or "mla", an optional
 * two-letter condition, then an optional "s".  Returns 1 if cmd is
 * neither and 0 after generating the word.
 *
 * With a condition present the "s" is at index 5; index 6 used to be
 * read.
 */
static int mul(char *cmd)
{
	int cond;
	int rd, rm, rs, rn = 0;
	int s = 0;
	int a = 0;
	if (TOK3(cmd) != TOK3("mul") && TOK3(cmd) != TOK3("mla"))
		return 1;
	if (TOK3(cmd) == TOK3("mla"))
		a = 1;
	cond = get_cond(cmd + 3);
	s = cmd[cond < 0 ? 3 : 5] == 's';
	if (cond < 0)
		cond = 14;
	rd = get_reg(tok_get());
	tok_expect(",");
	rm = get_reg(tok_get());
	tok_expect(",");
	rs = get_reg(tok_get());
	/* mla has a fourth operand, the register to add */
	if (a) {
		tok_expect(",");
		rn = get_reg(tok_get());
	}
	gen((cond << 28) | (a << 21) | (s << 20) | (rd << 16) |
		(rn << 12) | (rs << 8) | (9 << 4) | (rm << 0));
	return 0;
}
727 * software interrupt:
728 * +----------------------------------+
729 * |COND|1111| |
730 * +----------------------------------+
/*
 * Assemble swi with an optional condition and a 24-bit number; the
 * "#" before the number is optional.  Returns 1 if cmd is not swi.
 */
static int swi(char *cmd)
{
	int cond, n;
	if (TOK3(cmd) != TOK3("swi"))
		return 1;
	cond = get_cond(cmd + 3);
	if (cond < 0)
		cond = 14;
	tok_jmp("#");
	n = num(tok_get(), 24);
	gen((cond << 28) | (0xf << 24) | n);
	return 0;
}
749 * branch:
750 * +-----------------------------------+
751 * |COND|101|L| offset |
752 * +-----------------------------------+
754 * L: link
756 static int bl(char *cmd)
758 int l = 0;
759 int cond;
760 char sym[NAMELEN];
761 if (*cmd++ != 'b')
762 return 1;
763 if (*cmd == 'l') {
764 l = 1;
765 cmd++;
767 cond = get_cond(cmd);
768 if (cond == -1)
769 cond = 14;
770 strcpy(sym, tok_case());
771 reloc_rel(sym, ldr_off(), 24);
772 gen((cond << 28) | (5 << 25) | (l << 24));
773 return 0;
777 * move PSR to a register
778 * +-------------------------------------+
779 * |COND|00010|P|001111| Rd |000000000000|
780 * +-------------------------------------+
782 * move a register to PSR
783 * +--------------------------------------+
784 * |COND|00|I|10|P|1010001111| source op |
785 * +--------------------------------------+
787 * P: CPSR/SPSR_cur
789 * I=0 source=|00000000| Rm |
790 * I=1 source=|rot | imm_u8 |
/* PSR transfers are not assembled: every mnemonic is reported as not handled */
static int msr(char *cmd)
{
	(void) cmd;
	return 1;
}
/*
 * Handle a directive.  Returns 1 if cmd does not start with a dot and
 * 0 otherwise; directives other than .extern, .global and .word are
 * accepted without any effect.
 */
static int directive(char *cmd)
{
	if (*cmd != '.')
		return 1;
	if (strcmp(cmd, ".extern") == 0) {
		label_extern(tok_case());
	} else if (strcmp(cmd, ".global") == 0) {
		label_global(tok_case());
	} else if (strcmp(cmd, ".word") == 0) {
		/* a comma separated list of numbers and "=symbol+off" addresses */
		for (;;) {
			if (tok_jmp("=") == 0) {
				reloc_abs(tok_case());
				gen(ldr_off());
			} else {
				gen(num(tok_get(), 32));
			}
			if (tok_jmp(","))
				break;
		}
	}
	return 0;
}
820 static int stmt(void)
822 char first[TOKLEN];
823 char first_case[TOKLEN];
824 strcpy(first, tok_see());
825 strcpy(first_case, tok_case());
826 /* a label */
827 if (!tok_jmp(":")) {
828 label_local(first_case);
829 return 0;
831 if (!directive(first))
832 return 0;
833 if (!add(first))
834 return 0;
835 if (!mul(first))
836 return 0;
837 if (!ldr(first))
838 return 0;
839 if (!ldm(first))
840 return 0;
841 if (!msr(first))
842 return 0;
843 if (!swi(first))
844 return 0;
845 if (!bl(first))
846 return 0;
847 return 1;
850 int main(int argc, char *argv[])
852 char obj[128] = "";
853 char *src;
854 int ofd, ifd;
855 int i = 1;
856 while (i < argc && argv[i][0] == '-') {
857 if (argv[i][1] == 'o')
858 strcpy(obj, argv[++i]);
859 i++;
861 if (i == argc) {
862 fprintf(stderr, "neatcc: no file given\n");
863 return 1;
865 src = argv[i];
866 ifd = open(src, O_RDONLY);
867 fill_buf(ifd);
868 close(ifd);
869 out_init(0);
870 while (!stmt())
872 label_write();
873 pool_write();
874 reloc_write();
875 if (!*obj) {
876 char *s = obj;
877 strcpy(obj, src);
878 while (*s && *s != '.')
879 s++;
880 *s++ = '.';
881 *s++ = 'o';
882 *s++ = '\0';
884 ofd = open(obj, O_WRONLY | O_TRUNC | O_CREAT, 0600);
885 out_write(ofd, cs, cslen, cs, 0);
886 close(ofd);
887 return 0;