2 * neatas - a small arm assembler
4 * Copyright (C) 2011 Ali Gholami Rudi
6 * This program is released under GNU GPL version 2.
14 #include <sys/types.h>
18 #define BUFSIZE (1 << 14)
21 #define DELIMS ",:{}[]#=-+ \t\n/!^"
22 #define TOK2(a) ((a)[0] << 16 | (a)[1] << 8)
23 #define TOK3(a) ((a)[0] << 16 | (a)[1] << 8 | (a)[2])
25 static char buf
[BUFSIZE
];
28 static char cs
[BUFSIZE
];
31 static void gen(unsigned long i
)
33 memcpy(cs
+ cslen
, &i
, 4);
37 static int tok_read(char *s
)
40 while (isspace(buf
[cur
]))
42 if (buf
[cur
] == '/' && buf
[cur
+ 1] == '*') {
43 while (buf
[cur
] && (buf
[cur
] != '*' || buf
[cur
+ 1] != '/'))
47 if (buf
[cur
] == ';' || buf
[cur
] == '@') {
48 while (buf
[cur
] && buf
[cur
] != '\n')
54 if (!strchr(DELIMS
, buf
[cur
])) {
55 while (!strchr(DELIMS
, buf
[cur
]))
65 static char tok
[TOKLEN
];
66 static char tokc
[TOKLEN
];
69 /* next token in lower-case */
70 static char *tok_get(void)
84 /* next token in original case */
85 static char *tok_case(void)
91 /* have a look at the next token */
92 static char *tok_see(void)
100 static char *digs
= "0123456789abcdef";
102 static long num(char *s
, int bits
)
107 if (*s
== '-' || *s
== '+') {
111 if (s
[0] == '0' && s
[1] == 'x') {
116 int d
= strchr(digs
, *s
) - digs
;
123 return bits
< 32 ? n
& ((1ul << bits
) - 1) : n
;
127 #define NEXTERNS 1024
130 static char locals
[NLOCALS
][NAMELEN
];
131 static char loffs
[NLOCALS
];
133 static char externs
[NEXTERNS
][NAMELEN
];
135 static char globals
[NEXTERNS
][NAMELEN
];
138 static void label_extern(char *name
)
140 int idx
= nexterns
++;
141 strcpy(externs
[idx
], name
);
144 static void label_global(char *name
)
146 int idx
= nglobals
++;
147 strcpy(globals
[idx
], name
);
150 static void label_local(char *name
)
153 strcpy(locals
[idx
], name
);
155 out_sym(locals
[idx
], OUT_CS
, loffs
[idx
], 0);
158 static int label_isextern(char *name
)
161 for (i
= 0; i
< nexterns
; i
++)
162 if (!strcmp(name
, externs
[i
]))
167 static int label_offset(char *name
)
170 for (i
= 0; i
< nlocals
; i
++)
171 if (!strcmp(name
, locals
[i
]))
176 static void label_write(void)
179 for (i
= 0; i
< nglobals
; i
++)
180 out_sym(globals
[i
], OUT_GLOB
| OUT_CS
,
181 label_offset(globals
[i
]), 0);
186 /* absolute relocations */
187 static char absns
[NRELOCS
][NAMELEN
]; /* symbol name */
188 static long absos
[NRELOCS
]; /* relocation location */
190 /* relative relocations */
191 static char relns
[NRELOCS
][NAMELEN
]; /* symbol name */
192 static long relos
[NRELOCS
]; /* relocation location */
193 static long relas
[NRELOCS
]; /* relocation addend */
194 static long relbs
[NRELOCS
]; /* relocation bits: ldrh=8, 12=ldr, 24=bl */
197 static void reloc_rel(char *name
, long off
, int bits
)
200 strcpy(relns
[idx
], name
);
206 static void reloc_abs(char *name
)
209 strcpy(absns
[idx
], name
);
213 #define CSBEG_NAME "__neatas_cs"
215 /* fill immediate value for bl instruction */
216 static void bl_imm(long *dst
, long imm
)
218 imm
= ((*dst
<< 2) + imm
) >> 2;
219 *dst
= (*dst
& 0xff000000) | (imm
& 0x00ffffff);
222 /* fill immediate value for ldr instruction */
223 static void ldr_imm(long *dst
, long imm
, int half
)
225 /* set u-bit for negative offsets */
231 *dst
= (*dst
& 0xfffff000) | ((*dst
+ imm
) & 0x00000fff);
233 *dst
= (*dst
& 0xfffff0f0) |
234 (imm
& 0x0f) | ((imm
& 0xf0) << 4);
237 static void reloc_write(void)
240 out_sym(CSBEG_NAME
, OUT_CS
, 0, 0);
241 for (i
= 0; i
< nabs
; i
++) {
242 if (label_isextern(absns
[i
])) {
243 out_rel(absns
[i
], OUT_CS
, absos
[i
]);
245 long off
= label_offset(absns
[i
]);
246 out_rel(CSBEG_NAME
, OUT_CS
, absos
[i
]);
247 *(long *) (cs
+ absos
[i
]) += off
;
250 for (i
= 0; i
< nrel
; i
++) {
251 long *dst
= (void *) cs
+ relos
[i
];
253 if (label_isextern(relns
[i
])) {
254 out_rel(relns
[i
], OUT_CS
| OUT_REL24
, relos
[i
]);
255 bl_imm(dst
, relas
[i
] - 8);
258 off
= relas
[i
] + label_offset(relns
[i
]) - relos
[i
] - 8;
263 ldr_imm(dst
, off
, relbs
[i
] == 8);
270 static long dat_offs
[NDATS
]; /* data immediate value */
271 static long dat_locs
[NDATS
]; /* address of pointing ldr */
272 static char dat_names
[NDATS
][NAMELEN
]; /* relocation data symbol name */
275 static void pool_num(long num
)
279 dat_locs
[idx
] = cslen
;
282 static void pool_reloc(char *name
, long off
)
286 dat_locs
[idx
] = cslen
;
287 strcpy(dat_names
[idx
], name
);
290 static void pool_write(void)
293 for (i
= 0; i
< ndats
; i
++) {
295 long *loc
= (void *) cs
+ dat_locs
[i
];
296 int off
= cslen
- dat_locs
[i
] - 8;
297 reloc_abs(dat_names
[i
]);
298 /* ldrh needs special care */
299 if (*loc
& (1 << 26))
300 *loc
= (*loc
& 0xfffff000) | (off
& 0x00000fff);
302 *loc
= (*loc
& 0xfffff0f0) | (off
& 0x0f) |
309 static char *dpops
[] = {
310 "and", "eor", "sub", "rsb", "add", "adc", "sbc", "rsc",
311 "tst", "teq", "cmp", "cmn", "orr", "mov", "bic", "mvn"
314 static char *conds
[] = {
315 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
316 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
319 static char *regs
[] = {
320 "a1", "a2", "a3", "a4", "v1", "v2", "v3", "v4",
321 "v5", "v6", "v7", "v8", "ip", "sp", "lr", "pc"
324 static int get_reg(char *s
)
327 if (s
[0] == 'f' && s
[1] == 'p')
329 for (i
= 0; i
< 16; i
++)
330 if (TOK2(s
) == TOK2(regs
[i
]))
337 static void fill_buf(int fd
)
341 while ((nr
= read(fd
, buf
+ len
, sizeof(buf
) - len
- 1)) > 0)
346 static int tok_jmp(char *s
)
348 if (!strcmp(s
, tok_see())) {
355 static void tok_expect(char *s
)
357 if (strcmp(s
, tok_get())) {
358 fprintf(stderr
, "syntax error\n");
363 static int get_cond(char *s
)
366 if (s
[0] == 'h' && s
[1] == 's')
368 if (s
[0] == 'l' && s
[1] == 'o')
370 for (i
= 0; i
< 16; i
++)
371 if (TOK2(s
) == TOK2(conds
[i
]))
376 static int add_op(char *s
)
379 for (i
= 0; i
< 16; i
++)
380 if (TOK3(s
) == TOK3(dpops
[i
]))
385 static int shiftmode(char *s
)
387 if (TOK3(s
) == TOK3("lsl"))
389 if (TOK3(s
) == TOK3("lsr"))
391 if (TOK3(s
) == TOK3("asr"))
393 if (TOK3(s
) == TOK3("ror"))
398 static int ldr_word(void)
408 return (u
<< 23) | num(tok_get(), 12);
412 rm
= get_reg(tok_get());
414 sm
= shiftmode(tok_get());
416 shifts
= num(tok_get(), 8);
418 return (1 << 25) | (u
<< 23) | (shifts
<< 7) | (sm
<< 5) | rm
;
421 static int ldr_half(int s
, int h
)
424 int o
= 0x90 | (s
<< 6) | (h
<< 5);
426 return o
| (1 << 22);
429 n
= num(tok_get(), 8);
430 return o
| (1 << 22) | (u
<< 23) | (n
& 0x0f) | ((n
& 0xf0) << 4);
433 return o
| (u
<< 23) | get_reg(tok_get());
436 static long ldr_off(void)
441 off
-= num(tok_get(), 32);
445 off
+= num(tok_get(), 32);
454 * single data transfer:
455 * +------------------------------------------+
456 * |COND|01|I|P|U|B|W|L| Rn | Rd | offset |
457 * +------------------------------------------+
459 * I: immediate/offset
460 * P: post/pre indexing
466 * Rd: source/destination register
468 * I=1 offset=| immediate |
469 * I=0 offset=| shift | Rm |
471 * halfword and signed data transfer
472 * +----------------------------------------------+
473 * |COND|000|P|U|0|W|L| Rn | Rd |0000|1|S|H|1| Rm |
474 * +----------------------------------------------+
476 * +----------------------------------------------+
477 * |COND|000|P|U|1|W|L| Rn | Rd |off1|1|S|H|1|off2|
478 * +----------------------------------------------+
483 static int ldr(char *cmd
)
493 if (TOK3(cmd
) != TOK3("ldr") && TOK3(cmd
) != TOK3("str"))
495 if (TOK3(cmd
) == TOK3("ldr"))
497 cond
= get_cond(cmd
+ 3);
498 cmd
+= cond
< 0 ? 2 : 5;
511 rd
= get_reg(tok_get());
513 o
= (cond
<< 28) | (l
<< 20) | (rd
<< 12) | (half
<< 5) | (sign
<< 6);
522 strcpy(sym
, tok_case());
523 pool_reloc(sym
, ldr_off());
525 strcpy(sym
, tok_case());
526 reloc_rel(sym
, ldr_off(), (half
|| sign
) ? 8 : 12);
532 gen(o
| (1 << 23) | (1 << 24) | (rn
<< 16));
535 rn
= get_reg(tok_get());
538 gen(o
| (w
<< 21) | ((half
|| sign
) ? ldr_half(sign
, half
) :
542 o
|= (1 << 24) | ((half
|| sign
) ? ldr_half(sign
, half
) : ldr_word());
550 static int ldm_regs(void)
555 int r1
= get_reg(tok_get());
559 r2
= get_reg(tok_get());
560 for (i
= r1
; i
<= r2
; i
++)
569 static int ldm_type(char *s
, int l
)
573 if (*s
== 'i' || *s
== 'd') {
577 p
= s
[0] == (l
? 'e' : 'f');
578 u
= s
[1] == (l
? 'd' : 'a');
580 return (p
<< 24) | (u
<< 23);
584 * block data transfer
585 * +----------------------------------------+
586 * |COND|100|P|U|S|W|L| Rn | reg list |
587 * +----------------------------------------+
589 * P: post/pre indexing
596 static int ldm(char *cmd
)
600 int l
= 0, w
= 0, s
= 0;
602 if (TOK3(cmd
) != TOK3("ldm") && TOK3(cmd
) != TOK3("stm"))
604 if (TOK3(cmd
) == TOK3("ldm"))
606 cond
= get_cond(cmd
+ 3);
607 o
|= ldm_type(cond
< 0 ? cmd
+ 3 : cmd
+ 5, l
);
608 rn
= get_reg(tok_get());
617 gen(o
| (cond
<< 28) | (s
<< 22) | (w
<< 21) | (l
<< 20) | (rn
<< 16));
621 static int add_encimm(unsigned n
)
624 while (i
< 12 && (n
>> ((4 + i
) << 1)))
626 return (n
>> (i
<< 1)) | (((16 - i
) & 0x0f) << 8);
629 static int add_op2(void)
633 return (1 << 25) | add_encimm(num(tok_get(), 32));
634 rm
= get_reg(tok_get());
637 sm
= shiftmode(tok_get());
639 return (num(tok_get(), 4) << 7) | (sm
<< 5) | (rm
<< 0);
640 return (get_reg(tok_get()) << 8) | (sm
<< 5) | (1 << 4) | (rm
<< 0);
645 * +---------------------------------------+
646 * |COND|00|I| op |S| Rn | Rd | operand2 |
647 * +---------------------------------------+
649 * S: set condition code
651 * Rd: destination operand
653 * I=0 operand2=| shift | Rm |
654 * I=1 operand2=|rota| imm |
656 static int add(char *cmd
)
665 cond
= get_cond(cmd
+ 3);
666 s
= cmd
[cond
< 0 ? 3 : 6] == 's';
667 if (op
== 13 || op
== 15)
669 if ((op
& 0x0c) == 0x08)
673 if ((op
& 0xc) != 0x8) {
674 rd
= get_reg(tok_get());
678 rn
= get_reg(tok_get());
681 gen((cond
<< 28) | (s
<< 20) | (op
<< 21) | (rn
<< 16) | (rd
<< 12) | add_op2());
687 * +----------------------------------------+
688 * |COND|000000|A|S| Rd | Rn | Rs |1001| Rm |
689 * +----------------------------------------+
693 * C: set condition codes
695 * I=0 operand2=| shift | Rm |
696 * I=1 operand2=|rota| imm |
698 static int mul(char *cmd
)
701 int rd
, rm
, rs
, rn
= 0;
704 if (TOK3(cmd
) != TOK3("mul") && TOK3(cmd
) != TOK3("mla"))
706 if (TOK3(cmd
) == TOK3("mla"))
708 cond
= get_cond(cmd
+ 3);
709 s
= cmd
[cond
< 0 ? 3 : 6] == 's';
712 rd
= get_reg(tok_get());
714 rm
= get_reg(tok_get());
716 rs
= get_reg(tok_get());
719 rn
= get_reg(tok_get());
721 gen((cond
<< 28) | (a
<< 21) | (s
<< 20) | (rd
<< 16) |
722 (rn
<< 12) | (rs
<< 8) | (9 << 4) | (rm
<< 0));
727 * software interrupt:
728 * +----------------------------------+
730 * +----------------------------------+
733 static int swi(char *cmd
)
737 if (TOK3(cmd
) != TOK3("swi"))
739 cond
= get_cond(cmd
+ 3);
743 n
= num(tok_get(), 24);
744 gen((cond
<< 28) | (0xf << 24) | n
);
750 * +-----------------------------------+
751 * |COND|101|L| offset |
752 * +-----------------------------------+
756 static int bl(char *cmd
)
767 cond
= get_cond(cmd
);
770 strcpy(sym
, tok_case());
771 reloc_rel(sym
, ldr_off(), 24);
772 gen((cond
<< 28) | (5 << 25) | (l
<< 24));
777 * move PSR to a register
778 * +-------------------------------------+
779 * |COND|00010|P|001111| Rd |000000000000|
780 * +-------------------------------------+
782 * move a register to PSR
783 * +--------------------------------------+
784 * |COND|00|I|10|P|1010001111| source op |
785 * +--------------------------------------+
789 * I=0 source=|00000000| Rm |
790 * I=1 source=|rot | imm_u8 |
792 static int msr(char *cmd
)
797 static int directive(char *cmd
)
801 if (!strcmp(".extern", cmd
)) {
802 label_extern(tok_case());
804 if (!strcmp(".global", cmd
)) {
805 label_global(tok_case());
807 if (!strcmp(".word", cmd
)) {
810 reloc_abs(tok_case());
813 gen(num(tok_get(), 32));
815 } while (!tok_jmp(","));
820 static int stmt(void)
823 char first_case
[TOKLEN
];
824 strcpy(first
, tok_see());
825 strcpy(first_case
, tok_case());
828 label_local(first_case
);
831 if (!directive(first
))
850 int main(int argc
, char *argv
[])
856 while (i
< argc
&& argv
[i
][0] == '-') {
857 if (argv
[i
][1] == 'o')
858 strcpy(obj
, argv
[++i
]);
862 fprintf(stderr
, "neatcc: no file given\n");
866 ifd
= open(src
, O_RDONLY
);
878 while (*s
&& *s
!= '.')
884 ofd
= open(obj
, O_WRONLY
| O_TRUNC
| O_CREAT
, 0600);
885 out_write(ofd
, cs
, cslen
, cs
, 0);