2 #include "endianness.h"
6 #include "Script_internal.h"
13 #include "Assembler.h"
14 #include "kw_search.h"
31 struct sections_data
{
33 /* the offset points inside the .text section to denote the start
34 of contents originating from a different file; it establishes a mapping
35 between 'sourceline' statements and the original file for debugging.
36 this happens when the editor converts and merges multiple dialog
37 scripts into a single script.
38 the number denotes the number of instructions to skip after the start of
39 the .text section, or using absolute offsets: (absolute_offset - offset_of_text)/4
40 example: when disassembled with -o, a dialogscript i tested looks
43 ; offset: 24 (insno 0)
45 ; offset: 27856 (insno 6958)
46 _run_dialog1$1: ; 1 args
48 ; offset: 27864 (insno 6960)
51 "__DialogScripts.asc" = 0
55 so _run_dialog1 starts at physical offset 27856, -24 = 27832,
57 in order to keep these offsets accurate after re-assembling (especially
58 after changes have been made), we should actually save the function name
59 it points to, then after assembling put the so-calculated offset of
60 that same function there.
61 however, since these sections are only interesting for debugging,
62 and only used (except those always at offset 0) in dialogscripts,
63 this is currently out of scope.
74 static int add_label(AS
*a
, char* name
, size_t insno
) {
75 char* tmp
= strdup(name
);
76 return htab_insert(a
->label_map
, tmp
, HTV_N(insno
)) != 0;
/* Resolve a label name through a->label_map (htab_find).
 * The diagnostics below report a label that was not found and, when the
 * name does not begin with "label", print a hint about the naming rule.
 * NOTE(review): the condition guarding the diagnostics and the statements
 * producing the return value are not visible in this excerpt. */
79 static unsigned get_label_offset(AS
*a
, char* name
) {
80 htab_value
*ret
= htab_find(a
->label_map
, name
);
82 fprintf(stderr
, "error: label '%s' not found\n", name
);
83 if(strncmp(name
, "label", 5)) fprintf(stderr
, "hint: label names must start with 'label'\n");
/* Remember that the code slot `insno` refers to the label `name`.
 * A duplicated copy of the name plus the slot number is appended to
 * a->label_ref_list; the List_add() result is handed back to the caller. */
89 static int add_label_ref(AS
*a
, char * name
, size_t insno
) {
90 /* add reference to named label to a list. after the first pass
91 * over the code these locations have to be fixed with the offset
93 struct label item
= { .name
= strdup(name
), .insno
= insno
};
95 return List_add(a
->label_ref_list
, &item
);
/* Remember that the code slot `insno` refers to the function `name`.
 * A duplicated copy of the name plus the slot number is appended to
 * a->function_ref_list; the List_add() result is handed back to the caller. */
98 static int add_function_ref(AS
*a
, char* name
, size_t insno
) {
99 /* add reference to named function to a list. after the first pass
100 * over the code these locations have to be fixed with the offset
102 struct label item
= { .name
= strdup(name
), .insno
= insno
};
104 return List_add(a
->function_ref_list
, &item
);
/* Append an export entry (kind `type`, duplicated `name`, position
 * `offset`) to a->export_list and index it by name in a->export_map.
 * The value stored in the map is the entry's index in the list.
 * NOTE(review): List_add() and htab_insert() are invoked inside assert(),
 * so a build with NDEBUG would drop both calls. The return statement is
 * not visible in this excerpt. */
107 static int add_export(AS
*a
, int type
, char* name
, size_t offset
) {
108 struct export item
= { .fn
= strdup(name
), .instr
= offset
, .type
= type
};
110 assert(List_add(a
->export_list
, &item
));
111 assert(htab_insert(a
->export_map
, item
.fn
, HTV_N(List_size(a
->export_list
)-1)));
115 static int add_fixup(AS
*a
, int type
, size_t offset
) {
116 struct fixup item
= {.type
= type
, .offset
= offset
};
117 /* offset equals instruction number for non-DATADATA fixups */
118 return List_add(a
->fixup_list
, &item
);
121 static int add_sections_name(AS
*a
, char* name
, int value
) {
122 struct sections_data item
= {.name
= strdup(name
), .offset
= value
};
123 return List_add(a
->sections_list
, &item
);
/* Intern a quoted string literal and report its place in the string table.
 * `str` is modified in place: the pointer is advanced past the opening
 * quote and a NUL byte is stored at index l.
 * A string that is not yet known is duplicated, appended to
 * a->string_list and indexed in a->string_offset_map under the section
 * length at the time of insertion; a->string_section_length then grows by
 * the string length plus one terminator byte.
 * NOTE(review): List_add() and htab_insert() are invoked inside assert(),
 * so a build with NDEBUG would drop both calls. The handling of the lookup
 * result `v` and the return statements are not visible in this excerpt. */
126 static size_t add_or_get_string__offset(AS
* a
, char* str
) {
127 /* return offset of string in string table
128 * add to string table if not yet existing */
129 str
++; /* leading '"' */
130 size_t l
= strlen(str
), o
;
132 str
[l
] = 0; /* trailing '"' */
134 htab_value
*v
= htab_find(a
->string_offset_map
, str
);
137 struct string item
= {.ptr
= strdup(str
), .len
= l
};
138 o
= a
->string_section_length
;
139 assert(List_add(a
->string_list
, &item
));
140 assert(htab_insert(a
->string_offset_map
, item
.ptr
, HTV_N(o
)));
141 a
->string_section_length
+= l
+ 1;
145 static size_t get_string_section_length(AS
* a
) {
146 return a
->string_section_length
;
149 static int add_variable(AS
*a
, char* name
, unsigned vs
, size_t offset
) {
150 struct variable item
= { .name
= strdup(name
), .vs
= vs
, .offset
= offset
};
151 return List_add(a
->variable_list
, &item
);
/* Linear search of a->variable_list for an entry whose name equals `name`
 * (strcmp). A diagnostic is written to stderr for a variable that is not
 * found.
 * NOTE(review): the declaration of `i` and the return statements are not
 * visible in this excerpt. */
154 static int get_variable_offset(AS
* a
, char* name
) {
155 /* return globaldata offset of named variable */
157 struct variable
*item
;
158 for(; i
< List_size(a
->variable_list
); i
++) {
159 assert((item
= List_getptr(a
->variable_list
, i
)));
160 if(!strcmp(item
->name
, name
))
163 fprintf(stderr
, "error: variable '%s' not found\n", name
);
/* Scan the input stream from its beginning for a line that starts with
 * '.' immediately followed by `name`; only the first strlen(name) bytes
 * after the dot are compared. *lineno is incremented once per line read.
 * Callers in this file treat a result of -1 as "section absent" and
 * otherwise fseek() to the returned value.
 * NOTE(review): the buffer declaration and the return statements are not
 * visible in this excerpt. */
168 static ssize_t
find_section(FILE* in
, char* name
, size_t *lineno
) {
170 size_t off
= 0, l
= strlen(name
);
172 fseek(in
, 0, SEEK_SET
);
173 while(fgets(buf
, sizeof buf
, in
)) {
174 *lineno
= *lineno
+1;
176 if(buf
[0] == '.' && memcmp(name
, buf
+ 1, l
) == 0)
/* Assemble the "data" section of the input into a->data.
 * Visible flow: locate the section with find_section() (a missing section
 * counts as success), then read lines until one starts with '.'. Blank
 * lines and lines starting with '#' or ';' are skipped. A line may start
 * with the keyword "export", followed by a type keyword (int, short, char
 * or string), a name, '=' and an initial value. An initial value written
 * as "data + N" records a FIXUP_DATADATA fixup at the current data
 * position. The value is emitted with the ByteArray writer matching the
 * element size, the variable is registered with add_variable(), and
 * exported variables are also registered with add_export().
 * NOTE(review): the scanning loops read *p before testing p < pend, and
 * isspace()/isdigit() receive a plain char. Several declarations, branch
 * bodies and the return statements are not visible in this excerpt. */
182 static int asm_data(AS
* a
) {
184 ssize_t start
= find_section(a
->in
, "data", &lineno
);
185 if(start
== -1) return 1; // it is valid for .s file to only have .text
186 fseek(a
->in
, start
, SEEK_SET
);
189 while(fgets(buf
, sizeof buf
, a
->in
) && buf
[0] != '.') {
190 if(buf
[0] == '\n') continue;
191 char* p
= buf
, *pend
= buf
+ sizeof buf
, *var
;
194 if(*p
== '#' || *p
== ';') continue;
195 while(isspace(*p
) && p
< pend
) p
++;
196 if(*p
== ';') continue;
197 if(!memcmp(p
, "export", 6) && isspace(p
[6])) {
200 while(isspace(*p
) && p
< pend
) p
++;
202 if(memcmp(p
, "int", 3) == 0)
204 else if(memcmp(p
, "short", 5) == 0)
206 else if(memcmp(p
, "char", 4) == 0) {
211 while(isdigit(*q
) && q
< pend
) q
++;
212 if(vs
== 0 || *q
!= ']') {
213 fprintf(stderr
, "error: expected number > 0 and ']' after '['\n");
218 } else if(memcmp(p
, "string", 6) == 0)
221 fprintf(stderr
, "error: expected int, short, char, or string\n");
224 while(!isspace(*p
) && p
< pend
) p
++;
225 while(isspace(*p
) && p
< pend
) p
++;
227 while(!isspace(*p
) && p
< pend
) p
++;
229 assert(p
< pend
&& *p
== '=');
230 p
++; while(isspace(*p
) && p
< pend
) p
++;
236 if(memcmp(p
, "data", 4) == 0) {
238 while(isspace(*p
) && p
< pend
) p
++;
239 assert(p
< pend
&& *p
== '+');
241 while(isspace(*p
) && p
< pend
) p
++;
243 add_fixup(a
, FIXUP_DATADATA
, data_pos
);
246 fprintf(stderr
, "error: expected \"data\"\n");
254 for(value
= vs
; value
>= 10; value
-=10)
255 ByteArray_writeMem(a
->data
, (void*)"\0\0\0\0\0\0\0\0\0\0", 10);
256 while(value
--) ByteArray_writeUnsignedByte(a
->data
, 0);
259 ByteArray_writeInt(a
->data
, value
);
262 ByteArray_writeShort(a
->data
, value
);
265 ByteArray_writeUnsignedByte(a
->data
, value
);
269 if(exportflag
) add_export(a
, EXPORT_DATA
, var
, data_pos
);
270 add_variable(a
, var
, vs
, data_pos
);
/* Look `name` up in a->import_map (htab_find).
 * add_import() in this file treats a result of -1 as "not imported yet".
 * NOTE(review): the statements turning the lookup result into the return
 * value are not visible in this excerpt, and no use of `len` is visible. */
276 ssize_t
get_import_index(AS
* a
, char* name
, size_t len
) {
278 htab_value
*v
= htab_find(a
->import_map
, name
);
/* Add `name` to the import table unless get_import_index() already knows
 * it (any result other than -1 returns early). A duplicated copy of the
 * name is appended to a->import_list and indexed in a->import_map; the
 * value stored in the map is the entry's index in the list.
 * NOTE(review): List_add() and htab_insert() are invoked inside assert(),
 * so a build with NDEBUG would drop both calls. The declaration of `item`
 * is not visible in this excerpt. */
283 void add_import(AS
*a
, char* name
) {
284 size_t l
= strlen(name
);
285 if(get_import_index(a
, name
, l
) != -1) return;
287 item
.ptr
= strdup(name
);
289 assert(List_add(a
->import_list
, &item
));
290 assert(htab_insert(a
->import_map
, item
.ptr
, HTV_N(List_size(a
->import_list
)-1)));
/* Look `name` up in a->export_map and fetch the export_list entry at the
 * stored index. The entry must have the requested `type` and the same
 * name (asserted); its `instr` field is written to *offset.
 * Callers in this file treat a non-zero result as "export exists".
 * NOTE(review): the handling of a failed lookup and the return statements
 * are not visible in this excerpt. */
293 static int find_export(AS
*a
, int type
, char* name
, unsigned *offset
) {
295 htab_value
*v
= htab_find(a
->export_map
, name
);
297 assert((item
= List_getptr(a
->export_list
, v
->n
)));
298 assert(item
->type
== type
&& !strcmp(name
, item
->fn
));
299 *offset
= item
->instr
;
/* Walk a->function_ref_list and register every referenced function that
 * has no EXPORT_FUNCTION entry (find_export() returns 0) as an import.
 * NOTE(review): the local declarations are not visible in this excerpt. */
303 void generate_import_table(AS
*a
) {
307 for(i
= 0; i
< List_size(a
->function_ref_list
); i
++) {
308 assert((item
= List_getptr(a
->function_ref_list
, i
)));
309 if(!find_export(a
, EXPORT_FUNCTION
, item
->name
, &off
))
310 add_import(a
, item
->name
);
/* Translate a register name into the value kw_find_reg() reports for it;
 * the name is passed along with its strlen(). */
static int get_reg(char* regname) {
	size_t namelen = strlen(regname);
	return kw_find_reg(regname, namelen);
}
318 #include "StringEscape.h"
319 /* expects a pointer to the first char after an opening " in a string,
320 * converts the string into convbuf, and returns the length of that string.
 * `e` starts at the terminating NUL of x, which must lie before `end`
 * (asserted); whitespace is then skipped backwards and (size_t)-1 is
 * returned when the character reached is not a closing '"'. The
 * conversion itself is delegated to unescape().
 * NOTE(review): the statements between the assert and the backwards scan,
 * and the final return, are not visible in this excerpt. */
321 static size_t get_length_and_convert(char* x
, char* end
, char* convbuf
, size_t convbuflen
) {
323 char* e
= x
+ strlen(x
);
324 assert(e
> x
&& e
< end
&& *e
== 0);
326 while(isspace(*e
)) e
--;
327 if(*e
!= '"') return (size_t) -1;
329 result
= unescape(x
, convbuf
, convbuflen
);
333 /* sets the last char in arg to 0, and advances the pointer to the next argstart.
 * In the string path the text after the opening quote is converted with
 * get_length_and_convert() into convbuf+1, 0 is returned when that
 * reports (size_t)-1, and *p is set to 0. In the other path *p is moved
 * forward to the next ',' or whitespace and then past any following
 * whitespace, never beyond pend.
 * NOTE(review): the condition choosing between the two paths and the
 * return statements are not visible in this excerpt. */
334 static char* finalize_arg(char **p
, char* pend
, char* convbuf
, size_t convbuflen
) {
337 size_t l
= get_length_and_convert(*p
+ 1, pend
, convbuf
+1, convbuflen
- 1);
338 if(l
== (size_t) -1) return 0;
341 *p
= 0; /* make it crash if its accessed again, since a string should always be the last arg */
345 while(*p
< pend
&& **p
!= ',' && !isspace(**p
)) (*p
)++;
348 while(*p
< pend
&& isspace(**p
)) (*p
)++;
/* Intern every string listed in the "strings" section.
 * A missing section counts as success (returns 1). Lines are read until
 * one starts with '.'; lines starting with '#' or ';' are skipped. Each
 * remaining line must end in '"' followed by a newline (asserted) and is
 * passed to add_or_get_string__offset().
 * NOTE(review): the declarations of `buf`, `lineno` and `p` and the final
 * return are not visible in this excerpt. */
354 static int asm_strings(AS
*a
) {
355 /* add strings in .strings section, even when they're not used from .text */
357 ssize_t start
= find_section(a
->in
, "strings", &lineno
);
358 if(start
== -1) return 1;
359 fseek(a
->in
, start
, SEEK_SET
);
361 while(fgets(buf
, sizeof buf
, a
->in
) && buf
[0] != '.') {
363 if(*p
== '#' || *p
== ';') continue;
365 size_t l
= strlen(p
);
366 assert(l
>1 && p
[l
-1] == '\n' && p
[l
-2] == '"');
368 add_or_get_string__offset(a
, p
);
/* Parse the "sections" section into a->sections_list.
 * A missing section counts as success (returns 1). Lines are read until
 * one starts with '.'; lines starting with '#', ';', '\n' or '\r' are
 * skipped. The last '=' on the line separates the quoted name from the
 * value: `f` scans backwards over whitespace and must stop on a '"'
 * (asserted), `e` scans forwards over whitespace after the '='. The name
 * is passed to add_sections_name() starting at p+1, i.e. after the first
 * character of the line.
 * NOTE(review): the declarations of `buf`, `lineno`, `p`, `f` and `val`,
 * the conversion of the value and the final return are not visible in
 * this excerpt. */
373 static int asm_sections(AS
*a
) {
374 /* add sections in .sections section */
376 ssize_t start
= find_section(a
->in
, "sections", &lineno
);
377 if(start
== -1) return 1;
378 fseek(a
->in
, start
, SEEK_SET
);
380 while(fgets(buf
, sizeof buf
, a
->in
) && buf
[0] != '.') {
382 if(strchr("#;\n\r", *p
)) continue;
384 size_t l
= strlen(p
);
385 assert(l
>1 && p
[l
-1] == '\n');
386 char *e
= strrchr(p
, '=');
389 while(--f
> p
&& isspace(*f
));
390 assert(f
> p
&& *f
== '"');
392 while(isspace(*(++e
)));
394 add_sections_name(a
, p
+1, val
);
/* Assemble the "text" section of the input into a->code.
 * Visible flow of the first pass: locate the section with find_section()
 * (a missing section counts as success), then read lines until one starts
 * with '.'. Lines starting with '#' or ';' and lines with nothing before
 * a ';' are skipped. A symbol ending in ':' is either a label (name
 * starts with "label", recorded with add_label()) or a function start
 * (recorded with add_export() as EXPORT_FUNCTION, followed by an emitted
 * SCMD_THISBASE and the current position). Any other symbol is looked up
 * with kw_find_insn(); SCMD_THISBASE found in the input is skipped
 * because it is emitted automatically. Operands are split with
 * finalize_arg(); SCMD_REGTOREG has its two operands written in swapped
 * order. Immediates may be strings, '@' variables, ".stack + N"
 * expressions, numbers or function names, each recording the matching
 * fixup or reference; jump instructions record a label reference.
 * After the loop the recorded label and function references are patched
 * in place.
 * NOTE(review): the scanning loops read *p / *sym before testing the
 * pointer against pend. Many declarations, branch bodies and the return
 * statements are not visible in this excerpt. */
399 static int asm_text(AS
*a
) {
401 ssize_t start
= find_section(a
->in
, "text", &lineno
);
402 if(start
== -1) return 1;
403 fseek(a
->in
, start
, SEEK_SET
);
405 char convbuf
[sizeof(buf
)]; /* to convert escaped string into non-escaped version */
407 while(fgets(buf
, sizeof buf
, a
->in
) && buf
[0] != '.') {
409 char* p
= buf
, *pend
= buf
+ sizeof buf
;
410 if(*p
== '#' || *p
== ';') continue;
411 while(isspace(*p
) && p
< pend
) p
++;
413 if(!*p
|| *p
== ';') continue;
415 while(!isspace(*p
) && p
< pend
) p
++;
417 size_t l
= strlen(sym
);
418 if(l
> 1 && sym
[l
-1] == ':') {
419 // functionstart or label
421 if(memcmp(sym
, "label", 5) == 0)
422 add_label(a
, sym
, pos
);
424 add_export(a
, EXPORT_FUNCTION
, sym
, pos
);
425 ByteArray_writeUnsignedInt(a
->code
, SCMD_THISBASE
);
426 ByteArray_writeUnsignedInt(a
->code
, pos
);
431 unsigned instr
= kw_find_insn(sym
, l
);
433 fprintf(stderr
, "line %zu: error: unknown instruction '%s'\n", lineno
, sym
);
436 if(instr
== SCMD_THISBASE
) continue; /* we emit this instruction ourselves when a new function starts. */
438 ByteArray_writeUnsignedInt(a
->code
, instr
);
441 for(arg
= 0; arg
< opcodes
[instr
].argcount
; arg
++) {
442 sym
= finalize_arg(&p
, pend
, convbuf
, sizeof(convbuf
));
444 fprintf(stderr
, "line %zu: error: expected \"\n", lineno
);
448 if(arg
< opcodes
[instr
].regcount
) {
450 if(instr
== SCMD_REGTOREG
) {
451 /* fix reversed order of arguments */
454 while(p
< pend
&& *p
!= ',' && !isspace(*p
)) p
++;
458 ByteArray_writeInt(a
->code
, value
);
459 ByteArray_writeInt(a
->code
, dst
);
466 /* immediate can be function name, string,
467 * variable name, stack fixup, or numeric value */
469 value
= add_or_get_string__offset(a
, sym
);
470 add_fixup(a
, FIXUP_STRING
, pos
);
471 } else if(sym
[0] == '@') {
472 value
= get_variable_offset(a
, sym
+1);
473 add_fixup(a
, FIXUP_GLOBALDATA
, pos
);
474 } else if(sym
[0] == '.') {
475 if(memcmp(sym
+1, "stack", 5)) {
476 fprintf(stderr
, "error: expected stack\n");
480 while(isspace(*sym
) && sym
< pend
) sym
++;
481 assert(sym
< pend
&& *sym
== '+');
483 while(isspace(*sym
) && sym
< pend
) sym
++;
484 add_fixup(a
, FIXUP_STACK
, pos
);
486 } else if(isdigit(sym
[0]) || sym
[0] == '-') {
487 if(sym
[0] == '-') assert(isdigit(sym
[1]));
490 add_function_ref(a
, sym
, pos
);
492 case SCMD_JMP
: case SCMD_JZ
: case SCMD_JNZ
:
493 add_label_ref(a
, sym
, pos
);
499 ByteArray_writeInt(a
->code
, value
);
/* patch pass: overwrite every recorded label reference (4 bytes per code
 * slot) with the distance from the following slot to the label */
506 for(i
= 0; i
< List_size(a
->label_ref_list
); i
++) {
507 assert((item
= List_getptr(a
->label_ref_list
, i
)));
508 ByteArray_set_position(a
->code
, item
->insno
* 4);
509 int lbl
= get_label_offset(a
, item
->name
);
510 assert(lbl
>= 0 && lbl
< pos
);
511 int label_insno
= lbl
- (item
->insno
+1); /* offset is calculated from next instruction */
512 ByteArray_writeInt(a
->code
, label_insno
);
/* patch pass: resolve function references against imports and exports,
 * recording a FIXUP_FUNCTION or FIXUP_IMPORT fixup for each slot */
514 generate_import_table(a
);
515 for(i
= 0; i
< List_size(a
->function_ref_list
); i
++) {
516 assert((item
= List_getptr(a
->function_ref_list
, i
)));
517 ssize_t imp
= get_import_index(a
, item
->name
, strlen(item
->name
));
520 assert(find_export(a
, EXPORT_FUNCTION
, item
->name
, &off
));
522 add_fixup(a
, FIXUP_FUNCTION
, item
->insno
);
524 add_fixup(a
, FIXUP_IMPORT
, item
->insno
);
527 ByteArray_set_position(a
->code
, item
->insno
* 4);
528 ByteArray_writeInt(a
->code
, imp
);
/* Emit `val` to the stream `o` as four bytes, after passing it through
 * end_htole32(). The fwrite() result is not examined. */
static void write_int(FILE* o, int val) {
	int le = end_htole32(val);
	fwrite(&le, 4, 1, o);
}
/* Ordering callback passed to List_sort() by sort_fixup_list().
 * The two leading tests separate FIXUP_DATADATA entries from all other
 * kinds; afterwards entries are ordered by offset (-1 when a's offset is
 * smaller, 0 when equal).
 * NOTE(review): the results returned by the two DATADATA tests and the
 * final return are not visible in this excerpt. */
539 static int fixup_comparefunc(const void *a
, const void* b
) {
540 const struct fixup
* fa
= a
, *fb
= b
;
541 if(fa
->type
== FIXUP_DATADATA
&& fb
->type
!= FIXUP_DATADATA
)
543 if(fb
->type
== FIXUP_DATADATA
&& fa
->type
!= FIXUP_DATADATA
)
545 if(fa
->offset
< fb
->offset
) return -1;
546 if(fa
->offset
== fb
->offset
) return 0;
550 static void sort_fixup_list(AS
* a
) {
551 List_sort(a
->fixup_list
, fixup_comparefunc
);
/* Serialise a->fixup_list to `o` in two passes: first one byte per entry
 * holding its type, then one write_int() per entry holding its offset.
 * NOTE(review): the local declarations are not visible in this excerpt,
 * and the fwrite() result is not examined in the visible lines. */
554 static void write_fixup_list(AS
* a
, FILE *o
) {
557 for(i
= 0; i
< List_size(a
->fixup_list
); i
++) {
558 assert((item
= List_getptr(a
->fixup_list
, i
)));
559 char type
= item
->type
;
560 fwrite(&type
, 1, 1, o
);
562 for(i
= 0; i
< List_size(a
->fixup_list
); i
++) {
563 assert((item
= List_getptr(a
->fixup_list
, i
)));
564 write_int(o
, item
->offset
);
/* Write every entry of a->string_list to `o`, len + 1 bytes each (the
 * stored length plus one extra byte for the terminator).
 * NOTE(review): List_get() is invoked inside assert(), so a build with
 * NDEBUG would leave `item` unfilled. The local declarations are not
 * visible in this excerpt. */
568 static void write_string_section(AS
* a
, FILE* o
) {
571 for(; i
< List_size(a
->string_list
); i
++) {
572 assert(List_get(a
->string_list
, i
, &item
));
573 fwrite(item
.ptr
, item
.len
+ 1, 1, o
);
/* Write every entry of a->import_list to `o`, len + 1 bytes each (the
 * stored length plus one extra byte for the terminator).
 * NOTE(review): List_get() is invoked inside assert(), so a build with
 * NDEBUG would leave `item` unfilled. No assignment to item.len is
 * visible in add_import() — confirm it is set. The local declarations are
 * not visible in this excerpt. */
577 static void write_import_section(AS
* a
, FILE* o
) {
580 for(; i
< List_size(a
->import_list
); i
++) {
581 assert(List_get(a
->import_list
, i
, &item
));
582 fwrite(item
.ptr
, item
.len
+ 1, 1, o
);
/* Write every entry of a->export_list to `o`: the NUL-terminated name
 * followed by one integer that packs the export type into the top 8 bits
 * and the low 24 bits of `instr` into the rest.
 * NOTE(review): List_get() is invoked inside assert(), so a build with
 * NDEBUG would leave `item` unfilled. The local declarations are not
 * visible in this excerpt. */
586 static void write_export_section(AS
* a
, FILE* o
) {
589 for(; i
< List_size(a
->export_list
); i
++) {
590 assert(List_get(a
->export_list
, i
, &item
));
591 fwrite(item
.fn
, strlen(item
.fn
) + 1, 1, o
);
592 unsigned encoded
= (item
.type
<< 24) | (item
.instr
&0x00FFFFFF);
593 write_int(o
, encoded
);
/* Write the sections table to `o`: the NUL-terminated name followed by
 * the offset as an integer. The loop stops after the first entry (see the
 * FIXME below), matching the count of at most 1 written by
 * write_object().
 * NOTE(review): List_get() is invoked inside assert(), so a build with
 * NDEBUG would leave `item` unfilled. The declaration of `i` is not
 * visible in this excerpt. */
597 static void write_sections_section(AS
* a
, FILE *o
) {
598 struct sections_data item
;
600 for(; i
< List_size(a
->sections_list
); i
++) {
601 assert(List_get(a
->sections_list
, i
, &item
));
602 fwrite(item
.name
, strlen(item
.name
) + 1, 1, o
);
603 write_int(o
, item
.offset
);
604 break; // FIXME : currently writing only first item - dialogscripts have more than one
/* Write the assembled object to the file `out`; returns 0 when the file
 * cannot be opened for writing.
 * Order of the visible writes: version 83, global data size, code size in
 * 4-byte units, string section size, the global data bytes, the code
 * bytes, the string section, the fixup count and fixup list, the import
 * count and import section, the export count and export section, the
 * section count (0 or 1) and sections table, and the end marker
 * 0xbeefcafe.
 * NOTE(review): the header preceding the version, the fixup sorting, the
 * handling of an empty import list, the closing of the file and the
 * success return are not visible in this excerpt. */
608 static int write_object(AS
*a
, char *out
) {
610 if(!(o
= fopen(out
, "wb"))) return 0;
612 write_int(o
, 83); //version
613 write_int(o
, ByteArray_get_length(a
->data
)); // globaldatasize
614 write_int(o
, ByteArray_get_length(a
->code
) / 4); // codesize
615 write_int(o
, get_string_section_length(a
)); // stringssize
616 size_t l
= ByteArray_get_length(a
->data
);
619 p
= ByteArray_get_mem(a
->data
, 0, l
);
621 fwrite(p
,l
,1,o
); // globaldata
623 l
= ByteArray_get_length(a
->code
);
625 p
= ByteArray_get_mem(a
->code
, 0, l
);
627 fwrite(p
,l
,1,o
); // code
629 write_string_section(a
, o
);
630 write_int(o
, List_size(a
->fixup_list
));
632 write_fixup_list(a
, o
);
633 if(!List_size(a
->import_list
)) {
634 /* AGS declares object files with 0 imports as invalid */
637 write_int(o
, List_size(a
->import_list
));
638 write_import_section(a
, o
);
639 write_int(o
, List_size(a
->export_list
));
640 write_export_section(a
, o
);
641 write_int(o
, List_size(a
->sections_list
) ? 1 : 0); // FIXME we currently only write the first section
642 write_sections_section(a
, o
);
643 write_int(o
, 0xbeefcafe); // magic end marker.
/* Run the assembler passes in order (data, text, sections) and write the
 * object file to `out`. Returns 0 as soon as any step returns 0.
 * NOTE(review): the success return is not visible in this excerpt. */
648 int AS_assemble(AS
* a
, char* out
) {
649 if(!asm_data(a
)) return 0;
650 if(!asm_text(a
)) return 0;
651 // if(!asm_strings(a)) return 0; // emitting unneeded strings is not necessary
652 if(!asm_sections(a
)) return 0;
653 if(!write_object(a
, out
)) return 0;
/* Initialise the assembler state `a` from scratch.
 * The structure is zeroed, the ByteArray and list handles are pointed at
 * the storage embedded in the structure, the data and code arrays are set
 * up as little-endian and growable, the four lookup tables are created
 * with htab_create(128), and every list is initialised with its element
 * size.
 * NOTE(review): the statements that store `f` and set a->obj are not
 * visible in this excerpt; the htab_create() results are not checked in
 * the visible lines. */
657 void AS_open_stream(AS
* a
, FILE* f
) {
658 memset(a
, 0, sizeof *a
);
660 a
->data
= &a
->data_b
;
661 a
->code
= &a
->code_b
;
662 ByteArray_ctor(a
->obj
);
663 ByteArray_open_mem(a
->obj
, 0, 0);
664 ByteArray_ctor(a
->data
);
665 ByteArray_set_endian(a
->data
, BAE_LITTLE
);
666 ByteArray_set_flags(a
->data
, BAF_CANGROW
);
667 ByteArray_open_mem(a
->data
, 0, 0);
668 ByteArray_ctor(a
->code
);
669 ByteArray_set_endian(a
->code
, BAE_LITTLE
);
670 ByteArray_set_flags(a
->code
, BAF_CANGROW
);
671 ByteArray_open_mem(a
->code
, 0, 0);
673 a
->export_list
= &a
->export_list_b
;
674 a
->fixup_list
= &a
->fixup_list_b
;
675 a
->string_list
= &a
->string_list_b
;
676 a
->label_ref_list
= &a
->label_ref_list_b
;
677 a
->function_ref_list
= &a
->function_ref_list_b
;
678 a
->variable_list
= &a
->variable_list_b
;
679 a
->import_list
= &a
->import_list_b
;
680 a
->sections_list
= &a
->sections_list_b
;
682 a
->label_map
= htab_create(128);
683 a
->import_map
= htab_create(128);
684 a
->export_map
= htab_create(128);
685 a
->string_offset_map
= htab_create(128);
687 List_init(a
->export_list
, sizeof(struct export
));
688 List_init(a
->fixup_list
, sizeof(struct fixup
));
689 List_init(a
->string_list
, sizeof(struct string
));
690 List_init(a
->label_ref_list
, sizeof(struct label
));
691 List_init(a
->function_ref_list
, sizeof(struct label
));
692 List_init(a
->variable_list
, sizeof(struct variable
));
693 List_init(a
->import_list
, sizeof(struct string
));
694 List_init(a
->sections_list
, sizeof(struct sections_data
));
697 a
->string_section_length
= 0;
/* Open the file `fn` for binary reading and initialise `a` on that stream
 * via AS_open_stream().
 * NOTE(review): the handling of a failed fopen() and the return
 * statements are not visible in this excerpt. */
701 int AS_open(AS
* a
, char* fn
) {
702 FILE *f
= fopen(fn
, "rb");
704 AS_open_stream(a
, f
);
709 void AS_close(AS
* a
) {