New "ea_data" module
[deark.git] / modules / basic-c64.c
blobd6a49f02a291bf9724787447d7255bbbf7a7053c
// This file is part of Deark.
// Copyright (C) 2016 Jason Summers
// See the file COPYING for terms of use.

// Detokenize Commodore 64 BASIC programs.
//
// For now at least, it emits a text file having PETSCII encoding.
//
// It might be nice to use UTF-8 instead, but that's not really possible,
// because not all PETSCII characters are represented in Unicode, and for
// various other reasons.

// Another idea is to write it to HTML format, but that will require some
// wizardry to do well.
16 #include <deark-config.h>
17 #include <deark-private.h>
18 DE_DECLARE_MODULE(de_module_basic_c64);
20 typedef struct localctx_struct {
21 dbuf *outf;
22 } lctx;
24 static const char *get_token(u8 b)
26 static const char *t[] = {"END","FOR","NEXT","DATA","INPUT#","INPUT","DIM",
27 "READ","LET","GOTO","RUN","IF","RESTORE","GOSUB","RETURN","REM","STOP",
28 "ON","WAIT","LOAD","SAVE","VERIFY","DEF","POKE","PRINT#","PRINT",
29 "CONT","LIST","CLR","CMD","SYS","OPEN","CLOSE","GET","NEW","TAB(","TO",
30 "FN","SPC(","THEN","NOT","STEP","+","-","*","/","^","AND","OR",">","=",
31 "<","SGN","INT","ABS","USR","FRE","POS","SQR","RND","LOG","EXP","COS",
32 "SIN","TAN","ATN","PEEK","LEN","STR$","VAL","ASC","CHR$","LEFT$",
33 "RIGHT$","MID$","GO"};
34 if(b>=0x80 && b<=0xcb) {
35 return t[((int)b)-0x80];
37 return NULL;
40 static void process_line(deark *c, lctx *d, i64 file_pos, i64 mem_pos,
41 i64 line_size)
43 i64 line_num;
44 i64 pos;
45 u8 b;
46 const char *token;
47 int in_quote = 0;
49 pos = file_pos;
50 line_num = de_getu16le(pos);
51 de_dbg(c, "line %d at %d, mem pos=%d, size=%d", (int)line_num, (int)file_pos,
52 (int)mem_pos, (int)line_size);
53 pos += 2;
55 dbuf_printf(d->outf, "%d ", (int)line_num);
57 while(pos < file_pos+line_size) {
58 b = de_getbyte(pos);
60 if(in_quote && b!=0x22) {
61 // Quoted string data. Don't translate.
62 // TODO: Can 0x00 occur in a string?
63 dbuf_writebyte(d->outf, b);
64 pos++;
65 continue;
68 if(b>=0x80) {
69 token = get_token(b);
70 if(token) {
71 dbuf_puts(d->outf, token);
73 else {
74 dbuf_puts(d->outf, "***ERROR***");
77 else if(b==0x00) {
78 break;
80 else {
81 dbuf_writebyte(d->outf, b);
82 if(b==0x22) { // Quotation mark
83 in_quote = !in_quote;
86 pos++;
89 dbuf_puts(d->outf, "\n");
92 static void de_run_basic_c64(deark *c, de_module_params *mparams)
94 lctx *d = NULL;
95 i64 file_pos;
96 i64 mem_start;
97 i64 mem_pos;
98 i64 next_line_ptr;
99 i64 line_size;
101 d = de_malloc(c, sizeof(lctx));
103 d->outf = dbuf_create_output_file(c, "c64.bas", NULL, 0);
105 // TODO: What if the first two bytes are not 0x01 0x08?
106 mem_start = 0x0801;
108 file_pos = 2;
110 while(file_pos < c->infile->len) {
111 mem_pos = file_pos - 2 + mem_start;
113 next_line_ptr = de_getu16le(file_pos);
114 if(next_line_ptr==0x0000) {
115 break;
118 line_size = next_line_ptr - mem_pos - 2;
119 if(line_size<1) {
120 break;
122 process_line(c, d, file_pos+2, mem_pos+2, line_size);
123 file_pos += 2 + line_size;
126 dbuf_close(d->outf);
127 de_free(c, d);
130 static int de_identify_basic_c64(deark *c)
132 u8 buf[8];
134 if(de_input_file_has_ext(c, "prg")) {
135 de_read(buf, 0, 2);
136 if(!de_memcmp(buf, "\x01\x08", 2)) return 20;
138 return 0;
141 void de_module_basic_c64(deark *c, struct deark_module_info *mi)
143 mi->id = "basic_c64";
144 mi->desc = "Detokenize C64 BASIC";
145 mi->run_fn = de_run_basic_c64;
146 mi->identify_fn = de_identify_basic_c64;