2 * Copyright (C) 2024, 2025 Mikulas Patocka
4 * This file is part of Ajla.
6 * Ajla is free software: you can redistribute it and/or modify it under the
7 * terms of the GNU General Public License as published by the Free Software
8 * Foundation, either version 3 of the License, or (at your option) any later
11 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along with
16 * Ajla. If not, see <https://www.gnu.org/licenses/>.
19 private unit compiler.optimize.utils;
21 uses compiler.optimize.defs;
23 fn function_pcode(params : list(pcode_t)) : list(pcode_t);
24 fn function_name(params : list(pcode_t)) : bytes;
25 fn function_extract_nested(pc : list(pcode_t), fn_idx : list(int)) : list(pcode_t);
26 fn function_specifier_length(params : list(pcode_t)) : int;
27 fn function_load(params : list(pcode_t)) : (int, function);
28 fn function_store(f : function) : list(pcode_t);
30 fn create_instr(opcode : pcode_t, params : list(pcode_t), bgi : int) : instruction;
31 fn load_function_context(pc : list(pcode_t)) : context;
32 fn dump_basic_blocks(ctx : context, dump_it : bool) : list(pcode_t);
37 uses compiler.common.blob;
38 uses compiler.common.evaluate;
39 uses compiler.parser.util;
41 fn function_specifier_length(params : list(pcode_t)) : int
43 var bl := blob_length(params[1 .. ]);
44 return 1 + bl + 1 + params[1 + bl];
47 fn function_load(params : list(pcode_t)) : (int, function)
49 var bl := blob_length(params[1 .. ]);
51 path_idx : params[0] shr 1,
52 program : (params[0] and 1) <> 0,
53 un : blob_load(params[1 .. ]),
54 fn_idx : fill(0, params[1 + bl]),
56 for i := 0 to params[1 + bl] do
57 r.fn_idx[i] := params[1 + bl + 1 + i];
58 return function_specifier_length(params), r;
61 fn function_store(f : function) : list(pcode_t)
63 var pc := list(pcode_t).[ (f.path_idx shl 1) + select(f.program, 0, 1) ];
64 pc += blob_store(f.un);
65 pc += list(pcode_t).[ len(f.fn_idx) ];
66 for i := 0 to len(f.fn_idx) do
71 fn function_pcode(params : list(pcode_t)) : list(pcode_t)
73 var l, f := function_load(params);
74 var fi := list(pcode_t).[ len(f.fn_idx) ];
75 for i := 0 to len(f.fn_idx) do
77 return load_optimized_pcode(f.path_idx, f.un, f.program, fi, false);
80 fn function_name(params : list(pcode_t)) : bytes
82 var pc := function_pcode(params);
83 return blob_load(pc[9 .. ]);
86 fn function_extract_nested(pc : list(pcode_t), fn_idx : list(int)) : list(pcode_t)
88 fn_idx := fn_idx[1 .. ];
89 for idx in list_consumer(fn_idx) do [
91 abort internal("function_extract_nested: too high function index: " + ntos(idx) + " >= " + ntos(pc[2]));
92 var ptr := 9 + blob_length(pc[9 .. ]);
97 pc := pc[ptr + 1 .. ptr + 1 + pc[ptr]];
103 fn decode_structured_params(offs : int, params : list(pcode_t)) : (int, param_set, param_set)
105 var ps : param_set := 0;
106 var ls : param_set := 0;
107 for i := 0 to params[0] do [
108 var scode := params[offs];
109 if scode = Structured_Record then [
112 ] else if scode = Structured_Option then [
114 ] else if scode = Structured_Array then [
119 abort internal("invalid structured type");
125 fn create_instr(opcode : pcode_t, params : list(pcode_t), bgi : int) : instruction
128 var ins := instruction.[
140 if opcode = P_BinaryOp then [
141 ins.read_set := 0 bts 3 bts 5;
142 ins.free_set := ins.read_set;
143 ins.write_set := 0 bts 1;
145 ] else if opcode = P_BinaryConstOp then [
146 ins.read_set := 0 bts 3;
147 ins.free_set := ins.read_set;
148 ins.write_set := 0 bts 1;
150 ] else if opcode = P_UnaryOp then [
151 ins.read_set := 0 bts 3;
152 ins.free_set := ins.read_set;
153 ins.write_set := 0 bts 1;
155 ] else if opcode = P_Copy then [
156 ins.read_set := 0 bts 2;
157 ins.free_set := ins.read_set;
158 ins.write_set := 0 bts 0;
160 ] else if opcode = P_Copy_Type_Cast then [
161 ins.read_set := 0 bts 2;
162 ins.free_set := ins.read_set;
163 ins.write_set := 0 bts 0;
165 ] else if opcode = P_Free then [
166 ins.read_set := 0 bts 0;
168 ] else if opcode = P_Eval then [
169 ins.read_set := 0 bts 0;
171 ] else if opcode = P_Keep then [
172 ins.read_set := 0 bts 0;
174 ] else if opcode = P_Fn then [
175 ins.write_set := 0 bts 0;
176 xlen := 3 + params[1] + params[2];
177 for i := 3 to xlen do [
182 ] else if opcode = P_Load_Local_Type then [
183 ins.write_set := 0 bts 0;
185 ] else if opcode = P_Load_Fn then [
186 var l := function_specifier_length(params[3 .. ]);
187 for i := 0 to params[1] do [
188 ins.read_set bts= 3 + l + i * 2 + 1;
189 ins.free_set bts= 3 + l + i * 2 + 1;
191 ins.write_set := 0 bts 0;
192 xlen := 3 + l + params[1] * 2;
193 ] else if opcode = P_Curry then [
194 ins.read_set := 0 bts 3;
195 ins.free_set := 0 bts 3;
196 for i := 0 to params[1] do [
197 ins.read_set bts= 4 + i * 2 + 1;
198 ins.free_set bts= 4 + i * 2 + 1;
200 ins.write_set := 0 bts 0;
201 xlen := 4 + params[1] * 2;
202 ] else if opcode = P_Call then [
203 var l := function_specifier_length(params[3 .. ] );
204 for i := 0 to params[2] do [
205 ins.read_set bts= 3 + l + i * 2 + 1;
206 ins.free_set bts= 3 + l + i * 2 + 1;
208 for i := 0 to params[1] do
209 ins.write_set bts= 3 + l + params[2] * 2 + i;
210 xlen := 3 + l + params[2] * 2 + params[1];
211 ] else if opcode = P_Call_Indirect then [
212 ins.read_set := 0 bts 4;
213 ins.free_set := 0 bts 4;
214 for i := 0 to params[2] do [
215 ins.read_set bts= 5 + i * 2 + 1;
216 ins.free_set bts= 5 + i * 2 + 1;
218 for i := 0 to params[1] do
219 ins.write_set bts= 5 + params[2] * 2 + i;
220 xlen := 5 + params[2] * 2 + params[1];
221 ] else if opcode = P_Load_Const then [
222 ins.write_set := 0 bts 0;
223 xlen := 1 + blob_length(params[1 .. ]);
224 ] else if opcode = P_Structured_Write then [
225 ins.read_set := 0 bts 3 bts 5;
226 ins.free_set := 0 bts 3 bts 5;
227 ins.write_set := 0 bts 1;
228 var pmask lmask : param_set;
229 xlen, pmask, lmask := decode_structured_params(6, params);
230 ins.read_set or= pmask;
232 ins.conflict_1 := 0 bts 5 or pmask;
233 ins.conflict_2 := 0 bts 1;
234 ] else if opcode = P_Record_Type or opcode = P_Option_Type then [
235 for i := 0 to params[1] do
236 ins.read_set bts= 2 + i;
237 ins.write_set := 0 bts 0;
238 var l := function_specifier_length(params[2 + params[1] .. ]);
239 xlen := 2 + params[1] + l;
240 ] else if opcode = P_Record_Create then [
241 ins.write_set := 0 bts 0;
242 for i := 0 to params[1] do [
243 ins.read_set bts= 2 + i * 2 + 1;
244 ins.free_set bts= 2 + i * 2 + 1;
246 xlen := 2 + params[1] * 2;
247 ] else if opcode = P_Record_Load_Slot then [
248 ins.read_set := 0 bts 1;
249 ins.write_set := 0 bts 0;
251 ] else if opcode = P_Record_Load then [
252 ins.read_set := 0 bts 2;
253 ins.write_set := 0 bts 0;
256 ] else if opcode = P_Option_Create then [
257 ins.read_set := 0 bts 3;
258 ins.free_set := 0 bts 3;
259 ins.write_set := 0 bts 0;
261 ] else if opcode = P_Option_Load then [
262 ins.read_set := 0 bts 2;
263 ins.write_set := 0 bts 0;
266 ] else if opcode = P_Option_Test then [
267 ins.read_set := 0 bts 1;
268 ins.write_set := 0 bts 0;
270 ] else if opcode = P_Option_Ord then [
271 ins.read_set := 0 bts 1;
272 ins.write_set := 0 bts 0;
274 ] else if opcode = P_Array_Flexible then [
275 ins.read_set := 0 bts 1;
276 ins.write_set := 0 bts 0;
278 ] else if opcode = P_Array_Fixed then [
279 ins.read_set := 0 bts 1 bts 2;
280 ins.write_set := 0 bts 0;
282 ] else if opcode = P_Array_Create then [
283 ins.read_set := 0 bts 3;
284 ins.write_set := 0 bts 0;
285 for i := 0 to params[2] do [
286 ins.read_set bts= 4 + i * 2 + 1;
287 ins.free_set bts= 4 + i * 2 + 1;
289 ins.lt_set := 0 bts 1;
290 xlen := 4 + params[2] * 2;
291 ] else if opcode = P_Array_Fill then [
292 ins.read_set := 0 bts 3 bts 4;
293 ins.free_set := 0 bts 3;
294 ins.write_set := 0 bts 0;
295 ins.lt_set := 0 bts 1;
297 ] else if opcode = P_Array_String then [
298 ins.write_set := 0 bts 0;
299 xlen := 1 + blob_length(params[1 .. ]);
300 ] else if opcode = P_Array_Unicode then [
301 ins.write_set := 0 bts 0;
303 ] else if opcode = P_Array_Load then [
304 ins.read_set := 0 bts 2 bts 3;
305 ins.write_set := 0 bts 0;
308 ] else if opcode = P_Array_Len then [
309 ins.read_set := 0 bts 1;
310 ins.write_set := 0 bts 0;
312 ] else if opcode = P_Array_Len_Greater_Than then [
313 ins.read_set := 0 bts 1 bts 2;
314 ins.write_set := 0 bts 0;
316 ] else if opcode = P_Array_Sub then [
317 ins.read_set := 0 bts 2 bts 3 bts 4;
318 ins.free_set := 0 bts 2;
319 ins.write_set := 0 bts 0;
321 ] else if opcode = P_Array_Skip then [
322 ins.read_set := 0 bts 2 bts 3;
323 ins.free_set := 0 bts 2;
324 ins.write_set := 0 bts 0;
326 ] else if opcode = P_Array_Append then [
327 ins.read_set := 0 bts 2 bts 4;
328 ins.free_set := 0 bts 2 bts 4;
329 ins.write_set := 0 bts 0;
331 ] else if opcode = P_Array_Append_One then [
332 ins.read_set := 0 bts 2 bts 4;
333 ins.free_set := 0 bts 2 bts 4;
334 ins.write_set := 0 bts 0;
336 ] else if opcode = P_Array_Flatten then [
337 ins.read_set := 0 bts 2;
338 ins.free_set := 0 bts 2;
339 ins.write_set := 0 bts 0;
341 ] else if opcode = P_Jmp then [
343 ] else if opcode = P_Jmp_False then [
344 ins.read_set := 0 bts 0;
346 ] else if opcode = P_Label then [
348 ] else if opcode = P_IO then [
349 for i := 0 to params[1] do [
350 ins.write_set bts= 4 + i;
352 for i := params[1] to params[1] + params[2] do [
353 ins.read_set bts= 4 + i;
355 ins.conflict_1 := ins.read_set;
356 ins.conflict_2 := ins.write_set;
357 xlen := 4 + params[1] + params[2] + params[3];
358 ] else if opcode = P_Args then [
359 for i := 0 to len(params) do
360 ins.write_set bts= i;
362 ] else if opcode = P_Return_Vars then [
363 for i := 0 to len(params) do
364 ins.write_set bts= i;
366 ] else if opcode = P_Return then [
368 while i < len(params) do [
374 ] else if opcode = P_Assume then [
375 ins.read_set := 0 bts 0;
377 ] else if opcode = P_Claim then [
378 ins.read_set := 0 bts 0;
380 ] else if opcode = P_Checkpoint then [
382 ] else if opcode = P_Line_Info then [
383 if params[0] < 0 then
384 abort internal("P_Line_Info: negative line info");
386 ] else if opcode = P_Phi then [
387 ins.write_set := 0 bts 0;
388 for i := 1 to len(params) do
392 abort internal("invalid opcode");
395 if xlen <> len(params) then
396 abort internal("length mismatch on opcode " + ntos(opcode) + ": " + ntos(xlen) + " <> " + ntos(len(params)));
398 var rs := ins.read_set;
400 var s : int := bsr rs;
402 if ins.params[s] < 0 then [
404 ins.conflict_1 btr= s;
405 ins.conflict_2 btr= s;
408 var ls := ins.lt_set;
410 var s : int := bsr ls;
412 if ins.params[s] < 0 then [
421 fn set_arrow(ctx : context, src : int, dst : int) : context
423 {var sblk := ctx.blocks[src];
424 for i := 0 to len(sblk.instrs) do [
425 eval debug("pcode: " + ntos(sblk.instrs[i].opcode));
427 ctx.blocks[dst].pred_position +<= len(ctx.blocks[src].post_list);
428 ctx.blocks[dst].pred_list +<= src;
429 ctx.blocks[src].post_list +<= dst;
430 //eval debug("arrow from " + ntos(src) + " to " + ntos(dst) + " total " + ntos(len(ctx.blocks)));
434 fn load_function_context(pc : list(pcode_t)) : context
437 local_types : empty(local_type),
438 instrs : empty(instruction),
439 blocks : empty(basic_block),
441 variables : exception_make(list(variable), ec_sync, error_record_field_not_initialized, 0, false),
442 label_to_block : exception_make(list(int), ec_sync, error_record_field_not_initialized, 0, false),
443 var_map : exception_make(list(int), ec_sync, error_record_field_not_initialized, 0, false),
444 cm : exception_make(conflict_map, ec_sync, error_record_field_not_initialized, 0, false),
445 should_retry : exception_make(bool, ec_sync, error_record_field_not_initialized, 0, false),
447 name : blob_load(pc[9 .. ]),
450 var ptr := 9 + blob_length(pc[9 .. ]);
452 for i := 0 to pc[2] do [
456 for i := 0 to pc[3] do [
460 if ft = Local_Type_Record then [
461 var n, f := function_load(pc[ptr .. ]);
463 lt := local_type.rec.(f);
464 ] else if ft = Local_Type_Flat_Record then [
465 var non_flat_rec := pc[ptr];
466 var n_entries := pc[ptr + 1];
467 lt := local_type.flat_rec.(local_type_flat_record.[ non_flat_record : non_flat_rec, flat_types : empty(int) ]);
469 for j := 0 to n_entries do [
470 lt.flat_rec.flat_types +<= pc[ptr];
473 ] else if ft = Local_Type_Flat_Array then [
474 lt := local_type.flat_array.(local_type_flat_array.[ flat_type : pc[ptr], number_of_elements : pc[ptr + 1] ]);
477 abort internal("unknown local type " + ntos(ft));
479 ctx.local_types +<= lt;
482 var n_variables := pc[4];
483 ctx.variables := fill(new_variable, n_variables);
485 ctx.label_to_block := fill(-1, pc[8]);
487 for i := 0 to n_variables do [
488 ctx.variables[i].type_index := pc[ptr];
489 ctx.variables[i].runtime_type := pc[ptr + 1];
490 ctx.variables[i].local_type := -1;
491 ctx.variables[i].color := pc[ptr + 2];
492 ctx.variables[i].must_be_flat := pc[ptr + 3] bt bsf VarFlag_Must_Be_Flat;
493 ctx.variables[i].must_be_data := pc[ptr + 3] bt bsf VarFlag_Must_Be_Data;
494 ctx.variables[i].is_option_type := false;
496 ctx.variables[i].name := blob_load(pc[ptr .. ]);
497 ptr += blob_length(pc[ptr .. ]);
499 if ctx.variables[i].runtime_type < T_Undetermined then
500 abort internal("load_function_context: invalid runtime type: " + ctx.name + ", " + ntos(i) + "(" + ctx.variables[i].name + "): " + ntos(ctx.variables[i].runtime_type));
503 var b := new_basic_block;
505 while ptr < len(pc) do [
506 var instr_len := pc[ptr + 1] + 2;
507 var ins := create_instr(pc[ptr], pc[ptr + 2 .. ptr + instr_len], len(ctx.blocks));
509 var free_set : param_set := ins.free_set;
510 while free_set <> 0 do [
511 var arg : int := bsr free_set;
513 ins.params[arg - 1] and= not Flag_Free_Argument;
516 if ins.opcode = P_Jmp or ins.opcode = P_Jmp_False or ins.opcode = P_Return then [
517 b.instrs +<= len(ctx.instrs);
520 b := new_basic_block;
521 ] else if ins.opcode = P_Label then [
522 if len_greater_than(int, b.instrs, 0) then [
524 b := new_basic_block;
527 if ctx.label_to_block[ins.params[0]] >= 0 then
528 abort internal("load_function_context: label already defined");
529 ctx.label_to_block[ins.params[0]] := len(ctx.blocks);
530 b.instrs +<= len(ctx.instrs);
532 ] else if ins.opcode = P_Free then [
533 ] else if ins.opcode = P_Checkpoint then [
535 b.instrs +<= len(ctx.instrs);
541 if ptr > len(pc) then
542 abort internal("load_function_context: " + ctx.name + ": pcode doesn't match");
543 if len(b.instrs) > 0 then
544 abort internal("load_function_context: " + ctx.name + ": the last basic block is not finished");
546 for i := 0 to len(ctx.blocks) do [
548 var block := ctx.blocks[i];
549 if len(block.instrs) = 0 then [
550 ctx := set_arrow(ctx, i, i + 1);
553 var first := ctx.instrs[ block.instrs[0] ];
554 if first.opcode = P_Label then [
555 ctx.blocks[i].instrs := block.instrs[1 .. ];
558 var last := ctx.instrs[ block.instrs[ len(block.instrs) - 1 ] ];
560 if last.opcode = P_Jmp then [
561 var target := last.params[0];
562 ctx := set_arrow(ctx, i, ctx.label_to_block[target]);
563 ctx.blocks[i].instrs := block.instrs[ .. len(block.instrs) - 1];
564 ] else if last.opcode = P_Jmp_False then [
565 var target1 := last.params[1];
566 var target2 := last.params[2];
567 ctx := set_arrow(ctx, i, i + 1);
568 ctx := set_arrow(ctx, i, ctx.label_to_block[target1]);
569 ctx := set_arrow(ctx, i, ctx.label_to_block[target2]);
570 ] else if last.opcode <> P_Return then [
571 ctx := set_arrow(ctx, i, i + 1);
578 fn dump_basic_blocks(ctx : context, dump_it : bool) : list(pcode_t)
580 //dump_it := ctx.name = "main" or ctx.name = "fact";
582 eval debug("-----------------------------------------------------------------");
583 eval debug("dump_basic_blocks: " + ctx.name);
585 var rpc := empty(pcode_t);
586 var worklist : node_set := 1;
587 var done : node_set := 0;
588 while worklist <> 0 do [
589 var bgi : int := bsr worklist;
592 eval debug("process block from worklist " + ntos(bgi));
595 goto process_block_no_label;
601 eval debug("generate label " + ntos(bgi - 1));
602 process_block_no_label:
605 var block := ctx.blocks[bgi];
606 for ili := 0 to len(block.instrs) do [
607 var ins := ctx.instrs[block.instrs[ili]];
609 rpc +<= len(ins.params);
612 var instr_name := (pcode_name(ins.opcode) + " ")[ .. 20];
613 var msg := "instr: " + instr_name + " ";
614 if ins.opcode = P_Jmp_False then [
615 ins.params[1] := block.post_list[1] - 1;
616 ins.params[2] := block.post_list[2] - 1;
618 for i := 0 to len(ins.params) do
619 msg += " " + ntos(ins.params[i]);
621 var read_elided := true;
622 var write_elided := true;
623 var read_set := ins.read_set;
624 if ins.opcode = P_Free then [
627 while read_set <> 0 do [
628 var x : int := bsf read_set;
630 var v := ins.params[x];
631 var va := ctx.variables[v];
632 msg += " r(" + ntos(v) + "," + ntos(va.type_index) + "," + ntos(va.runtime_type) + ":" + ntos(va.color) + ")";
633 if v < 0 or ctx.variables[v].color = -1 then
636 read_elided := false;
638 var write_set := ins.write_set;
639 var something_written := false;
640 while write_set <> 0 do [
641 var x : int := bsf write_set;
643 var v := ins.params[x];
644 var va := ctx.variables[v];
645 msg += " w(" + ntos(v) + "," + ntos(va.type_index) + "," + ntos(va.runtime_type) + ":" + ntos(va.color) + ")";
646 if ctx.variables[v].color = -1 then
649 write_elided := false;
650 something_written := true;
652 if ins.opcode = P_Call or ins.opcode = P_Load_Fn then [
653 msg += " " + function_name(ins.params[3 .. ]);
655 if read_elided and write_elided then
656 msg := bytes.[27] + "[31m" + msg +< 27 + "[0m";
657 else if write_elided and something_written then
658 msg := bytes.[27] + "[35m" + msg +< 27 + "[0m";
660 msg := bytes.[27] + "[32m" + msg +< 27 + "[0m";
667 if len(block.post_list) = 0 then
669 for i := 1 to len(block.post_list) do [
670 var post_idx := block.post_list[i];
671 if not done bt post_idx then
672 worklist bts= post_idx;
673 if rpc[len(rpc) - 5] <> P_Jmp_False then
674 abort internal("a block with multiple outputs doesn't end with P_Jmp_False");
675 rpc[len(rpc) - 3 + i] := post_idx - 1;
677 var next_bgi := block.post_list[0];
678 if done bt next_bgi then [
681 rpc +<= next_bgi - 1;
683 eval debug("generating jump to label " + ntos(next_bgi - 1));
688 eval debug("process following block " + ntos(bgi));
689 if len(ctx.blocks[bgi].pred_list) <= 1 then
690 goto process_block_no_label;