tcg/ppc64: implement not_i32/64 and ext32u_i64
[qemu/mdroth.git] / tcg / optimize.c
blob7e7f2b20204636e8cab7533ddd8bd909bf17f2a0
1 /*
2 * Optimizations for Tiny Code Generator for QEMU
4 * Copyright (c) 2010 Samsung Electronics.
5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
26 #include "config.h"
28 #include <stdlib.h>
29 #include <stdio.h>
31 #include "qemu-common.h"
32 #include "tcg-op.h"
34 #define CASE_OP_32_64(x) \
35 glue(glue(case INDEX_op_, x), _i32): \
36 glue(glue(case INDEX_op_, x), _i64)
38 typedef enum {
39 TCG_TEMP_UNDEF = 0,
40 TCG_TEMP_CONST,
41 TCG_TEMP_COPY,
42 TCG_TEMP_HAS_COPY,
43 TCG_TEMP_ANY
44 } tcg_temp_state;
46 struct tcg_temp_info {
47 tcg_temp_state state;
48 uint16_t prev_copy;
49 uint16_t next_copy;
50 tcg_target_ulong val;
53 static struct tcg_temp_info temps[TCG_MAX_TEMPS];
55 /* Reset TEMP's state to TCG_TEMP_ANY. If TEMP was a representative of some
56 class of equivalent temp's, a new representative should be chosen in this
57 class. */
58 static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
60 int i;
61 TCGArg new_base = (TCGArg)-1;
62 if (temps[temp].state == TCG_TEMP_HAS_COPY) {
63 for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
64 if (i >= nb_globals) {
65 temps[i].state = TCG_TEMP_HAS_COPY;
66 new_base = i;
67 break;
70 for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
71 if (new_base == (TCGArg)-1) {
72 temps[i].state = TCG_TEMP_ANY;
73 } else {
74 temps[i].val = new_base;
77 temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
78 temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
79 } else if (temps[temp].state == TCG_TEMP_COPY) {
80 temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
81 temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
82 new_base = temps[temp].val;
84 temps[temp].state = TCG_TEMP_ANY;
85 if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) {
86 temps[new_base].state = TCG_TEMP_ANY;
90 static int op_bits(enum TCGOpcode op)
92 const TCGOpDef *def = &tcg_op_defs[op];
93 return def->flags & TCG_OPF_64BIT ? 64 : 32;
96 static int op_to_movi(int op)
98 switch (op_bits(op)) {
99 case 32:
100 return INDEX_op_movi_i32;
101 case 64:
102 return INDEX_op_movi_i64;
103 default:
104 fprintf(stderr, "op_to_movi: unexpected return value of "
105 "function op_bits.\n");
106 tcg_abort();
110 static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst,
111 TCGArg src, int nb_temps, int nb_globals)
113 reset_temp(dst, nb_temps, nb_globals);
114 assert(temps[src].state != TCG_TEMP_COPY);
115 /* Don't try to copy if one of temps is a global or either one
116 is local and another is register */
117 if (src >= nb_globals && dst >= nb_globals &&
118 tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {
119 assert(temps[src].state != TCG_TEMP_CONST);
120 if (temps[src].state != TCG_TEMP_HAS_COPY) {
121 temps[src].state = TCG_TEMP_HAS_COPY;
122 temps[src].next_copy = src;
123 temps[src].prev_copy = src;
125 temps[dst].state = TCG_TEMP_COPY;
126 temps[dst].val = src;
127 temps[dst].next_copy = temps[src].next_copy;
128 temps[dst].prev_copy = src;
129 temps[temps[dst].next_copy].prev_copy = dst;
130 temps[src].next_copy = dst;
132 gen_args[0] = dst;
133 gen_args[1] = src;
136 static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val,
137 int nb_temps, int nb_globals)
139 reset_temp(dst, nb_temps, nb_globals);
140 temps[dst].state = TCG_TEMP_CONST;
141 temps[dst].val = val;
142 gen_args[0] = dst;
143 gen_args[1] = val;
146 static int op_to_mov(int op)
148 switch (op_bits(op)) {
149 case 32:
150 return INDEX_op_mov_i32;
151 case 64:
152 return INDEX_op_mov_i64;
153 default:
154 fprintf(stderr, "op_to_mov: unexpected return value of "
155 "function op_bits.\n");
156 tcg_abort();
160 static TCGArg do_constant_folding_2(int op, TCGArg x, TCGArg y)
162 switch (op) {
163 CASE_OP_32_64(add):
164 return x + y;
166 CASE_OP_32_64(sub):
167 return x - y;
169 CASE_OP_32_64(mul):
170 return x * y;
172 CASE_OP_32_64(and):
173 return x & y;
175 CASE_OP_32_64(or):
176 return x | y;
178 CASE_OP_32_64(xor):
179 return x ^ y;
181 case INDEX_op_shl_i32:
182 return (uint32_t)x << (uint32_t)y;
184 case INDEX_op_shl_i64:
185 return (uint64_t)x << (uint64_t)y;
187 case INDEX_op_shr_i32:
188 return (uint32_t)x >> (uint32_t)y;
190 case INDEX_op_shr_i64:
191 return (uint64_t)x >> (uint64_t)y;
193 case INDEX_op_sar_i32:
194 return (int32_t)x >> (int32_t)y;
196 case INDEX_op_sar_i64:
197 return (int64_t)x >> (int64_t)y;
199 case INDEX_op_rotr_i32:
200 x = ((uint32_t)x << (32 - y)) | ((uint32_t)x >> y);
201 return x;
203 case INDEX_op_rotr_i64:
204 x = ((uint64_t)x << (64 - y)) | ((uint64_t)x >> y);
205 return x;
207 case INDEX_op_rotl_i32:
208 x = ((uint32_t)x << y) | ((uint32_t)x >> (32 - y));
209 return x;
211 case INDEX_op_rotl_i64:
212 x = ((uint64_t)x << y) | ((uint64_t)x >> (64 - y));
213 return x;
215 CASE_OP_32_64(not):
216 return ~x;
218 CASE_OP_32_64(neg):
219 return -x;
221 CASE_OP_32_64(andc):
222 return x & ~y;
224 CASE_OP_32_64(orc):
225 return x | ~y;
227 CASE_OP_32_64(eqv):
228 return ~(x ^ y);
230 CASE_OP_32_64(nand):
231 return ~(x & y);
233 CASE_OP_32_64(nor):
234 return ~(x | y);
236 CASE_OP_32_64(ext8s):
237 return (int8_t)x;
239 CASE_OP_32_64(ext16s):
240 return (int16_t)x;
242 CASE_OP_32_64(ext8u):
243 return (uint8_t)x;
245 CASE_OP_32_64(ext16u):
246 return (uint16_t)x;
248 case INDEX_op_ext32s_i64:
249 return (int32_t)x;
251 case INDEX_op_ext32u_i64:
252 return (uint32_t)x;
254 default:
255 fprintf(stderr,
256 "Unrecognized operation %d in do_constant_folding.\n", op);
257 tcg_abort();
261 static TCGArg do_constant_folding(int op, TCGArg x, TCGArg y)
263 TCGArg res = do_constant_folding_2(op, x, y);
264 if (op_bits(op) == 32) {
265 res &= 0xffffffff;
267 return res;
270 /* Propagate constants and copies, fold constant expressions. */
271 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
272 TCGArg *args, TCGOpDef *tcg_op_defs)
274 int i, nb_ops, op_index, op, nb_temps, nb_globals, nb_call_args;
275 const TCGOpDef *def;
276 TCGArg *gen_args;
277 TCGArg tmp;
278 /* Array VALS has an element for each temp.
279 If this temp holds a constant then its value is kept in VALS' element.
280 If this temp is a copy of other ones then this equivalence class'
281 representative is kept in VALS' element.
282 If this temp is neither copy nor constant then corresponding VALS'
283 element is unused. */
285 nb_temps = s->nb_temps;
286 nb_globals = s->nb_globals;
287 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
289 nb_ops = tcg_opc_ptr - gen_opc_buf;
290 gen_args = args;
291 for (op_index = 0; op_index < nb_ops; op_index++) {
292 op = gen_opc_buf[op_index];
293 def = &tcg_op_defs[op];
294 /* Do copy propagation */
295 if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) {
296 assert(op != INDEX_op_call);
297 for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
298 if (temps[args[i]].state == TCG_TEMP_COPY) {
299 args[i] = temps[args[i]].val;
304 /* For commutative operations make constant second argument */
305 switch (op) {
306 CASE_OP_32_64(add):
307 CASE_OP_32_64(mul):
308 CASE_OP_32_64(and):
309 CASE_OP_32_64(or):
310 CASE_OP_32_64(xor):
311 CASE_OP_32_64(eqv):
312 CASE_OP_32_64(nand):
313 CASE_OP_32_64(nor):
314 if (temps[args[1]].state == TCG_TEMP_CONST) {
315 tmp = args[1];
316 args[1] = args[2];
317 args[2] = tmp;
319 break;
320 default:
321 break;
324 /* Simplify expression if possible. */
325 switch (op) {
326 CASE_OP_32_64(add):
327 CASE_OP_32_64(sub):
328 CASE_OP_32_64(shl):
329 CASE_OP_32_64(shr):
330 CASE_OP_32_64(sar):
331 CASE_OP_32_64(rotl):
332 CASE_OP_32_64(rotr):
333 if (temps[args[1]].state == TCG_TEMP_CONST) {
334 /* Proceed with possible constant folding. */
335 break;
337 if (temps[args[2]].state == TCG_TEMP_CONST
338 && temps[args[2]].val == 0) {
339 if ((temps[args[0]].state == TCG_TEMP_COPY
340 && temps[args[0]].val == args[1])
341 || args[0] == args[1]) {
342 args += 3;
343 gen_opc_buf[op_index] = INDEX_op_nop;
344 } else {
345 gen_opc_buf[op_index] = op_to_mov(op);
346 tcg_opt_gen_mov(s, gen_args, args[0], args[1],
347 nb_temps, nb_globals);
348 gen_args += 2;
349 args += 3;
351 continue;
353 break;
354 CASE_OP_32_64(mul):
355 if ((temps[args[2]].state == TCG_TEMP_CONST
356 && temps[args[2]].val == 0)) {
357 gen_opc_buf[op_index] = op_to_movi(op);
358 tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
359 args += 3;
360 gen_args += 2;
361 continue;
363 break;
364 CASE_OP_32_64(or):
365 CASE_OP_32_64(and):
366 if (args[1] == args[2]) {
367 if (args[1] == args[0]) {
368 args += 3;
369 gen_opc_buf[op_index] = INDEX_op_nop;
370 } else {
371 gen_opc_buf[op_index] = op_to_mov(op);
372 tcg_opt_gen_mov(s, gen_args, args[0], args[1], nb_temps,
373 nb_globals);
374 gen_args += 2;
375 args += 3;
377 continue;
379 break;
382 /* Propagate constants through copy operations and do constant
383 folding. Constants will be substituted to arguments by register
384 allocator where needed and possible. Also detect copies. */
385 switch (op) {
386 CASE_OP_32_64(mov):
387 if ((temps[args[1]].state == TCG_TEMP_COPY
388 && temps[args[1]].val == args[0])
389 || args[0] == args[1]) {
390 args += 2;
391 gen_opc_buf[op_index] = INDEX_op_nop;
392 break;
394 if (temps[args[1]].state != TCG_TEMP_CONST) {
395 tcg_opt_gen_mov(s, gen_args, args[0], args[1],
396 nb_temps, nb_globals);
397 gen_args += 2;
398 args += 2;
399 break;
401 /* Source argument is constant. Rewrite the operation and
402 let movi case handle it. */
403 op = op_to_movi(op);
404 gen_opc_buf[op_index] = op;
405 args[1] = temps[args[1]].val;
406 /* fallthrough */
407 CASE_OP_32_64(movi):
408 tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals);
409 gen_args += 2;
410 args += 2;
411 break;
412 CASE_OP_32_64(not):
413 CASE_OP_32_64(neg):
414 CASE_OP_32_64(ext8s):
415 CASE_OP_32_64(ext8u):
416 CASE_OP_32_64(ext16s):
417 CASE_OP_32_64(ext16u):
418 case INDEX_op_ext32s_i64:
419 case INDEX_op_ext32u_i64:
420 if (temps[args[1]].state == TCG_TEMP_CONST) {
421 gen_opc_buf[op_index] = op_to_movi(op);
422 tmp = do_constant_folding(op, temps[args[1]].val, 0);
423 tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
424 gen_args += 2;
425 args += 2;
426 break;
427 } else {
428 reset_temp(args[0], nb_temps, nb_globals);
429 gen_args[0] = args[0];
430 gen_args[1] = args[1];
431 gen_args += 2;
432 args += 2;
433 break;
435 CASE_OP_32_64(add):
436 CASE_OP_32_64(sub):
437 CASE_OP_32_64(mul):
438 CASE_OP_32_64(or):
439 CASE_OP_32_64(and):
440 CASE_OP_32_64(xor):
441 CASE_OP_32_64(shl):
442 CASE_OP_32_64(shr):
443 CASE_OP_32_64(sar):
444 CASE_OP_32_64(rotl):
445 CASE_OP_32_64(rotr):
446 CASE_OP_32_64(andc):
447 CASE_OP_32_64(orc):
448 CASE_OP_32_64(eqv):
449 CASE_OP_32_64(nand):
450 CASE_OP_32_64(nor):
451 if (temps[args[1]].state == TCG_TEMP_CONST
452 && temps[args[2]].state == TCG_TEMP_CONST) {
453 gen_opc_buf[op_index] = op_to_movi(op);
454 tmp = do_constant_folding(op, temps[args[1]].val,
455 temps[args[2]].val);
456 tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
457 gen_args += 2;
458 args += 3;
459 break;
460 } else {
461 reset_temp(args[0], nb_temps, nb_globals);
462 gen_args[0] = args[0];
463 gen_args[1] = args[1];
464 gen_args[2] = args[2];
465 gen_args += 3;
466 args += 3;
467 break;
469 case INDEX_op_call:
470 nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
471 if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
472 for (i = 0; i < nb_globals; i++) {
473 reset_temp(i, nb_temps, nb_globals);
476 for (i = 0; i < (args[0] >> 16); i++) {
477 reset_temp(args[i + 1], nb_temps, nb_globals);
479 i = nb_call_args + 3;
480 while (i) {
481 *gen_args = *args;
482 args++;
483 gen_args++;
484 i--;
486 break;
487 case INDEX_op_set_label:
488 case INDEX_op_jmp:
489 case INDEX_op_br:
490 CASE_OP_32_64(brcond):
491 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
492 for (i = 0; i < def->nb_args; i++) {
493 *gen_args = *args;
494 args++;
495 gen_args++;
497 break;
498 default:
499 /* Default case: we do know nothing about operation so no
500 propagation is done. We only trash output args. */
501 for (i = 0; i < def->nb_oargs; i++) {
502 reset_temp(args[i], nb_temps, nb_globals);
504 for (i = 0; i < def->nb_args; i++) {
505 gen_args[i] = args[i];
507 args += def->nb_args;
508 gen_args += def->nb_args;
509 break;
513 return gen_args;
516 TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
517 TCGArg *args, TCGOpDef *tcg_op_defs)
519 TCGArg *res;
520 res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
521 return res;