2 * Optimizations for Tiny Code Generator for QEMU
4 * Copyright (c) 2010 Samsung Electronics.
5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
31 #include "qemu-common.h"
/* Expand to the _i32 case label alone on 32-bit hosts, or to both the
   _i32 and _i64 labels on 64-bit hosts, so one CASE_OP_32_64(x) covers
   an opcode at every operand width the host supports.
   NOTE(review): restored the #else/#endif that were lost in transit —
   as written, the macro was defined twice unconditionally.  */
#if TCG_TARGET_REG_BITS == 64
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)
#else
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32)
#endif
51 struct tcg_temp_info
{
58 static struct tcg_temp_info temps
[TCG_MAX_TEMPS
];
60 /* Reset TEMP's state to TCG_TEMP_ANY. If TEMP was a representative of some
61 class of equivalent temp's, a new representative should be chosen in this
63 static void reset_temp(TCGArg temp
, int nb_temps
, int nb_globals
)
66 TCGArg new_base
= (TCGArg
)-1;
67 if (temps
[temp
].state
== TCG_TEMP_HAS_COPY
) {
68 for (i
= temps
[temp
].next_copy
; i
!= temp
; i
= temps
[i
].next_copy
) {
69 if (i
>= nb_globals
) {
70 temps
[i
].state
= TCG_TEMP_HAS_COPY
;
75 for (i
= temps
[temp
].next_copy
; i
!= temp
; i
= temps
[i
].next_copy
) {
76 if (new_base
== (TCGArg
)-1) {
77 temps
[i
].state
= TCG_TEMP_ANY
;
79 temps
[i
].val
= new_base
;
82 temps
[temps
[temp
].next_copy
].prev_copy
= temps
[temp
].prev_copy
;
83 temps
[temps
[temp
].prev_copy
].next_copy
= temps
[temp
].next_copy
;
84 } else if (temps
[temp
].state
== TCG_TEMP_COPY
) {
85 temps
[temps
[temp
].next_copy
].prev_copy
= temps
[temp
].prev_copy
;
86 temps
[temps
[temp
].prev_copy
].next_copy
= temps
[temp
].next_copy
;
87 new_base
= temps
[temp
].val
;
89 temps
[temp
].state
= TCG_TEMP_ANY
;
90 if (new_base
!= (TCGArg
)-1 && temps
[new_base
].next_copy
== new_base
) {
91 temps
[new_base
].state
= TCG_TEMP_ANY
;
95 static int op_bits(int op
)
98 case INDEX_op_mov_i32
:
99 case INDEX_op_add_i32
:
100 case INDEX_op_sub_i32
:
101 case INDEX_op_mul_i32
:
102 case INDEX_op_and_i32
:
103 case INDEX_op_or_i32
:
104 case INDEX_op_xor_i32
:
105 case INDEX_op_shl_i32
:
106 case INDEX_op_shr_i32
:
107 case INDEX_op_sar_i32
:
108 #ifdef TCG_TARGET_HAS_rot_i32
109 case INDEX_op_rotl_i32
:
110 case INDEX_op_rotr_i32
:
112 #ifdef TCG_TARGET_HAS_not_i32
113 case INDEX_op_not_i32
:
115 #ifdef TCG_TARGET_HAS_ext8s_i32
116 case INDEX_op_ext8s_i32
:
118 #ifdef TCG_TARGET_HAS_ext16s_i32
119 case INDEX_op_ext16s_i32
:
121 #ifdef TCG_TARGET_HAS_ext8u_i32
122 case INDEX_op_ext8u_i32
:
124 #ifdef TCG_TARGET_HAS_ext16u_i32
125 case INDEX_op_ext16u_i32
:
128 #if TCG_TARGET_REG_BITS == 64
129 case INDEX_op_mov_i64
:
130 case INDEX_op_add_i64
:
131 case INDEX_op_sub_i64
:
132 case INDEX_op_mul_i64
:
133 case INDEX_op_and_i64
:
134 case INDEX_op_or_i64
:
135 case INDEX_op_xor_i64
:
136 case INDEX_op_shl_i64
:
137 case INDEX_op_shr_i64
:
138 case INDEX_op_sar_i64
:
139 #ifdef TCG_TARGET_HAS_rot_i64
140 case INDEX_op_rotl_i64
:
141 case INDEX_op_rotr_i64
:
143 #ifdef TCG_TARGET_HAS_not_i64
144 case INDEX_op_not_i64
:
146 #ifdef TCG_TARGET_HAS_ext8s_i64
147 case INDEX_op_ext8s_i64
:
149 #ifdef TCG_TARGET_HAS_ext16s_i64
150 case INDEX_op_ext16s_i64
:
152 #ifdef TCG_TARGET_HAS_ext32s_i64
153 case INDEX_op_ext32s_i64
:
155 #ifdef TCG_TARGET_HAS_ext8u_i64
156 case INDEX_op_ext8u_i64
:
158 #ifdef TCG_TARGET_HAS_ext16u_i64
159 case INDEX_op_ext16u_i64
:
161 #ifdef TCG_TARGET_HAS_ext32u_i64
162 case INDEX_op_ext32u_i64
:
167 fprintf(stderr
, "Unrecognized operation %d in op_bits.\n", op
);
172 static int op_to_movi(int op
)
174 switch (op_bits(op
)) {
176 return INDEX_op_movi_i32
;
177 #if TCG_TARGET_REG_BITS == 64
179 return INDEX_op_movi_i64
;
182 fprintf(stderr
, "op_to_movi: unexpected return value of "
183 "function op_bits.\n");
188 static void tcg_opt_gen_mov(TCGContext
*s
, TCGArg
*gen_args
, TCGArg dst
,
189 TCGArg src
, int nb_temps
, int nb_globals
)
191 reset_temp(dst
, nb_temps
, nb_globals
);
192 assert(temps
[src
].state
!= TCG_TEMP_COPY
);
193 /* Don't try to copy if one of temps is a global or either one
194 is local and another is register */
195 if (src
>= nb_globals
&& dst
>= nb_globals
&&
196 tcg_arg_is_local(s
, src
) == tcg_arg_is_local(s
, dst
)) {
197 assert(temps
[src
].state
!= TCG_TEMP_CONST
);
198 if (temps
[src
].state
!= TCG_TEMP_HAS_COPY
) {
199 temps
[src
].state
= TCG_TEMP_HAS_COPY
;
200 temps
[src
].next_copy
= src
;
201 temps
[src
].prev_copy
= src
;
203 temps
[dst
].state
= TCG_TEMP_COPY
;
204 temps
[dst
].val
= src
;
205 temps
[dst
].next_copy
= temps
[src
].next_copy
;
206 temps
[dst
].prev_copy
= src
;
207 temps
[temps
[dst
].next_copy
].prev_copy
= dst
;
208 temps
[src
].next_copy
= dst
;
214 static void tcg_opt_gen_movi(TCGArg
*gen_args
, TCGArg dst
, TCGArg val
,
215 int nb_temps
, int nb_globals
)
217 reset_temp(dst
, nb_temps
, nb_globals
);
218 temps
[dst
].state
= TCG_TEMP_CONST
;
219 temps
[dst
].val
= val
;
224 static int op_to_mov(int op
)
226 switch (op_bits(op
)) {
228 return INDEX_op_mov_i32
;
229 #if TCG_TARGET_REG_BITS == 64
231 return INDEX_op_mov_i64
;
234 fprintf(stderr
, "op_to_mov: unexpected return value of "
235 "function op_bits.\n");
240 static TCGArg
do_constant_folding_2(int op
, TCGArg x
, TCGArg y
)
261 case INDEX_op_shl_i32
:
262 return (uint32_t)x
<< (uint32_t)y
;
264 #if TCG_TARGET_REG_BITS == 64
265 case INDEX_op_shl_i64
:
266 return (uint64_t)x
<< (uint64_t)y
;
269 case INDEX_op_shr_i32
:
270 return (uint32_t)x
>> (uint32_t)y
;
272 #if TCG_TARGET_REG_BITS == 64
273 case INDEX_op_shr_i64
:
274 return (uint64_t)x
>> (uint64_t)y
;
277 case INDEX_op_sar_i32
:
278 return (int32_t)x
>> (int32_t)y
;
280 #if TCG_TARGET_REG_BITS == 64
281 case INDEX_op_sar_i64
:
282 return (int64_t)x
>> (int64_t)y
;
285 #ifdef TCG_TARGET_HAS_rot_i32
286 case INDEX_op_rotr_i32
:
287 #if TCG_TARGET_REG_BITS == 64
291 x
= (x
<< (32 - y
)) | (x
>> y
);
295 #ifdef TCG_TARGET_HAS_rot_i64
296 #if TCG_TARGET_REG_BITS == 64
297 case INDEX_op_rotr_i64
:
298 x
= (x
<< (64 - y
)) | (x
>> y
);
303 #ifdef TCG_TARGET_HAS_rot_i32
304 case INDEX_op_rotl_i32
:
305 #if TCG_TARGET_REG_BITS == 64
309 x
= (x
<< y
) | (x
>> (32 - y
));
313 #ifdef TCG_TARGET_HAS_rot_i64
314 #if TCG_TARGET_REG_BITS == 64
315 case INDEX_op_rotl_i64
:
316 x
= (x
<< y
) | (x
>> (64 - y
));
321 #if defined(TCG_TARGET_HAS_not_i32) || defined(TCG_TARGET_HAS_not_i64)
322 #ifdef TCG_TARGET_HAS_not_i32
323 case INDEX_op_not_i32
:
325 #ifdef TCG_TARGET_HAS_not_i64
326 case INDEX_op_not_i64
:
331 #if defined(TCG_TARGET_HAS_ext8s_i32) || defined(TCG_TARGET_HAS_ext8s_i64)
332 #ifdef TCG_TARGET_HAS_ext8s_i32
333 case INDEX_op_ext8s_i32
:
335 #ifdef TCG_TARGET_HAS_ext8s_i64
336 case INDEX_op_ext8s_i64
:
341 #if defined(TCG_TARGET_HAS_ext16s_i32) || defined(TCG_TARGET_HAS_ext16s_i64)
342 #ifdef TCG_TARGET_HAS_ext16s_i32
343 case INDEX_op_ext16s_i32
:
345 #ifdef TCG_TARGET_HAS_ext16s_i64
346 case INDEX_op_ext16s_i64
:
351 #if defined(TCG_TARGET_HAS_ext8u_i32) || defined(TCG_TARGET_HAS_ext8u_i64)
352 #ifdef TCG_TARGET_HAS_ext8u_i32
353 case INDEX_op_ext8u_i32
:
355 #ifdef TCG_TARGET_HAS_ext8u_i64
356 case INDEX_op_ext8u_i64
:
361 #if defined(TCG_TARGET_HAS_ext16u_i32) || defined(TCG_TARGET_HAS_ext16u_i64)
362 #ifdef TCG_TARGET_HAS_ext16u_i32
363 case INDEX_op_ext16u_i32
:
365 #ifdef TCG_TARGET_HAS_ext16u_i64
366 case INDEX_op_ext16u_i64
:
371 #if TCG_TARGET_REG_BITS == 64
372 #ifdef TCG_TARGET_HAS_ext32s_i64
373 case INDEX_op_ext32s_i64
:
377 #ifdef TCG_TARGET_HAS_ext32u_i64
378 case INDEX_op_ext32u_i64
:
385 "Unrecognized operation %d in do_constant_folding.\n", op
);
390 static TCGArg
do_constant_folding(int op
, TCGArg x
, TCGArg y
)
392 TCGArg res
= do_constant_folding_2(op
, x
, y
);
393 #if TCG_TARGET_REG_BITS == 64
394 if (op_bits(op
) == 32) {
401 /* Propagate constants and copies, fold constant expressions. */
402 static TCGArg
*tcg_constant_folding(TCGContext
*s
, uint16_t *tcg_opc_ptr
,
403 TCGArg
*args
, TCGOpDef
*tcg_op_defs
)
405 int i
, nb_ops
, op_index
, op
, nb_temps
, nb_globals
, nb_call_args
;
409 /* Array VALS has an element for each temp.
410 If this temp holds a constant then its value is kept in VALS' element.
411 If this temp is a copy of other ones then this equivalence class'
412 representative is kept in VALS' element.
413 If this temp is neither copy nor constant then corresponding VALS'
414 element is unused. */
416 nb_temps
= s
->nb_temps
;
417 nb_globals
= s
->nb_globals
;
418 memset(temps
, 0, nb_temps
* sizeof(struct tcg_temp_info
));
420 nb_ops
= tcg_opc_ptr
- gen_opc_buf
;
422 for (op_index
= 0; op_index
< nb_ops
; op_index
++) {
423 op
= gen_opc_buf
[op_index
];
424 def
= &tcg_op_defs
[op
];
425 /* Do copy propagation */
426 if (!(def
->flags
& (TCG_OPF_CALL_CLOBBER
| TCG_OPF_SIDE_EFFECTS
))) {
427 assert(op
!= INDEX_op_call
);
428 for (i
= def
->nb_oargs
; i
< def
->nb_oargs
+ def
->nb_iargs
; i
++) {
429 if (temps
[args
[i
]].state
== TCG_TEMP_COPY
) {
430 args
[i
] = temps
[args
[i
]].val
;
435 /* For commutative operations make constant second argument */
442 if (temps
[args
[1]].state
== TCG_TEMP_CONST
) {
452 /* Simplify expression if possible. */
459 #ifdef TCG_TARGET_HAS_rot_i32
460 case INDEX_op_rotl_i32
:
461 case INDEX_op_rotr_i32
:
463 #ifdef TCG_TARGET_HAS_rot_i64
464 case INDEX_op_rotl_i64
:
465 case INDEX_op_rotr_i64
:
467 if (temps
[args
[1]].state
== TCG_TEMP_CONST
) {
468 /* Proceed with possible constant folding. */
471 if (temps
[args
[2]].state
== TCG_TEMP_CONST
472 && temps
[args
[2]].val
== 0) {
473 if ((temps
[args
[0]].state
== TCG_TEMP_COPY
474 && temps
[args
[0]].val
== args
[1])
475 || args
[0] == args
[1]) {
477 gen_opc_buf
[op_index
] = INDEX_op_nop
;
479 gen_opc_buf
[op_index
] = op_to_mov(op
);
480 tcg_opt_gen_mov(s
, gen_args
, args
[0], args
[1],
481 nb_temps
, nb_globals
);
489 if ((temps
[args
[2]].state
== TCG_TEMP_CONST
490 && temps
[args
[2]].val
== 0)) {
491 gen_opc_buf
[op_index
] = op_to_movi(op
);
492 tcg_opt_gen_movi(gen_args
, args
[0], 0, nb_temps
, nb_globals
);
500 if (args
[1] == args
[2]) {
501 if (args
[1] == args
[0]) {
503 gen_opc_buf
[op_index
] = INDEX_op_nop
;
505 gen_opc_buf
[op_index
] = op_to_mov(op
);
506 tcg_opt_gen_mov(s
, gen_args
, args
[0], args
[1], nb_temps
,
516 /* Propagate constants through copy operations and do constant
517 folding. Constants will be substituted to arguments by register
518 allocator where needed and possible. Also detect copies. */
521 if ((temps
[args
[1]].state
== TCG_TEMP_COPY
522 && temps
[args
[1]].val
== args
[0])
523 || args
[0] == args
[1]) {
525 gen_opc_buf
[op_index
] = INDEX_op_nop
;
528 if (temps
[args
[1]].state
!= TCG_TEMP_CONST
) {
529 tcg_opt_gen_mov(s
, gen_args
, args
[0], args
[1],
530 nb_temps
, nb_globals
);
535 /* Source argument is constant. Rewrite the operation and
536 let movi case handle it. */
538 gen_opc_buf
[op_index
] = op
;
539 args
[1] = temps
[args
[1]].val
;
542 tcg_opt_gen_movi(gen_args
, args
[0], args
[1], nb_temps
, nb_globals
);
547 #ifdef TCG_TARGET_HAS_ext8s_i32
548 case INDEX_op_ext8s_i32
:
550 #ifdef TCG_TARGET_HAS_ext8s_i64
551 case INDEX_op_ext8s_i64
:
553 #ifdef TCG_TARGET_HAS_ext16s_i32
554 case INDEX_op_ext16s_i32
:
556 #ifdef TCG_TARGET_HAS_ext16s_i64
557 case INDEX_op_ext16s_i64
:
559 #ifdef TCG_TARGET_HAS_ext8u_i32
560 case INDEX_op_ext8u_i32
:
562 #ifdef TCG_TARGET_HAS_ext8u_i64
563 case INDEX_op_ext8u_i64
:
565 #ifdef TCG_TARGET_HAS_ext16u_i32
566 case INDEX_op_ext16u_i32
:
568 #ifdef TCG_TARGET_HAS_ext16u_i64
569 case INDEX_op_ext16u_i64
:
571 #if TCG_TARGET_REG_BITS == 64
572 case INDEX_op_ext32s_i64
:
573 case INDEX_op_ext32u_i64
:
575 if (temps
[args
[1]].state
== TCG_TEMP_CONST
) {
576 gen_opc_buf
[op_index
] = op_to_movi(op
);
577 tmp
= do_constant_folding(op
, temps
[args
[1]].val
, 0);
578 tcg_opt_gen_movi(gen_args
, args
[0], tmp
, nb_temps
, nb_globals
);
583 reset_temp(args
[0], nb_temps
, nb_globals
);
584 gen_args
[0] = args
[0];
585 gen_args
[1] = args
[1];
599 #ifdef TCG_TARGET_HAS_rot_i32
600 case INDEX_op_rotl_i32
:
601 case INDEX_op_rotr_i32
:
603 #ifdef TCG_TARGET_HAS_rot_i64
604 case INDEX_op_rotl_i64
:
605 case INDEX_op_rotr_i64
:
607 if (temps
[args
[1]].state
== TCG_TEMP_CONST
608 && temps
[args
[2]].state
== TCG_TEMP_CONST
) {
609 gen_opc_buf
[op_index
] = op_to_movi(op
);
610 tmp
= do_constant_folding(op
, temps
[args
[1]].val
,
612 tcg_opt_gen_movi(gen_args
, args
[0], tmp
, nb_temps
, nb_globals
);
617 reset_temp(args
[0], nb_temps
, nb_globals
);
618 gen_args
[0] = args
[0];
619 gen_args
[1] = args
[1];
620 gen_args
[2] = args
[2];
626 nb_call_args
= (args
[0] >> 16) + (args
[0] & 0xffff);
627 if (!(args
[nb_call_args
+ 1] & (TCG_CALL_CONST
| TCG_CALL_PURE
))) {
628 for (i
= 0; i
< nb_globals
; i
++) {
629 reset_temp(i
, nb_temps
, nb_globals
);
632 for (i
= 0; i
< (args
[0] >> 16); i
++) {
633 reset_temp(args
[i
+ 1], nb_temps
, nb_globals
);
635 i
= nb_call_args
+ 3;
643 case INDEX_op_set_label
:
646 CASE_OP_32_64(brcond
):
647 memset(temps
, 0, nb_temps
* sizeof(struct tcg_temp_info
));
648 for (i
= 0; i
< def
->nb_args
; i
++) {
655 /* Default case: we do know nothing about operation so no
656 propagation is done. We only trash output args. */
657 for (i
= 0; i
< def
->nb_oargs
; i
++) {
658 reset_temp(args
[i
], nb_temps
, nb_globals
);
660 for (i
= 0; i
< def
->nb_args
; i
++) {
661 gen_args
[i
] = args
[i
];
663 args
+= def
->nb_args
;
664 gen_args
+= def
->nb_args
;
672 TCGArg
*tcg_optimize(TCGContext
*s
, uint16_t *tcg_opc_ptr
,
673 TCGArg
*args
, TCGOpDef
*tcg_op_defs
)
676 res
= tcg_constant_folding(s
, tcg_opc_ptr
, args
, tcg_op_defs
);