1 /* Support for avr-passes.def for AVR 8-bit microcontrollers.
2 Copyright (C) 2024-2025 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #define IN_TARGET_CODE 1
23 #define INCLUDE_VECTOR
27 #include "coretypes.h"
44 #include "tree-pass.h"
45 #include "insn-attr.h"
46 #include "tm-constrs.h"
49 #define CONST_INT_OR_FIXED_P(X) (CONST_INT_P (X) || CONST_FIXED_P (X))
51 #define FIRST_GPR (AVR_TINY ? REG_18 : REG_2)
54 // Emit pattern PAT, and ICE when the insn is not valid / not recognized.
57 emit_valid_insn (rtx pat
)
59 rtx_insn
*insn
= emit_insn (pat
);
61 if (! valid_insn_p (insn
)) // Also runs recog().
62 fatal_insn ("emit unrecognizable insn", insn
);
67 // Emit a single_set with an optional scratch operand. This function
68 // asserts that the new insn is valid and recognized.
71 emit_valid_move_clobbercc (rtx dest
, rtx src
, rtx scratch
= NULL_RTX
)
74 ? gen_gen_move_clobbercc_scratch (dest
, src
, scratch
)
75 : gen_gen_move_clobbercc (dest
, src
);
77 return emit_valid_insn (pat
);
84 /////////////////////////////////////////////////////////////////////////////
85 // Before we start with the very code, introduce some helpers that are
86 // quite generic, though up to now only avr-fuse-add makes use of them.
88 /* Get the next / previous NONDEBUG_INSN_P after INSN in basic block BB.
89 This assumes we are in CFG layout mode so that BLOCK_FOR_INSN()
93 next_nondebug_insn_bb (basic_block bb
, rtx_insn
*insn
, bool forward
= true)
97 insn
= forward
? NEXT_INSN (insn
) : PREV_INSN (insn
);
99 if (insn
&& NONDEBUG_INSN_P (insn
))
100 return BLOCK_FOR_INSN (insn
) == bb
? insn
: nullptr;
107 prev_nondebug_insn_bb (basic_block bb
, rtx_insn
*insn
)
109 return next_nondebug_insn_bb (bb
, insn
, false);
113 /* Like `single_set' with the addition that it sets REGNO_SCRATCH when the
114 insn is a single_set with a QImode scratch register. When the insn has
115 no QImode scratch or just a scratch:QI, then set REGNO_SCRATCH = 0.
116 The assumption is that the function is only used after the splits for
117 REG_CC so that the pattern is a parallel with 2 elements (INSN has no
118 scratch operand), or 3 elements (INSN does have a scratch operand). */
121 single_set_with_scratch (rtx_insn
*insn
, int ®no_scratch
)
128 rtx set
, clo
, reg
, pat
= PATTERN (insn
);
130 // Search for SET + CLOBBER(QI) + CLOBBER(CC).
131 if (GET_CODE (pat
) == PARALLEL
132 && XVECLEN (pat
, 0) == 3
133 && GET_CODE (set
= XVECEXP (pat
, 0, 0)) == SET
134 // At this pass, all insn are endowed with clobber(CC).
135 && GET_CODE (clo
= XVECEXP (pat
, 0, 2)) == CLOBBER
136 && GET_MODE (XEXP (clo
, 0)) == CCmode
137 && GET_CODE (clo
= XVECEXP (pat
, 0, 1)) == CLOBBER
138 && REG_P (reg
= XEXP (clo
, 0))
139 && GET_MODE (reg
) == QImode
)
141 regno_scratch
= REGNO (reg
);
145 return single_set (insn
);
149 // One bit for each GRP in REG_0 ... REG_31.
150 using gprmask_t
= uint32_t;
152 // True when this is a valid GPR number for ordinary code, e.g.
153 // registers wider than 2 bytes have to start at an exven regno.
154 // TMP_REG and ZERO_REG are not considered valid, even though
155 // the C source can use register vars with them.
157 gpr_regno_p (int regno
, int n_bytes
= 1)
159 return (IN_RANGE (regno
, FIRST_GPR
, REG_32
- n_bytes
)
160 // Size in { 1, 2, 3, 4, 8 } bytes.
161 && ((1u << n_bytes
) & 0x11e)
162 // Registers >= 2 bytes start at an even regno.
163 && (n_bytes
== 1 || regno
% 2 == 0));
166 // There are cases where the C source defines local reg vars
167 // for R1 etc. The assumption is that this is handled before
168 // calling this function, e.g. by skipping code when a register
169 // overlaps with a fixed register.
170 static inline gprmask_t
171 regmask (int regno
, int size
)
173 gcc_checking_assert (gpr_regno_p (regno
, size
));
174 gprmask_t bits
= (1u << size
) - 1;
176 return bits
<< regno
;
179 // Mask for hard register X that's some GPR, including fixed regs like R0.
183 gcc_assert (REG_P (x
));
184 gprmask_t bits
= (1u << GET_MODE_SIZE (GET_MODE (x
))) - 1;
186 return bits
<< REGNO (x
);
190 // Whether X has bits in the range [B0 ... B1]
192 has_bits_in (gprmask_t x
, int b0
, int b1
)
194 if (b0
> b1
|| b0
> 31 || b1
< 0)
197 const gprmask_t m
= (2u << (b1
- b0
)) - 1;
198 return x
& (m
<< b0
);
208 #define select false ? bad_case
213 // Returns true when we a scratch reg is needed in order to get
214 // (siged or unsigned) 8-bit value VAL in some GPR.
215 // When it's about costs rather than the sheer requirement for a
216 // scratch, see also AVRasm::constant_cost.
217 static inline bool ldi_needs_scratch (int regno
, int val
)
219 return regno
< REG_16
&& IN_RANGE (val
& 0xff, 2, 254);
222 // Return a byte value x >= 0 such that x <code> y == x for all y, or -1.
223 static inline int neutral_val (rtx_code code
)
233 // When there exists a value x such that the image of the function
234 // y -> y <code> x has order 1, then return that x. Else return -1.
235 static inline int image1_val (rtx_code code
)
243 // Cost of 8-bit binary operation x o= VAL provided a scratch is
244 // available as needed.
245 static int constant_cost (rtx_code code
, int regno
, uint8_t val
)
247 bool needs_scratch_p
= select
<bool>()
248 : code
== PLUS
? regno
< REG_16
&& val
!= 1 && val
!= 0xff
249 : code
== XOR
? val
!= 0xff && (regno
< REG_16
|| val
!= 0x80)
250 : code
== IOR
? regno
< REG_16
251 : code
== AND
? regno
< REG_16
&& val
!= 0
252 : code
== SET
? regno
< REG_16
&& val
!= 0
255 return val
== AVRasm::neutral_val (code
)
257 : 1 + needs_scratch_p
;
262 // Returns the mode mask for a mode size of SIZE bytes.
263 static uint64_t size_to_mask (int size
)
265 return ((uint64_t) 2 << (8 * size
- 1)) - 1;
268 // Return the scalar int mode for a modesize of 1, 2, 3, 4 or 8 bytes.
269 static machine_mode
size_to_mode (int size
)
271 return select
<machine_mode
>()
274 : size
== 3 ? PSImode
277 : bad_case
<machine_mode
> ();
281 //////////////////////////////////////////////////////////////////////////////
282 // Optimize moves after reload: -mfuse-move=<0,23>
284 /* The purpose of this pass is to perform optimizations after reload
285 like the following ones:
287 Without optimization | With optimization
288 ==================== | =================
290 long long fn_zero (void) (1)
295 ldi r18, 0 ; movqi_insn | ldi r18, 0 ; movqi_insn
296 ldi r19, 0 ; movqi_insn | ldi r19, 0 ; movqi_insn
297 ldi r20, 0 ; movqi_insn | movw r20, r18 ; *movhi
298 ldi r21, 0 ; movqi_insn |
299 ldi r22, 0 ; movqi_insn | movw r22, r18 ; *movhi
300 ldi r23, 0 ; movqi_insn |
301 ldi r24, 0 ; movqi_insn | movw r24, r18 ; *movhi
302 ldi r25, 0 ; movqi_insn |
305 int fn_eq0 (char c) (2)
310 mov r18, r24 ; movqi_insn | mov r18, r24 ; movqi_insn
311 ldi r24, 1 ; *movhi | ldi r24, 1 ; *movhi
312 ldi r25, 0 | ldi r25, 0
313 cp r18, ZERO ; cmpqi3 | cpse r18, ZERO ; peephole
315 ldi r24, 0 ; *movhi | ldi r24, 0 ; movqi_insn
321 void fn_store_ab (void)
327 ldi r24, 1 ; *movhi | ldi r24, 1 ; *movhi
328 ldi r25, 0 | ldi r25, 0
329 sts a+1, r25 ; *movhi | sts a+1, r25 ; *movhi
330 sts a, r24 | sts a, r24
331 ldi r24, -1 ; *movhi | sbiw r24, 2 ; *addhi3
333 sts b+1, r25 ; *movhi | sts b+1, r25 ; *movhi
334 sts b, r24 | sts b, r24
337 unsigned fn_crc (unsigned x, unsigned y) (4)
339 for (char i = 8; i--; x <<= 1)
340 y ^= (x ^ y) & 0x80 ? 79U : 0U;
344 movw r18, r24 ; *movhi | movw r18, r24 ; *movhi
345 movw r24, r22 ; *movhi | movw r24, r22 ; *movhi
346 ldi r22, 8 ; movqi_insn | ldi r22, 8 ; movqi_insn
348 movw r30, r18 ; *movhi | movw r30, r18 ; *movhi
349 eor r30, r24 ; *xorqi3 | eor r30, r24 ; *xorqi3
350 eor r31, r25 ; *xorqi3 | eor r31, r25 ; *xorqi3
351 mov r20, r30 ; *andhi3 | mov r20, r30 ; *andqi3
352 andi r20, 1<<7 | andi r20, 1<<7
354 sbrs r30, 7 ; *sbrx_branchhi | sbrc r30, 7 ; *sbrx_branchhi
356 ldi r20, 79 ; movqi_insn | ldi r20, 79 ; movqi_insn
357 ldi r21, 0 ; movqi_insn |
358 eor r24, r20 ; *xorqi3 | eor r24, r20 ; *xorqi3
359 eor r25, r21 ; *xorqi3 |
360 lsl r18 ; *ashlhi3_const | lsl r18 ; *ashlhi3_const
362 subi r22, 1 ; *op8.for.cczn.p| subi r22, 1 ; *op8.for.cczn.plus
363 brne .L13 ; branch_ZN | brne .L13 ; branch_ZN
366 #define SPDR (*(uint8_t volatile*) 0x2c) (5)
368 void fn_PR49807 (long big)
376 movw r20, r22 ; *movhi | movw r20, r22 ; *movhi
377 movw r22, r24 ; *movhi | movw r22, r24 ; *movhi
378 mov r24, r23 ; *ashrsi3_const |
384 out 0xc, r24 ; movqi_insn | out 0xc, r23 ; movqi_insn
385 movw r24, r22 ; *ashrsi3_const |
390 out 0xc, r24 ; movqi_insn | out 0xc, r24 ; movqi_insn
391 clr r27 ; *ashrsi3_const |
397 out 0xc, r24 ; movqi_insn | out 0xc, r21 ; movqi_insn
398 out 0xc, r20 ; movqi_insn | out 0xc, r20 ; movqi_insn
401 The insns of each basic block are traversed from first to last.
402 Each insn is optimized on its own, or may be fused with the
403 previous insn like in example (1).
404 As the insns are traversed, memento_t keeps track of known values
405 held in the GPRs (general purpse registers) R2 ... R31 by simulating
406 the effect of the current insn in memento_t.apply_insn().
407 The basic blocks are traversed in reverse post order so as to
408 maximize the chance that GPRs from all preceding blocks are known,
409 which is the case in example (2). The traversal of the basic block
410 is performed by bbinfo_t.optimize_one_function().
411 bbinfo_t.optimize_one_block() traverses the insns of a BB and tries
412 the following optimizations:
415 Try to fuse two 8-bit insns to one MOVW like in (1).
417 bbinfo_t::try_simplify_p
418 Only perform the simplest optimizations that don't impede the
419 traceability of the generated code, which are:
420 - Transform operations like Rn = Rn=0 ^ Rm to Rn = Rm.
421 - Remove insns that are no-ops like Rn = Rn ^ Rm=0.
423 bbinfo_t::try_bin_arg1_p
424 In insns like EOR Rn, arg1 where arg1 is known or is a reg that
425 dies in the insn, *and* there is a different register Rm that's
426 known to contain the same value, then arg1 is replaced with Rm.
428 bbinfo_t::try_split_ldi_p
429 Tries to simplify loads of constants like in examples (1), (2) and (3).
430 It may use arithmetic instructions like AND with registers that
431 are holding known values when this is profitable.
433 bbinfo_t::try_split_any_p
434 Split all insns where the operation can be performed on individual
435 bytes, like andsi3. In example (4) the andhi3 can be optimized
439 Try to fuse a mem = reg insn to mem = __zero_reg__.
440 This should only occur when -msplit-ldst is on, but may
441 also occur with pushes since push<mode>1 splits them.
445 // A basic block with additional information like the GPR state.
446 // The main entry point for the pass. Runs various strategies
447 // like try_fuse, try_simplify, try_bin_arg1, try_split_ldi, try_split_any
448 // depending on -mfuse-add=<0,11>.
451 // Additional insn information on a REG = non-memory single_set insn
452 // for quick access. Only valid when the m_size member is non-zero.
455 // Helper classes with data needed by the try_xxx optimizers.
456 struct optimize_data_t
;
457 struct insn_optimize_data_t
;
459 // Records which GPRs R0 ... R31 are holding a known value,
460 // and which values these are.
463 // Abstract Interpretation of expressions.
464 // absint_val_t represents an 8-bit value that equals the content of
465 // some GPR, or equals some known value (or both, or none of them).
466 // absint_byte_t represents an 8-bit entity that is equivalent to
467 // an absint_val_t, or is equivalent to some (unary or binary) operation
468 // on absint_val_t's like NOT, AND, IOR, XOR that operate bit-wise (and
469 // hence also byte-wise).
470 // absint_t represents an array of absint_byte_t's. When some insn is applied
471 // to a GPR state, then memento_t.apply_insn() represents the RHS of
472 // a single_set as an absint_t, and then applies that result to the GPRs.
473 // For example, in int y = x << 8 the representation is x = [r25; r24]
474 // and RHS = [r24; 00].
479 // A ply_t is a potential step towards an optimal sequence to load a constant
480 // value into a multi-byte register. A ply_t loosely relates to one AVR
481 // instruction, but it may also represent a sequence of instructions.
482 // For example, loading a constant into a lower register when no sratch reg
483 // is available may take up to 4 instructions. There is no 1:1 correspondence
485 // try_split_ldi determines the best sequence of ply_t's by means of a
486 // brute-force search with tree pruning: It's much too complicated to
487 // construct a good sequence directly, but there are many conditions that
488 // good sequence will satisfy, implemented in bbinfo_t::find_plies.
492 // The maximal number of ply_t's in any conceivable optimal solution
493 // that is better than what a vanilla mov<mode> generates.
494 // This is 6 for modes <= 4 and 8 for modes == 8.
495 static constexpr int N_BEST_PLYS
= 8;
497 #define FUSE_MOVE_MAX_MODESIZE 8
499 #include "avr-passes-fuse-move.h"
503 gprmask_t
memento_t::fixed_regs_mask
;
507 int ply_t::max_n_ply_ts
;
508 int plies_t::max_n_plies
;
510 bbinfo_t
*bbinfo_t::bb_info
;
512 bbinfo_t::find_plies_data_t
*bbinfo_t::fpd
;
514 // Which optimizations should be performed.
515 bool bbinfo_t::try_fuse_p
;
516 bool bbinfo_t::try_bin_arg1_p
;
517 bool bbinfo_t::try_split_ldi_p
;
518 bool bbinfo_t::try_split_any_p
;
519 bool bbinfo_t::try_simplify_p
;
520 bool bbinfo_t::use_arith_p
;
521 bool bbinfo_t::use_set_some_p
;
522 bool bbinfo_t::try_mem0_p
;
525 // Abstract Interpretation of expressions.
526 // A bunch of absint_byte_t's.
530 static constexpr int eq_size
= FUSE_MOVE_MAX_MODESIZE
;
531 std::array
<absint_byte_t
, eq_size
> eq
;
534 rtx xexp_new
= NULL_RTX
;
536 absint_byte_t
&operator[] (int i
)
538 gcc_assert (IN_RANGE (i
, 0, absint_t::eq_size
- 1));
542 const absint_byte_t
&operator[] (int i
) const
544 gcc_assert (IN_RANGE (i
, 0, absint_t::eq_size
- 1));
554 absint_t (rtx xold
, rtx xnew
, int n_bytes
)
555 : xexp(xold
), xexp_new(xnew
)
557 gcc_assert (n_bytes
<= eq_size
);
559 for (int i
= 0; i
< n_bytes
; ++i
)
560 eq
[i
].learn_val8 (avr_uint8 (xnew
, i
));
563 // CODE != UNKNOWN: Maximal index of a byte with code CODE, or -1.
564 // CODE == UNKNOWN: Maximal index of a byte with known CODE, or -1.
565 int max_knows (rtx_code code
= UNKNOWN
) const
567 for (int i
= eq_size
- 1; i
>= 0; --i
)
568 if ((code
== UNKNOWN
&& ! eq
[i
].can (UNKNOWN
))
569 || (code
!= UNKNOWN
&& eq
[i
].can (code
)))
574 // CODE != UNKNOWN: Maximal i such that all bytes < i have code CODE.
575 // CODE == UNKNOWN: Maximal i such that all bytes < i have code != UNKNOWN.
576 int end_knows (rtx_code code
= UNKNOWN
) const
578 for (int i
= 0; i
< eq_size
; ++i
)
579 if ((code
== UNKNOWN
&& eq
[i
].can (UNKNOWN
))
580 || (code
!= UNKNOWN
&& ! eq
[i
].can (code
)))
585 // Number of bytes for which there is usable information.
586 int popcount () const
589 for (int i
= 0; i
< eq_size
; ++i
)
590 pop
+= ! eq
[i
].can (UNKNOWN
);
594 // Get the value under the assumption that all eq[].val8 are known.
595 uint64_t get_value (int n_bytes
, bool strict
= true) const
597 gcc_assert (IN_RANGE (n_bytes
, 1, eq_size
));
598 gcc_assert (! strict
|| end_knows (CONST_INT
) >= n_bytes
);
601 for (int i
= n_bytes
- 1; i
>= 0; --i
)
602 val
= 256 * val
+ eq
[i
].val8 (strict
);
606 // Get n-byte value as a const_int, or NULL_RTX when (partially) unknown.
607 rtx
get_value_as_const_int (int n_bytes
) const
609 gcc_checking_assert (gpr_regno_p (REG_24
, n_bytes
));
611 if (end_knows (CONST_INT
) < n_bytes
)
614 const uint64_t val
= get_value (n_bytes
);
615 const machine_mode mode
= size_to_mode (n_bytes
);
617 return gen_int_mode (val
, mode
);
620 // Find a 16-bit register that contains the same value like held
621 // in positions I1 and I2 (if any). Return 0 when nothing appropriate
622 // for a MOVW is found.
623 int reg16_with_value (int i1
, int i2
, const memento_t
&memo
) const
627 const int lo8
= eq
[i1
& ~1].val8 (false);
628 const int hi8
= eq
[i1
| 1].val8 (false);
629 if (lo8
>= 0 && hi8
>= 0)
630 return memo
.reg16_with_value (lo8
, hi8
, 0);
635 // When X is a REG rtx with a known content as of MEMO, then return
636 // the respective value as a constant for mode MODE.
637 // If X is NULL_RTX, or not a REG, or not known, then return NULL_RTX.
638 static rtx
maybe_fold (rtx x
, const memento_t
&memo
)
644 && (n_bytes
= GET_MODE_SIZE (GET_MODE (x
))) <= FUSE_MOVE_MAX_MODESIZE
645 && gpr_regno_p (REGNO (x
), n_bytes
))
647 rtx xval
= memo
.get_value_as_const_int (REGNO (x
), n_bytes
);
649 return avr_chunk (GET_MODE (x
), xval
, 0);
655 // Try to conclude about the bytes that comprise X. DEST_MODE is the
656 // context mode that is used when X is CONST_INT and has VOIDmode.
657 static absint_t
explore (rtx x
, const memento_t
&memo
,
658 machine_mode dest_mode
= VOIDmode
)
660 const rtx_code code
= GET_CODE (x
);
661 bool worth_dumping
= dump_file
&& (dump_flags
& TDF_FOLDING
);
663 const machine_mode mode
= GET_MODE (x
) == VOIDmode
667 const int n_bytes
= mode
== VOIDmode
&& CONST_INT_P (x
)
669 : GET_MODE_SIZE (mode
);
671 if (! IN_RANGE (n_bytes
, 1, absint_t::eq_size
))
674 // Eat our own dog food as produced by try_plit_ldi.
676 rtx xop0
= BINARY_P (x
) || UNARY_P (x
) ? XEXP (x
, 0) : NULL_RTX
;
677 rtx xval0
= xop0
&& CONST_INT_OR_FIXED_P (xop0
)
679 : absint_t::maybe_fold (xop0
, memo
);
683 && GET_MODE (xop0
) == mode
686 rtx y
= simplify_unary_operation (code
, mode
, xval0
, mode
);
687 if (y
&& CONST_INT_OR_FIXED_P (y
))
688 return absint_t (x
, y
, n_bytes
);
691 rtx xop1
= BINARY_P (x
) ? XEXP (x
, 1) : NULL_RTX
;
692 rtx xval1
= xop1
&& CONST_INT_OR_FIXED_P (xop1
)
694 : absint_t::maybe_fold (xop1
, memo
);
699 rtx y
= simplify_binary_operation (code
, mode
, xval0
, xval1
);
700 if (y
&& CONST_INT_OR_FIXED_P (y
))
701 return absint_t (x
, y
, n_bytes
);
704 // No fold to a constant value was found:
705 // Look at the individual bytes more closely.
712 worth_dumping
= false;
716 if (END_REGNO (x
) <= REG_32
717 && ! (regmask (x
) & memento_t::fixed_regs_mask
))
718 for (unsigned r
= REGNO (x
); r
< END_REGNO (x
); ++r
)
720 ai
[r
- REGNO (x
)].learn_regno (r
);
722 ai
[r
- REGNO (x
)].learn_val8 (memo
.value (r
));
727 ai
= absint_t (x
, x
, n_bytes
);
735 if ((CONST_INT_P (xop1
) && INTVAL (xop1
) >= 8)
736 // DImode shift offsets for transparent calls are shipped in R16.
738 ai
= explore_shift (x
, memo
);
745 const absint_t ai0
= absint_t::explore (xop0
, memo
, mode
);
746 const absint_t ai1
= absint_t::explore (xop1
, memo
, mode
);
747 for (int i
= 0; i
< n_bytes
; ++i
)
748 ai
[i
] = absint_byte_t (code
, ai0
[i
], ai1
[i
]);
754 const absint_t ai0
= absint_t::explore (xop0
, memo
);
755 for (int i
= 0; i
< n_bytes
; ++i
)
756 ai
[i
] = absint_byte_t (NOT
, ai0
[i
]);
763 const absint_t ai0
= absint_t::explore (xop0
, memo
);
764 const int ai0_size
= GET_MODE_SIZE (GET_MODE (xop0
));
765 const absint_byte_t b_signs
= ai0
[ai0_size
- 1].get_signs (code
);
766 for (int i
= 0; i
< n_bytes
; ++i
)
767 ai
[i
] = i
< ai0_size
? ai0
[i
] : b_signs
;
773 if (SCALAR_INT_MODE_P (mode
)
774 || ALL_SCALAR_FIXED_POINT_MODE_P (mode
))
776 const absint_t ai0
= absint_t::explore (xop0
, memo
, mode
);
777 const absint_t ai1
= absint_t::explore (xop1
, memo
, mode
);
779 for (int i
= 0; i
< n_bytes
&& ai1
[i
].val8 (false) == 0; ++i
)
783 for (int i
= 0; i
< n_bytes
; ++i
)
785 if (ai0
[i
].val8 (false) == 0)
787 else if (ai1
[i
].val8 (false) == 0)
791 ai
[i
] = absint_byte_t (code
, ai0
[i
], ai1
[i
]);
797 && GET_CODE (xop0
) == ZERO_EXTEND
798 && CONST_INT_P (xop1
))
800 rtx exop
= XEXP (xop0
, 0);
801 int exsize
= GET_MODE_SIZE (GET_MODE (exop
));
802 rtx lo_xop1
= avr_chunk (GET_MODE (exop
), xop1
, 0);
803 if (lo_xop1
== const0_rtx
)
804 for (int i
= exsize
; i
< n_bytes
; ++i
)
808 break; // PLUS, MINUS
811 if (GET_MODE (xop0
) == mode
812 && SCALAR_INT_MODE_P (mode
))
814 // The constant may be located in xop0's zero_extend...
815 const absint_t ai0
= absint_t::explore (xop0
, memo
, mode
);
816 const absint_t ai1
= absint_t::explore (xop1
, memo
, mode
);
817 const int end0
= ai0
.end_knows (CONST_INT
);
818 const int end1
= ai1
.end_knows (CONST_INT
);
819 const uint64_t mul0
= end0
> 0 ? ai0
.get_value (end0
) : 1;
820 const uint64_t mul1
= end1
> 0 ? ai1
.get_value (end1
) : 1;
821 // Shifting in off/8 zero bytes from the right.
822 const int off
= mul0
* mul1
!= 0 ? ctz_hwi (mul0
* mul1
) : 0;
823 for (int i
= 0; i
< off
/ 8; ++i
)
824 ai
[i
].learn_val8 (0);
829 if (GET_MODE (xop0
) == mode
)
831 const absint_t ai0
= absint_t::explore (xop0
, memo
);
832 for (int i
= 0; i
< n_bytes
; ++i
)
833 ai
[i
] = ai0
[n_bytes
- 1 - i
];
840 avr_dump (";; AI.explore %C:%m ", code
, mode
);
844 for (int i
= 0; i
< n_bytes
; ++i
)
845 gcc_assert (ai
[i
].check ());
850 // Helper for the method above.
851 static absint_t
explore_shift (rtx x
, const memento_t
&memo
)
855 const rtx_code code
= GET_CODE (x
);
856 const machine_mode mode
= GET_MODE (x
);
857 const int n_bytes
= GET_MODE_SIZE (mode
);
862 rtx xop0
= XEXP (x
, 0);
863 rtx xop1
= XEXP (x
, 1);
865 // Look at shift offsets of DImode more closely;
866 // they are in R16 for __lshrdi3 etc. Patch xop1 on success.
868 && ! CONST_INT_P (xop1
)
869 && GET_MODE (xop0
) == mode
)
871 const int n_off
= GET_MODE_SIZE (GET_MODE (xop1
));
872 const absint_t aoff
= absint_t::explore (xop1
, memo
);
873 xop1
= aoff
.get_value_as_const_int (n_off
);
877 || GET_MODE (xop0
) != mode
878 || ! IN_RANGE (n_bytes
, 1, FUSE_MOVE_MAX_MODESIZE
)
879 || ! CONST_INT_P (xop1
)
880 || ! IN_RANGE (INTVAL (xop1
), 8, 8 * n_bytes
- 1))
883 const int off
= INTVAL (xop1
);
884 const absint_t ai0
= absint_t::explore (xop0
, memo
);
886 switch (GET_CODE (x
))
892 // Shifting in 0x00's from the right.
893 for (int i
= 0; i
< off
/ 8; ++i
)
894 ai
[i
].learn_val8 (0);
900 // Shifting in 0x00's or signs from the left.
901 absint_byte_t b_signs
= ai0
[n_bytes
- 1].get_signs (GET_CODE (x
));
902 for (int i
= n_bytes
- off
/ 8; i
< n_bytes
; ++i
)
904 if (off
== 8 * n_bytes
- 1)
905 if (code
== ASHIFTRT
)
912 || ai0
.popcount () == 0)
915 // For shift offsets that are a multiple of 8, record the
916 // action on the constituent bytes.
918 // Bytes are moving left by this offset (or zero for "none").
919 const int boffL
= select
<int>()
920 : code
== ROTATE
|| code
== ASHIFT
? off
/ 8
921 : code
== ROTATERT
? n_bytes
- off
/ 8
924 // Bytes are moving right by this offset (or zero for "none").
925 const int boffR
= select
<int>()
926 : code
== ROTATERT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
? off
/ 8
927 : code
== ROTATE
? n_bytes
- off
/ 8
930 if (dump_flags
& TDF_FOLDING
)
932 avr_dump (";; AI.explore_shift %C:%m ", code
, mode
);
934 avr_dump ("<< %d%s", 8 * boffL
, boffL
&& boffR
? ", " : "");
936 avr_dump (">> %d", 8 * boffR
);
941 for (int i
= 0; i
< n_bytes
- boffL
; ++i
)
942 ai
[i
+ boffL
] = ai0
[i
];
945 for (int i
= boffR
; i
< n_bytes
; ++i
)
946 ai
[i
- boffR
] = ai0
[i
];
951 void dump (const char *msg
= nullptr, FILE *f
= dump_file
) const
954 dump (NULL_RTX
, msg
, f
);
957 void dump (rtx dest
, const char *msg
= nullptr, FILE *f
= dump_file
) const
961 int regno
= dest
&& REG_P (dest
) ? REGNO (dest
) : 0;
963 msg
= msg
&& msg
[0] ? msg
: "AI=[%s]\n";
964 const char *const xs
= strstr (msg
, "%s");
967 fprintf (f
, "%.*s", (int) (xs
- msg
), msg
);
968 for (int i
= max_knows (); i
>= 0; --i
)
970 const int sub_regno
= eq
[i
].regno (false /*nonstrict*/);
971 const bool nop
= regno
&& sub_regno
== regno
+ i
;
972 eq
[i
].dump (nop
? "%s=nop" : "%s", f
);
973 fprintf (f
, "%s", i
? "; " : "");
975 fprintf (f
, "%s", xs
+ strlen ("%s"));
981 // Information for a REG = non-memory single_set.
985 // This is an insn that sets the m_size bytes of m_regno to either
986 // - A compile time constant m_isrc (m_code = CONST_INT), or
987 // - The contents of register number m_rsrc (m_code = REG).
993 rtx_insn
*m_insn
= nullptr;
994 rtx m_set
= NULL_RTX
;
995 rtx m_src
= NULL_RTX
;
996 int m_scratch
= 0; // 0 or the register number of a QImode scratch.
997 rtx_code m_old_code
= UNKNOWN
;
999 // Knowledge about the bytes of the SET_SRC: A byte may have a known
1000 // value, may be known to equal some register (e.g. with BSWAP),
1001 // or both, or may be unknown.
1004 // May be set for binary operations.
1005 absint_byte_t m_new_src
;
1007 bool init1 (insn_optimize_data_t
&, int max_size
, const char *purpose
);
1009 // Upper bound for the cost (in words) of a move<mode> insn that
1010 // performs a REG = CONST_XXX = .m_isrc move of modesize .m_size.
1012 bool combine (const insninfo_t
&prev
, const insninfo_t
&curr
);
1013 int emit_insn () const;
1015 bool needs_scratch () const
1017 gcc_assert (m_code
== CONST_INT
);
1019 for (int i
= 0; i
< m_size
; ++i
)
1020 if (AVRasm::ldi_needs_scratch (m_regno
, m_isrc
>> (8 * i
)))
1026 int hamming (const memento_t
&memo
) const
1028 gcc_assert (m_code
== CONST_INT
);
1031 for (int i
= 0; i
< m_size
; ++i
)
1032 h
+= ! memo
.have_value (m_regno
+ i
, 1, 0xff & (m_isrc
>> (8 * i
)));
1037 // Upper bound for the number of ply_t's of a solution, given Hamming
1038 // distance of HAMM (-1 for unknown).
1039 int n_best_plys (int hamm
= -1) const
1041 gcc_assert (m_code
== CONST_INT
);
1044 return (hamm
>= 0 ? hamm
: m_size
);
1046 return (hamm
>= 0 ? hamm
: m_size
)
1047 // The following terms is the max number of MOVWs with a
1048 // Hamming difference of less than 2.
1049 + (AVR_HAVE_MOVW
&& m_regno
< REG_14
) * m_size
/ 2
1050 + (AVR_HAVE_MOVW
&& m_regno
== REG_14
) * std::max (0, m_size
- 2)
1051 - (AVR_HAVE_MOVW
&& hamm
== 4 && (uint32_t) m_isrc
% 0x10001 == 0);
1058 struct insn_optimize_data_t
1060 // Known values held in GPRs prior to the action of .insn / .ii,
1066 insn_optimize_data_t () = delete;
1068 insn_optimize_data_t (memento_t
&memo
)
1071 }; // insn_optimize_data_t
1073 struct optimize_data_t
1075 insn_optimize_data_t prev
;
1076 insn_optimize_data_t curr
;
1078 // Number >= 0 of new insns that replace the curr insn and maybe also the
1079 // prev insn. -1 when no replacement has been found.
1080 int n_new_insns
= -1;
1082 // .prev will be removed provided we have (potentially zero) new insns.
1083 bool delete_prev_p
= false;
1085 // Ignore these GPRs when comparing the simulation results of
1086 // old and new insn sequences. Usually some scratch reg(s).
1087 gprmask_t ignore_mask
= 0;
1089 optimize_data_t () = delete;
1091 optimize_data_t (memento_t
&prev_regs
, memento_t
&curr_regs
)
1092 : prev(prev_regs
), curr(curr_regs
)
1095 bool try_fuse (bbinfo_t
*);
1096 bool try_mem0 (bbinfo_t
*);
1097 bool try_bin_arg1 (bbinfo_t
*);
1098 bool try_simplify (bbinfo_t
*);
1099 bool try_split_ldi (bbinfo_t
*);
1100 bool try_split_any (bbinfo_t
*);
1101 bool fail (const char *reason
);
1102 bool emit_signs (int r_sign
, gprmask_t
);
1103 void emit_move_mask (int dest
, int src
, int n_bytes
, gprmask_t
&);
1104 rtx_insn
*emit_sequence (basic_block
, rtx_insn
*);
1105 bool get_2ary_operands (rtx_code
&, const absint_byte_t
&,
1106 insn_optimize_data_t
&, int r_dest
,
1107 absint_val_t
&, absint_val_t
&, int &ex_cost
);
1108 rtx_insn
*emit_and_apply_move (memento_t
&, rtx dest
, rtx src
);
1110 // M2 is the state of GPRs as the sequence starts; M1 is the state one before.
1111 static void apply_sequence (const std::vector
<rtx_insn
*> &insns
,
1112 memento_t
&m1
, memento_t
&m2
)
1114 gcc_assert (insns
.size () >= 1);
1116 for (auto &i
: insns
)
1119 m2
.apply_insn (i
, false);
1122 }; // optimize_data_t
1125 // Emit INSNS before .curr.insn, replacing .curr.insn and also .prev.insn when
1126 // .delete_prev_p is on. Adjusts .curr.regs and .prev.regs accordingly.
1128 optimize_data_t::emit_sequence (basic_block bb
, rtx_insn
*insns
)
1130 gcc_assert (n_new_insns
>= 0);
1132 // The old insns will be replaced by and simulated...
1133 const std::vector
<rtx_insn
*> old_insns
= delete_prev_p
1134 ? std::vector
<rtx_insn
*> { prev
.insn
, curr
.insn
}
1135 : std::vector
<rtx_insn
*> { curr
.insn
};
1137 // ...against the new insns.
1138 std::vector
<rtx_insn
*> new_insns
;
1139 for (rtx_insn
*i
= insns
; i
; i
= NEXT_INSN (i
))
1140 new_insns
.push_back (i
);
1142 rtx_insn
*new_curr_insn
;
1144 memento_t
&m1
= prev
.regs
;
1145 memento_t
&m2
= curr
.regs
;
1147 if (new_insns
.empty ())
1153 new_curr_insn
= prev_nondebug_insn_bb (bb
, prev
.insn
);
1156 new_curr_insn
= prev
.insn
;
1160 // We are going to emit at least one new insn. Simulate the effect of
1161 // the new sequence and compare it against the effect of the old one.
1162 // Both effects must be the same (modulo scratch regs).
1169 m2
= m1
, m1
.known
= 0;
1170 n2
= n1
, n1
.known
= 0;
1173 avr_dump (";; Applying new route...\n");
1174 optimize_data_t::apply_sequence (new_insns
, n1
, n2
);
1176 avr_dump (";; Applying old route...\n");
1177 optimize_data_t::apply_sequence (old_insns
, m1
, m2
);
1180 if (! m2
.equals (n2
, ignore_mask
))
1182 // When we come here, then
1183 // - We have a genuine bug, and/or
1184 // - We did produce insns that are opaque to absint_t's explore().
1185 avr_dump ("INCOMPLETE APPLICATION:\n");
1186 m2
.dump ("regs old route=%s\n\n");
1187 n2
.dump ("regs new route=%s\n\n");
1188 avr_dump ("The new insns are:\n%L", insns
);
1190 fatal_insn ("incomplete application of insn", insns
);
1193 // Use N1 and N2 as the new GPR states. Even though they are equal
1194 // modulo ignore_mask, N2 may know more about GPRs when it doesn't
1195 // clobber the scratch reg.
1199 emit_insn_before (insns
, curr
.insn
);
1201 new_curr_insn
= new_insns
.back ();
1205 SET_INSN_DELETED (prev
.insn
);
1207 SET_INSN_DELETED (curr
.insn
);
1209 return new_curr_insn
;
1213 const pass_data avr_pass_data_fuse_move
=
1216 "", // name (will be patched)
1217 OPTGROUP_NONE
, // optinfo_flags
1218 TV_MACH_DEP
, // tv_id
1219 0, // properties_required
1220 0, // properties_provided
1221 0, // properties_destroyed
1222 0, // todo_flags_start
1223 TODO_df_finish
| TODO_df_verify
// todo_flags_finish
1227 class avr_pass_fuse_move
: public rtl_opt_pass
1230 avr_pass_fuse_move (gcc::context
*ctxt
, const char *name
)
1231 : rtl_opt_pass (avr_pass_data_fuse_move
, ctxt
)
1236 unsigned int execute (function
*func
) final override
1238 if (optimize
> 0 && avropt_fuse_move
> 0)
1240 df_note_add_problem ();
1243 bbinfo_t::optimize_one_function (func
);
1248 }; // avr_pass_fuse_move
1251 // Append PLY to .plies[]. A SET or BLD ply may start a new sequence of
1252 // SETs or BLDs and gets assigned the overhead of the sequence like for an
1253 // initial SET or CLT instruction. A SET ply my be added in two flavours:
1254 // One that starts a sequence of single_sets, and one that represents the
1255 // payload of a set_some insn. MEMO is the GPR state prior to PLY.
1257 plies_t::add (ply_t ply
, const ply_t
*prev
, const memento_t
&memo
,
1258 bool maybe_set_some
)
1260 if (ply
.code
== SET
)
1262 if (prev
&& prev
->code
== SET
)
1264 // Proceed with the SET sequence flavour.
1265 ply
.in_set_some
= prev
->in_set_some
;
1267 if (ply
.in_set_some
)
1269 else if (! ply
.scratch
&& ply
.needs_scratch ())
1274 // The 1st SET in a sequence. May use set_some to set
1275 // all bytes in one insn, or a bunch of single_sets.
1277 // Route1: Bunch of single_sets.
1278 const int ply_cost
= ply
.cost
;
1279 if (! ply
.scratch
&& ply
.needs_scratch ())
1281 ply
.in_set_some
= false;
1287 // Route 2: One set_some: The 1st SET gets all the overhead.
1289 ply
.cost
= ply_cost
+ 1 + ! memo
.known_dregno ();
1290 ply
.in_set_some
= true;
1294 else if (ply
.is_bld ())
1296 // The first BLD in a series of BLDs gets the extra costs
1297 // for the SET / CLT that precedes the BLDs.
1298 ply
.cost
+= ! ply
.is_same_bld (prev
);
1305 // Emit insns for .plies[] and return the number of emitted insns.
1306 // The emitted insns represent the effect of II with MEMO, which
1307 // is the GPR knowledge before II is executed.
1309 plies_t::emit_insns (const insninfo_t
&ii
, const memento_t
&memo
) const
1313 for (int i
= 0; i
< n_plies
; ++i
)
1315 const ply_t
&p
= plies
[i
];
1317 // SETs and BLDs are dumped by their emit_xxxs().
1318 if (p
.code
!= SET
&& ! p
.is_bld ())
1321 rtx src1
= NULL_RTX
;
1322 rtx src2
= NULL_RTX
;
1323 rtx dest
= NULL_RTX
;
1324 rtx xscratch
= NULL_RTX
;
1325 rtx_code code
= p
.code
;
1330 avr_dump ("\n\n;; Bad ply_t:\n");
1335 case REG
: // *movhi = MOVW; movqi_insn = MOV
1336 dest
= gen_rtx_REG (p
.size
== 1 ? QImode
: HImode
, p
.regno
);
1337 src1
= gen_rtx_REG (p
.size
== 1 ? QImode
: HImode
, p
.arg
);
1340 case SET
: // movqi_insn = LDI, CLR; set_some = (LDI + MOV) ** size.
1341 i
+= emit_sets (ii
, n_insns
, memo
, i
) - 1;
1344 case MOD
: // *ior<mode>3, *and<mode>3 = SET + BLD... / CLT + BLD...
1345 i
+= emit_blds (ii
, n_insns
, i
) - 1;
1348 case MINUS
: // *subqi3 = SUB
1349 case PLUS
: // *addqi3 = ADD
1350 case AND
: // *andqi3 = AND
1351 case IOR
: // *iorqi3 = OR
1352 case XOR
: // *xorqi3 = EOR
1353 dest
= gen_rtx_REG (QImode
, p
.regno
);
1354 src2
= gen_rtx_REG (QImode
, p
.arg
);
1357 case PRE_INC
: // *addqi3 = INC
1358 case PRE_DEC
: // *addqi3 = DEC
1360 dest
= gen_rtx_REG (QImode
, p
.regno
);
1361 src2
= p
.code
== PRE_INC
? const1_rtx
: constm1_rtx
;
1364 case NEG
: // *negqi2 = NEG
1365 case NOT
: // *one_cmplqi2 = COM
1366 dest
= gen_rtx_REG (QImode
, p
.regno
);
1370 case ROTATE
: // *rotlqi3 = SWAP
1371 case ASHIFT
: // *ashlqi3 = LSL
1372 case ASHIFTRT
: // *ashrqi3 = ASR
1373 case LSHIFTRT
: // *lshrqi3 = LSR
1374 dest
= gen_rtx_REG (QImode
, p
.regno
);
1375 src2
= GEN_INT (code
== ROTATE
? 4 : 1);
1378 case SS_PLUS
: // *addhi3 = ADIW, SBIW
1380 dest
= gen_rtx_REG (HImode
, p
.regno
);
1381 src2
= gen_int_mode (p
.arg
, HImode
);
1385 gcc_assert (dest
&& (! src1
) + (! src2
) == 1);
1387 rtx src
= code
== REG
|| code
== SET
1390 ? gen_rtx_fmt_ee (code
, GET_MODE (dest
), dest
, src2
)
1391 : gen_rtx_fmt_e (code
, GET_MODE (dest
), src1
));
1393 emit_valid_move_clobbercc (dest
, src
, xscratch
);
1401 // Helper for .emit_insns(). Emit an ior<mode>3 or and<mode>3 insn
1402 // that's equivalent to a sequence of contiguous BLDs starting at
1403 // .plies[ISTART]. Updates N_INSNS according to the number of insns
1404 // emitted and returns the number of consumed plys in .plies[].
1406 plies_t::emit_blds (const insninfo_t
&ii
, int &n_insns
, int istart
) const
1408 const ply_t
&first
= plies
[istart
];
1410 gcc_assert (ii
.m_size
<= 4);
1411 gcc_assert (first
.is_bld ());
1413 const rtx_code code
= first
.is_setbld () ? IOR
: AND
;
1414 const machine_mode mode
= size_to_mode (ii
.m_size
);
1416 // Determine mask and number of BLDs.
1421 for (int i
= istart
; i
< n_plies
; ++i
, ++n_blds
)
1423 const ply_t
&p
= plies
[i
];
1424 if (! p
.is_bld () || ! p
.is_same_bld (& first
))
1427 // For AND, work on the 1-complement of the mask,
1428 // i.e. 1's specify which bits to clear.
1429 uint8_t mask8
= code
== IOR
? p
.arg
: ~p
.arg
;
1430 mask
|= mask8
<< (8 * (p
.regno
- ii
.m_regno
));
1433 mask
= GET_MODE_MASK (mode
) & (code
== IOR
? mask
: ~mask
);
1437 fprintf (dump_file
, ";; emit_blds[%d...%d] R%d[%d]%s=%0*x\n",
1438 istart
, istart
+ n_blds
- 1, ii
.m_regno
, ii
.m_size
,
1439 code
== IOR
? "|" : "&", 2 * ii
.m_size
, (int) mask
);
1442 for (int i
= 0; i
< n_blds
; ++i
)
1443 plies
[i
+ istart
].dump ();
1445 rtx dest
= gen_rtx_REG (mode
, ii
.m_regno
);
1446 rtx src
= gen_rtx_fmt_ee (code
, mode
, dest
, gen_int_mode (mask
, mode
));
1447 rtx xscratch
= mode
== QImode
? NULL_RTX
: gen_rtx_SCRATCH (QImode
);
1449 emit_valid_move_clobbercc (dest
, src
, xscratch
);
1456 // Emit insns for a contiguous sequence of SET ply_t's starting at
1457 // .plies[ISTART]. Advances N_INSNS by the number of emitted insns.
1458 // MEMO ist the state of the GPRs before II is executed, where II
1459 // represents the insn under optimization.
1460 // The emitted insns are "movqi_insn" or "*reload_inqi"
1461 // when .plies[ISTART].in_set_some is not set, and one "set_some" insn
1462 // when .plies[ISTART].in_set_some is set.
1464 plies_t::emit_sets (const insninfo_t
&ii
, int &n_insns
, const memento_t
&memo
,
1467 gcc_assert (plies
[istart
].code
== SET
);
1469 const bool in_set_some
= plies
[istart
].in_set_some
;
1471 // Some d-regno that holds a compile-time constant, or 0.
1472 const int known_dregno
= memo
.known_dregno ();
1474 // Determine number of contiguous SETs,
1475 // and sort them in ps[] such that smaller regnos come first.
1477 const ply_t
*ps
[FUSE_MOVE_MAX_MODESIZE
];
1480 for (int i
= istart
; i
< n_plies
&& plies
[i
].code
== SET
; ++i
)
1481 ps
[n_sets
++] = & plies
[i
];
1485 fprintf (dump_file
, ";; emit_sets[%d...%d] R%d[%d]=%0*" PRIx64
,
1486 istart
, istart
+ n_sets
- 1, ii
.m_regno
, ii
.m_size
,
1487 2 * ii
.m_size
, ii
.m_isrc
);
1488 fprintf (dump_file
, ", scratch=%s%d", "R" + ! ii
.m_scratch
, ii
.m_scratch
);
1489 fprintf (dump_file
, ", known_dreg=%s%d, set_some=%d\n",
1490 "R" + ! known_dregno
, known_dregno
, in_set_some
);
1493 for (int i
= 0; i
< n_sets
; ++i
)
1496 // Sort. This is most useful on regs like (reg:SI REG_14).
1497 for (int i
= 0; i
< n_sets
- 1; ++i
)
1498 for (int j
= i
+ 1; j
< n_sets
; ++j
)
1499 if (ps
[i
]->regno
> ps
[j
]->regno
)
1500 std::swap (ps
[i
], ps
[j
]);
1502 // Prepare operands.
1503 rtx dst
[FUSE_MOVE_MAX_MODESIZE
];
1504 rtx src
[FUSE_MOVE_MAX_MODESIZE
];
1505 for (int i
= 0; i
< n_sets
; ++i
)
1507 dst
[i
] = gen_rtx_REG (QImode
, ps
[i
]->regno
);
1508 src
[i
] = gen_int_mode (ps
[i
]->arg
, QImode
);
1513 // Emit a "set_some" insn that sets all of the collected 8-bit SETs.
1514 // This is a parallel with n_sets QImode SETs as payload.
1516 gcc_assert (! known_dregno
|| memo
.knows (known_dregno
));
1519 rtx op1
= known_dregno
1520 ? gen_rtx_REG (QImode
, known_dregno
)
1522 // ...with a known content, so it can be restored without saving.
1523 rtx op2
= known_dregno
1524 ? gen_int_mode (memo
.values
[known_dregno
], QImode
)
1526 // Target register envelope.
1527 rtx op3
= GEN_INT (ii
.m_regno
);
1528 rtx op4
= GEN_INT (ii
.m_size
);
1531 for (int i
= 0; i
< n_sets
; ++i
)
1532 dst
[i
] = gen_rtx_SET (dst
[i
], src
[i
]);
1534 rtvec vec
= gen_rtvec (5 + n_sets
,
1535 gen_rtx_USE (VOIDmode
, op1
),
1536 gen_rtx_USE (VOIDmode
, op2
),
1537 gen_rtx_USE (VOIDmode
, op3
),
1538 gen_rtx_USE (VOIDmode
, op4
),
1539 gen_rtx_CLOBBER (VOIDmode
, cc_reg_rtx
),
1540 dst
[0], dst
[1], dst
[2], dst
[3]);
1541 rtx pattern
= gen_rtx_PARALLEL (VOIDmode
, vec
);
1543 emit_valid_insn (pattern
);
1548 // Emit a bunch of movqi_insn / *reload_inqi insns.
1550 for (int i
= 0; i
< n_sets
; ++i
)
1552 && AVRasm::constant_cost (SET
, ps
[i
]->regno
, ps
[i
]->arg
) > 1)
1554 rtx scratch
= gen_rtx_REG (QImode
, ii
.m_scratch
);
1555 bool use_reload_inqi
= true;
1556 if (use_reload_inqi
)
1558 emit_valid_move_clobbercc (dst
[i
], src
[i
], scratch
);
1563 emit_valid_move_clobbercc (scratch
, src
[i
]);
1564 emit_valid_move_clobbercc (dst
[i
], scratch
);
1570 emit_valid_move_clobbercc (dst
[i
], src
[i
]);
1579 // Try to find an operation such that Y = op (X).
1580 // Shifts and rotates are regarded as unary operaions with
1581 // an implied 2nd operand or 1 or 4, respectively.
1583 find_arith (uint8_t y
, uint8_t x
)
1585 #define RETIF(ex, code) y == (0xff & (ex)) ? code
1586 return select
<rtx_code
>()
1587 : RETIF (x
+ 1, PRE_INC
)
1588 : RETIF (x
- 1, PRE_DEC
)
1589 : RETIF ((x
<< 4) | (x
>> 4), ROTATE
)
1592 : RETIF (x
>> 1, LSHIFTRT
)
1593 : RETIF (x
<< 1, ASHIFT
)
1594 : RETIF ((x
>> 1) | (x
& 0x80), ASHIFTRT
)
1600 // Try to find an operation such that Z = X op X.
1602 find_arith2 (uint8_t z
, uint8_t x
, uint8_t y
)
1604 #define RETIF(ex, code) z == (0xff & (ex)) ? code
1605 return select
<rtx_code
>()
1606 : RETIF (x
+ y
, PLUS
)
1607 : RETIF (x
- y
, MINUS
)
1608 : RETIF (x
& y
, AND
)
1609 : RETIF (x
| y
, IOR
)
1610 : RETIF (x
^ y
, XOR
)
1616 // Add plies to .plies[] that represent a MOVW, but only ones that reduce
1617 // the Hamming distance from REGNO[SIZE] to VAL by exactly DHAMM.
1619 plies_t::add_plies_movw (int regno
, int size
, uint64_t val
,
1620 int dhamm
, const memento_t
&memo
)
1622 if (! AVR_HAVE_MOVW
|| size
< 2)
1625 for (int i
= 0; i
< size
- 1; i
+= 2)
1627 // MOVW that sets less than 2 regs to the target value is
1628 // not needed for the upper regs.
1629 if (dhamm
!= 2 && regno
+ i
>= REG_16
)
1632 const uint16_t val16
= val
>> (8 * i
);
1633 const uint8_t lo8
= val16
;
1634 const uint8_t hi8
= val16
>> 8;
1636 // When one of the target bytes is already as expected, then
1637 // no MOVW is needed for an optimal sequence.
1638 if (memo
.have_value (regno
+ i
, 1, lo8
)
1639 || memo
.have_value (regno
+ i
+ 1, 1, hi8
))
1642 const int h_old
= memo
.hamming (regno
+ i
, 2, val16
);
1644 // Record MOVWs that reduce the Hamming distance by DHAMM as requested.
1645 for (int j
= FIRST_GPR
; j
< REG_32
; j
+= 2)
1647 && memo
.knows (j
, 2))
1649 const int h_new
= memo
.hamming (j
, 2, val16
);
1650 if (h_new
== h_old
- dhamm
)
1651 add (ply_t
{ regno
+ i
, 2, REG
, j
, 1, dhamm
});
1657 // Set PS to plys that reduce the Hamming distance from II.m_regno to
1658 // compile-time constant II.m_isrc by 2, 1 or 0. PREV is NULL or points
1659 // to a previous ply_t. MEMO is the GPR state after PREV and prior to the
1662 bbinfo_t::get_plies (plies_t
&ps
, const insninfo_t
&ii
, const memento_t
&memo
,
1667 fpd
->n_get_plies
+= 1;
1669 const bool maybe_set_some
= (bbinfo_t::use_set_some_p
&& ii
.needs_scratch ());
1671 // Start with cheap plies, then continue to more expensive ones.
1672 const int regno
= ii
.m_regno
;
1673 const int size
= ii
.m_size
;
1674 const uint64_t val
= ii
.m_isrc
;
1676 // Find MOVW with a Hamming delta of 2.
1677 ps
.add_plies_movw (regno
, size
, val
, 2, memo
);
1680 if (AVR_HAVE_ADIW
&& size
>= 2)
1681 for (int i
= 0; i
< size
- 1; i
+= 2)
1682 if (regno
+ i
>= REG_24
1683 && memo
.knows (regno
+ i
, 2))
1685 const int16_t value16
= memo
[regno
+ i
] + 256 * memo
[regno
+ i
+ 1];
1686 const int16_t lo16
= val
>> (8 * i
);
1687 const int16_t delta
= lo16
- value16
;
1688 const uint8_t lo8
= val
>> (8 * i
);
1689 const uint8_t hi8
= val
>> (8 * i
+ 8);
1690 if (IN_RANGE (delta
, -63, 63)
1691 && lo8
!= memo
[regno
+ i
]
1692 && hi8
!= memo
[regno
+ i
+ 1])
1694 ps
.add (ply_t
{ regno
+ i
, 2, SS_PLUS
, delta
, 1, 2 });
1698 // Find 1-reg plies. In an optimal sequence, each 1-reg ply will decrease
1699 // the Hamming distance. Thus we only have to consider plies that set
1700 // one of the target bytes to the target value VAL. Start with the
1701 // high registers since that is the canonical order when two plies commute.
1703 for (int i
= size
- 1; i
>= 0; --i
)
1705 const uint8_t val8
= val
>> (8 * i
);
1707 // Nothing to do for this byte when its value is already as desired.
1708 if (memo
.have_value (regno
+ i
, 1, val8
))
1712 if (regno
+ i
>= REG_16
|| val8
== 0)
1713 ps
.add (ply_t
{ regno
+ i
, 1, SET
, val8
, 1 }, prev
, memo
,
1716 // We only may need to MOV non-zero values since there is CLR,
1717 // and only when there is no LDI.
1719 && regno
+ i
< REG_16
)
1721 // MOV where the source register is one of the target regs.
1722 for (int j
= 0; j
< size
; ++j
)
1724 if (memo
.have_value (regno
+ j
, 1, val8
))
1725 ps
.add (ply_t
{ regno
+ i
, 1, REG
, regno
+ j
, 1 });
1727 // MOV where the source register is not a target reg.
1729 for (int j
= FIRST_GPR
; j
< REG_32
; ++j
)
1730 if (! IN_RANGE (j
, regno
, regno
+ size
- 1))
1731 if (memo
.have_value (j
, 1, val8
))
1732 ps
.add (ply_t
{ regno
+ i
, 1, REG
, j
, 1 });
1735 if (regno
+ i
< REG_16
&& val8
!= 0)
1737 ply_t p
{ regno
+ i
, 1, SET
, val8
, 2 };
1738 p
.scratch
= ii
.m_scratch
;
1739 ps
.add (p
, prev
, memo
, maybe_set_some
);
1744 // Arithmetic like INC, DEC or ASHIFT.
1745 for (int i
= size
- 1; i
>= 0; --i
)
1746 if (bbinfo_t::use_arith_p
1747 && regno
+ i
< REG_16
1748 && memo
.knows (regno
+ i
))
1750 const uint8_t y
= val
>> (8 * i
);
1751 const uint8_t x
= memo
[regno
+ i
];
1754 if (y
== 0 || y
== x
)
1757 // INC, DEC, SWAP, LSL, NEG, ...
1758 if (UNKNOWN
!= (code
= find_arith (y
, x
)))
1760 ps
.add (ply_t
{ regno
+ i
, 1, code
, x
/* dummy */, 1 });
1765 for (int r
= FIRST_GPR
; r
< REG_32
; ++r
)
1769 && UNKNOWN
!= (code
= find_arith2 (y
, x
, memo
[r
])))
1771 ps
.add (ply_t
{ regno
+ i
, 1, code
, r
, 1 });
1774 if (size
< 2 || size
> 4)
1778 if ((x
& y
) == x
&& popcount_hwi (x
^ y
) == 1)
1779 ps
.add (ply_t
{ regno
+ i
, 1, MOD
, x
^ y
, 1 },
1780 prev
, memo
, maybe_set_some
);
1783 if ((x
& y
) == y
&& popcount_hwi (x
^ y
) == 1)
1784 ps
.add (ply_t
{ regno
+ i
, 1, MOD
, x
^ y
^ 0xff, 1 },
1785 prev
, memo
, maybe_set_some
);
1788 if (bbinfo_t::use_arith_p
1789 // For 8-byte values, don't use ply_t's with only a partial reduction
1790 // of the hamming distance.
1793 // Find MOVW with a Hamming delta of 1, then 0.
1794 ps
.add_plies_movw (regno
, size
, val
, 1, memo
);
1795 ps
.add_plies_movw (regno
, size
, val
, 0, memo
);
1798 plies_t::max_n_plies
= std::max (plies_t::max_n_plies
, ps
.n_plies
);
1802 // Try to combine two 8-bit insns PREV and CURR that (effectively)
1803 // are REG = CONST_INT to one 16-bit such insn. Returns true on success.
1805 insninfo_t::combine (const insninfo_t
&prev
, const insninfo_t
&curr
)
1807 if (prev
.m_size
== 1 && curr
.m_size
== 1
1808 && prev
.m_regno
== (1 ^ curr
.m_regno
)
1809 && curr
.m_code
== CONST_INT
1810 && prev
.m_code
== CONST_INT
)
1812 m_regno
= curr
.m_regno
& ~1;
1815 m_scratch
= std::max (curr
.m_scratch
, prev
.m_scratch
);
1816 m_isrc
= m_regno
== prev
.m_regno
1817 ? (uint8_t) prev
.m_isrc
+ 256 * (uint8_t) curr
.m_isrc
1818 : (uint8_t) curr
.m_isrc
+ 256 * (uint8_t) prev
.m_isrc
;
1827 // Return the cost (in terms of words) of the respective mov<mode> insn.
1828 // This can be used as an upper bound for the ply_t's cost.
1830 insninfo_t::cost () const
1832 if (m_code
!= CONST_INT
)
1835 if (m_regno
>= REG_16
|| m_isrc
== 0)
1837 // MOVW can save one instruction.
1838 - (AVR_HAVE_MOVW
&& m_size
== 4 && (uint32_t) m_isrc
% 0x10001 == 0);
1840 // LDI + MOV to a lower reg.
1841 if (m_scratch
&& m_size
== 1)
1847 for (int i
= 0; i
< m_size
; ++i
)
1848 len
+= m_regno
+ i
< REG_16
&& (0xff & (m_isrc
>> (8 * i
))) != 0;
1852 // All other cases are complicated. Ask the output oracle.
1853 const machine_mode mode
= size_to_mode (m_size
);
1854 rtx xscratch
= m_scratch
? all_regs_rtx
[m_scratch
] : NULL_RTX
;
1855 rtx xop
[] = { gen_rtx_REG (mode
, m_regno
), gen_int_mode (m_isrc
, mode
) };
1858 output_reload_insisf (xop
, xscratch
, &len
);
1860 output_reload_in_const (xop
, xscratch
, &len
, false);
1865 // Emit the according REG = REG-or-CONST_INT insn. Returns 1 or aborts
1866 // when the insn is not of that form.
1868 insninfo_t::emit_insn () const
1872 machine_mode mode
= size_to_mode (m_size
);
1873 rtx xsrc
= NULL_RTX
;
1874 rtx xscratch
= NULL_RTX
;
1876 gcc_assert (m_size
> 0);
1884 xsrc
= gen_int_mode (m_isrc
, mode
);
1885 if (m_scratch
&& m_regno
< REG_16
)
1886 xscratch
= gen_rtx_REG (QImode
, m_scratch
);
1890 gcc_assert (gpr_regno_p (m_rsrc
, m_size
));
1891 if (m_regno
!= m_rsrc
)
1892 xsrc
= gen_rtx_REG (mode
, m_rsrc
);
1898 rtx dest
= gen_rtx_REG (mode
, m_regno
);
1899 emit_valid_move_clobbercc (dest
, xsrc
, xscratch
);
1907 // Entering a basic block means combining known register values from
1908 // all incoming BBs.
1912 avr_dump ("\n;; Entering [bb %d]\n", bb
->index
);
1914 gcc_assert (! done
);
1918 gprmask_t pred_known_mask
= ~0u;
1919 bbinfo_t
*bbi
= nullptr;
1921 // A quick iteration over all predecessors / incoming edges to reveal
1922 // whether this BB is worth a closer look.
1923 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1925 basic_block pred
= e
->src
;
1926 bbi
= & bb_info
[pred
->index
];
1928 pred_known_mask
&= bbi
->regs
.known
;
1932 avr_dump (";; [bb %d] <- [bb %d] ", e
->dest
->index
, e
->src
->index
);
1936 avr_dump (" (unknown)\n");
1940 // Only if all predecessors have already been handled, we can
1941 // have known values as we are entering the current BB.
1942 if (pred_known_mask
!= 0
1945 // Initialize current BB info from BI, an arbitrary predecessor.
1949 // Coalesce the output values from all predecessing BBs. At the
1950 // start of the current BB, a value is only known if it is known
1951 // in *all* predecessors and *all* these values are the same.
1952 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1954 regs
.coalesce (bb_info
[e
->src
->index
].regs
);
1960 avr_dump (";; [bb %d] known at start: ", bb
->index
);
1964 avr_dump (" (none)\n");
1976 fprintf (dump_file
, ";; Leaving [bb %d]\n\n", bb
->index
);
1980 /* Initialize according to INSN which is a 1-byte single_set that's
1981 (effectively) a reg = reg or reg = const move. INSN may be the result
1982 of the current pass's optimization, e.g. something like INC R2 where R2
1983 has a known content. MEMO is the state prior to INSN. Only CONST
1984 cases are recorded; plus cases that are non-trivial for example when
1985 an XOR decays to a move. */
1988 insninfo_t::init1 (insn_optimize_data_t
&iod
, int max_size
,
1989 const char *purpose
= "")
1993 m_old_code
= UNKNOWN
;
1997 || ! (m_set
= single_set_with_scratch (iod
.insn
, m_scratch
)))
2000 rtx dest
= SET_DEST (m_set
);
2001 machine_mode mode
= GET_MODE (dest
);
2002 const int n_bytes
= GET_MODE_SIZE (mode
);
2003 max_size
= std::min (max_size
, FUSE_MOVE_MAX_MODESIZE
);
2006 || END_REGNO (dest
) > REG_32
2007 || n_bytes
> max_size
)
2010 // Omit insns that (explicitly) touch fixed GPRs in any way.
2011 using elt0_getter_HRS
= elt0_getter
<HARD_REG_SET
, HARD_REG_ELT_TYPE
>;
2013 CLEAR_HARD_REG_SET (hregs
);
2014 find_all_hard_regs (PATTERN (iod
.insn
), & hregs
);
2015 if (memento_t::fixed_regs_mask
& (gprmask_t
) elt0_getter_HRS::get (hregs
))
2017 avr_dump (";; %sinit1 has fixed GPRs\n", purpose
);
2021 if ((iod
.unused
= find_reg_note (iod
.insn
, REG_UNUSED
, dest
)))
2024 m_src
= SET_SRC (m_set
);
2025 m_regno
= REGNO (dest
);
2026 const rtx_code src_code
= GET_CODE (m_src
);
2028 m_ai
= absint_t::explore (m_src
, iod
.regs
, mode
);
2030 if (m_ai
.popcount ())
2032 if (m_ai
.end_knows (CONST_INT
) >= n_bytes
)
2035 m_old_code
= CONSTANT_P (m_src
) ? UNKNOWN
: src_code
;
2036 m_isrc
= m_ai
.get_value (n_bytes
);
2039 else if (! REG_P (m_src
)
2041 && m_ai
.end_knows (REG
) >= n_bytes
)
2044 m_old_code
= src_code
;
2045 m_rsrc
= m_ai
[0].regno ();
2048 else if (n_bytes
== 1)
2050 absint_byte_t
&aib
= m_new_src
;
2051 aib
= m_ai
[0].find_alternative_binary (iod
.regs
);
2053 if (aib
.arity () == 2
2054 && aib
.arg (0).regno
== m_regno
)
2056 m_old_code
= src_code
;
2057 m_code
= aib
.get_code ();
2061 else if (n_bytes
>= 2
2062 && m_ai
.end_knows (VALUE
) >= n_bytes
)
2068 if (dump_file
&& m_size
!= 0)
2070 avr_dump (";; %sinit1 (%C", purpose
,
2071 m_old_code
? m_old_code
: m_code
);
2073 avr_dump ("-> %C", m_code
);
2074 avr_dump (") insn %d to R%d[%d] := %C:%m = ", INSN_UID (iod
.insn
),
2075 m_regno
, n_bytes
, src_code
, mode
);
2079 if (dump_flags
& TDF_FOLDING
)
2088 // The private worker for .apply_insn().
2090 memento_t::apply_insn1 (rtx_insn
*insn
, bool unused
)
2092 gcc_assert (NONDEBUG_INSN_P (insn
));
2094 if (INSN_CODE (insn
) == CODE_FOR_set_some
)
2096 // This insn only sets some selected bytes of register $3 of
2097 // modesize $4. If non-0, then $1 is a QImode scratch d-reg with
2098 // a known value of $2.
2100 const auto &xop
= recog_data
.operand
;
2101 extract_insn (insn
);
2102 gcc_assert (recog_data
.n_operands
== 7);
2103 gcc_assert (set_some_operation (xop
[0], VOIDmode
));
2105 const rtx
&xscratch
= xop
[1];
2106 const rtx
&xscratch_value
= xop
[2];
2107 const int sets_start
= 5;
2109 for (int i
= sets_start
; i
< XVECLEN (xop
[0], 0); ++i
)
2111 rtx xset
= XVECEXP (xop
[0], 0, i
);
2112 avr_dump (";; set_some %r = %r\n", XEXP (xset
, 0), XEXP (xset
, 1));
2113 set_values (XEXP (xset
, 0), XEXP (xset
, 1));
2116 if (REG_P (xscratch
))
2118 avr_dump (";; set_some %r = %r restore\n", xscratch
, xscratch_value
);
2119 set_values (xscratch
, xscratch_value
);
2123 } // CODE_FOR_set_some
2125 memento_t mold
= *this;
2127 // When insn changes a register in whatever way, set it to "unknown".
2130 find_all_hard_reg_sets (insn
, &rset
, true /* implicit */);
2133 rtx set
= single_set (insn
);
2137 || ! REG_P (dest
= SET_DEST (set
))
2138 || END_REGNO (dest
) > REG_32
2139 || (regmask (dest
) & memento_t::fixed_regs_mask
))
2142 rtx src
= SET_SRC (set
);
2143 const rtx_code src_code
= GET_CODE (src
);
2144 const machine_mode mode
= GET_MODE (dest
);
2145 const int n_bytes
= GET_MODE_SIZE (mode
);
2147 // Insns that are too complicated or have a poor yield.
2148 // Just record which regs are clobberd / changed.
2149 if (n_bytes
> FUSE_MOVE_MAX_MODESIZE
2151 || (REG_P (src
) && END_REGNO (src
) > REG_32
))
2153 // Comparisons may clobber the compared reg when it is unused after.
2154 if (src_code
== COMPARE
2155 && REG_P (XEXP (src
, 0))
2156 && CONSTANT_P (XEXP (src
, 1)))
2158 rtx reg
= XEXP (src
, 0);
2159 for (unsigned r
= REGNO (reg
); r
< END_REGNO (reg
); ++r
)
2168 // Simulate the effect of some selected insns that are likely to produce
2169 // or propagate known values.
2171 // Get an abstract representation of src. Bytes may be unknown,
2172 // known to equal some 8-bit compile-time constant (CTC) value,
2173 // or are known to equal some 8-bit register.
2174 // TODO: Currently, only the ai[].val8 knowledge ist used.
2175 // What's the best way to make use of ai[].regno ?
2177 absint_t ai
= absint_t::explore (src
, mold
, mode
);
2181 avr_dump (";; apply_insn %d R%d[%d] := %C:%m = ", INSN_UID (insn
),
2182 REGNO (dest
), n_bytes
, src_code
, mode
);
2185 for (int i
= 0; i
< n_bytes
; ++i
)
2186 if (ai
[i
].can (CONST_INT
))
2187 set_value (i
+ REGNO (dest
), ai
[i
].val8 ());
2193 memento_t::apply (const ply_t
&p
)
2197 copy_value (p
.regno
, p
.arg
);
2198 copy_value (p
.regno
+ 1, p
.arg
+ 1);
2200 else if (p
.is_adiw ())
2202 int val
= p
.arg
+ values
[p
.regno
] + 256 * values
[1 + p
.regno
];
2203 set_value (p
.regno
, val
);
2204 set_value (p
.regno
+ 1, val
>> 8);
2206 else if (p
.size
== 1)
2208 int x
= values
[p
.regno
];
2209 int y
= values
[p
.arg
];
2218 copy_value (p
.regno
, p
.arg
);
2222 set_value (p
.regno
, p
.arg
);
2223 if (p
.scratch
>= REG_16
)
2224 set_unknown (p
.scratch
);
2228 gcc_assert (knows (p
.regno
));
2229 if (popcount_hwi (p
.arg
) == 1)
2230 values
[p
.regno
] |= p
.arg
;
2231 else if (popcount_hwi (p
.arg
) == 7)
2232 values
[p
.regno
] &= p
.arg
;
2237 #define DO_ARITH(n_args, code, expr) \
2239 gcc_assert (knows (p.regno)); \
2241 gcc_assert (knows (p.arg)); \
2242 set_value (p.regno, expr); \
2245 DO_ARITH (1, NEG
, -x
);
2246 DO_ARITH (1, NOT
, ~x
);
2247 DO_ARITH (1, PRE_INC
, x
+ 1);
2248 DO_ARITH (1, PRE_DEC
, x
- 1);
2249 DO_ARITH (1, ROTATE
, (x
<< 4) | (x
>> 4));
2250 DO_ARITH (1, ASHIFT
, x
<< 1);
2251 DO_ARITH (1, LSHIFTRT
, x
>> 1);
2252 DO_ARITH (1, ASHIFTRT
, (x
>> 1) | (x
& 0x80));
2254 DO_ARITH (2, AND
, x
& y
);
2255 DO_ARITH (2, IOR
, x
| y
);
2256 DO_ARITH (2, XOR
, x
^ y
);
2257 DO_ARITH (2, PLUS
, x
+ y
);
2258 DO_ARITH (2, MINUS
, x
- y
);
2267 // Try to find a sequence of ply_t's that represent a II.m_regno = II.m_isrc
2268 // insn that sets a reg to a compile-time constant, and that is more
2269 // efficient than just a move insn. (When try_split_any_p is on, then
2270 // solutions that perform equal to a move insn are also allowed).
2271 // MEMO0 is the GPR state before II runs. A solution has been found
2272 // when .fpd->solution has at least one entry. LEN specifies the
2273 // depth of recursion, which works on the LEN-th ply_t.
2275 bbinfo_t::find_plies (int len
, const insninfo_t
&ii
, const memento_t
&memo0
)
2277 if (len
> fpd
->n_best_plys
)
2280 memento_t memo
= memo0
;
2281 bool ply_applied_p
= false;
2283 //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2284 const bool extra
= dump_file
&& (dump_flags
& TDF_FOLDING
);
2288 fprintf (dump_file
, ";; #%d (HAM=%d): get_plies R%d[%d] = ", len
,
2289 ii
.hamming (fpd
->regs0
), ii
.m_regno
, ii
.m_size
);
2290 fprintf (dump_file
, "0x%0*" PRIx64
"\n",
2291 2 * ii
.m_size
, ii
.m_isrc
& size_to_mask (ii
.m_size
));
2294 plies_t
&ps
= fpd
->plies
[len
- 1];
2296 const ply_t
*const prev
= len
>= 2 ? fpd
->ply_stack
[len
- 2] : nullptr;
2297 const ply_t
*const prev2
= len
>= 3 ? fpd
->ply_stack
[len
- 3] : nullptr;
2299 bbinfo_t::get_plies (ps
, ii
, memo0
, prev
);
2301 #define NEXT(reason) \
2304 fprintf (dump_file, ";; cont=%s\n", reason); \
2308 for (int ip
= 0; ip
< ps
.n_plies
; ++ip
)
2310 const ply_t
&p
= ps
.plies
[ip
];
2312 fpd
->ply_stack
[len
- 1] = &p
;
2318 ply_t::dump_plys (dump_file
, len
, 1, fpd
->ply_stack
+ len
- 1, memo0
);
2320 // A MOVW with a Hamming distance of < 2 requires more plys.
2321 if (p
.is_movw () && len
+ (2 - p
.dhamming
) > fpd
->n_best_plys
)
2326 // Destroying (parts of) the results of the previous ply
2327 // won't yield an optimal sequence.
2328 if (p
.overrides (prev
))
2331 // When two plys are independent of each other, then only
2332 // investigate sequences that operate on the higher reg first.
2333 // This canonicalization reduces the number of candidates,
2334 if (p
.commutes_with (prev
, ii
.m_scratch
)
2335 && p
.regno
> prev
->regno
)
2336 NEXT ("noncanonic");
2338 // Two subsequent BLDs touching the same register.
2341 && p
.changes_result_of (prev
))
2344 // When there is a BLD, then at least 2 of the same kind
2345 // shall occur in a row.
2349 || (prev
->is_setbld () && ! prev2
->is_setbld ())
2350 || (prev
->is_cltbld () && ! prev2
->is_cltbld ())))
2354 // The hamming delta of a MOVW may be less than 2, namely 0 or 1.
2355 // When the latter is the case, then a reasonable sequence must
2356 // modify the result of the MOVW.
2359 && prev
->dhamming
== 1
2360 && ! p
.changes_result_of (prev
))
2364 && prev2
->is_movw ()
2365 && prev2
->dhamming
== 0
2366 && ! p
.changes_result_of (prev2
))
2369 // When setting an n-byte destination, then at most n/2 MOVWs
2370 // will occur in an optimal sequence.
2372 for (int i
= 0; i
< len
; ++i
)
2373 n_movw
+= fpd
->ply_stack
[i
]->is_movw ();
2374 if (n_movw
> ii
.m_size
/ 2)
2382 ply_applied_p
= true;
2384 // Calculate the cost of the sequence we have so far. Scale by some
2385 // factor so that we can express that ADIW is more expensive than MOVW
2386 // because it is slower, but without defeating MOVW.
2387 const int SCALE
= 4;
2390 int cost
= SCALE
* 0;
2393 for (int i
= 0; i
< len
; ++i
)
2395 bool adiw_p
= fpd
->ply_stack
[i
]->is_adiw ();
2396 cost
+= SCALE
* fpd
->ply_stack
[i
]->cost
+ adiw_p
;
2398 movw_p
|= fpd
->ply_stack
[i
]->is_movw ();
2402 const int hamm
= ii
.hamming (memo
);
2404 // The current Hamming distance yields a lower bound of how many
2405 // plys are still required. Consider that future cost already now.
2406 int future_cost
= AVR_HAVE_MOVW
|| (AVR_HAVE_ADIW
&& ii
.m_regno
>= REG_22
)
2410 // Similarly, when MOVW doesn't decrease the Hamming distance by 2,
2411 // then we know that at least 2 - dhamming plys must follow in the
2412 // future. (MOVW + ADIW will not occur.)
2414 future_cost
= std::max (future_cost
, 2 - p
.dhamming
);
2416 if (extra
&& future_cost
)
2417 avr_dump (";; future cost = %d, dh=%d\n", future_cost
, hamm
);
2419 cost
+= SCALE
* future_cost
;
2421 bool profitable
= (cost
< SCALE
* fpd
->max_ply_cost
2422 || (bbinfo_t::try_split_any_p
2423 && fpd
->solution
.n_plies
== 0
2424 && cost
/ SCALE
<= fpd
->max_ply_cost
2425 && cost
/ SCALE
== fpd
->movmode_cost
));
2429 avr_dump (";; cont=cost %d+%d/%d\n", cost
/ SCALE
, penal
, SCALE
);
2435 // Go down that rabbit hole.
2436 gcc_assert (ply_applied_p
);
2437 bbinfo_t::find_plies (1 + len
, ii
, memo
);
2441 // Found a solution that's better than everything so far.
2443 // Reduce the upper cost bound according to the found solution.
2444 // No future solution will be more expensive.
2445 fpd
->max_ply_cost
= cost
/ SCALE
;
2447 fpd
->solution
= plies_t (len
, fpd
->ply_stack
);
2451 avr_dump (";; #%d FOUND COST = %d%s\n", len
, cost
/ SCALE
,
2452 penal
? " with penalty" : "");
2453 ply_t::dump_plys (dump_file
, 0, len
, fpd
->ply_stack
, fpd
->regs0
);
2455 avr_dump (";; END\n");
2463 // Run .find_plies() and return true when .fpd->solution is a sequence of ply_t's
2464 // that represents II, a REG = CONST insn. MEMO is the GPR state prior to II.
2466 bbinfo_t::run_find_plies (const insninfo_t
&ii
, const memento_t
&memo
) const
2468 fpd
->solution
.reset ();
2470 fpd
->n_get_plies
= 0;
2472 const int hamm
= ii
.hamming (memo
);
2476 avr_dump (";; Found redundant insn %d\n",
2477 ii
.m_insn
? INSN_UID (ii
.m_insn
) : 0);
2481 // Upper bound (in words) for any solution that's better than mov<mode>.
2482 // Will be decreased by find plies as it finds better solutions.
2483 fpd
->movmode_cost
= ii
.cost ();
2484 fpd
->max_ply_cost
= fpd
->movmode_cost
;
2486 // With a non-zero Hamming distance, this insn will require at least one
2487 // instruction. When the upper bound for required instructions is that
2488 // small, then the current insn is good enough.
2489 if (fpd
->max_ply_cost
<= 1)
2492 fpd
->n_best_plys
= ii
.n_best_plys (hamm
);
2493 gcc_assert (fpd
->n_best_plys
<= N_BEST_PLYS
);
2497 const uint64_t mask
= size_to_mask (ii
.m_size
);
2498 fprintf (dump_file
, ";; find_plies R%d[%d] = 0x%0*" PRIx64
,
2499 ii
.m_regno
, ii
.m_size
, 2 * ii
.m_size
, ii
.m_isrc
& mask
);
2501 fprintf (dump_file
, ", scratch=r%d", ii
.m_scratch
);
2502 memo
.dump ("\n;; regs%s\n");
2505 avr_dump (";; mov<mode> cost = %d\n", fpd
->max_ply_cost
);
2506 avr_dump (";; max plys = %d\n", fpd
->n_best_plys
);
2507 ply_t::n_ply_ts
= 0;
2509 find_plies (1, ii
, memo
);
2511 avr_dump (";; get_plies called %d times\n", fpd
->n_get_plies
);
2512 avr_dump (";; n_ply_ts = %d\n", ply_t::n_ply_ts
);
2513 ply_t::max_n_ply_ts
= std::max (ply_t::max_n_ply_ts
, ply_t::n_ply_ts
);
2515 return fpd
->solution
.n_plies
!= 0;
2519 // Try to propagate __zero_reg__ to a mem = reg insn's source.
2520 // Returns true on success and sets .n_new_insns.
2522 optimize_data_t::try_mem0 (bbinfo_t
*)
2524 rtx_insn
*insn
= curr
.ii
.m_insn
;
2529 && (set
= single_set (insn
))
2530 && MEM_P (mem
= SET_DEST (set
))
2531 && REG_P (reg
= SET_SRC (set
))
2532 && GET_MODE_SIZE (mode
= GET_MODE (mem
)) <= 4
2533 && END_REGNO (reg
) <= REG_32
2534 && ! (regmask (reg
) & memento_t::fixed_regs_mask
)
2535 && curr
.regs
.have_value (REGNO (reg
), GET_MODE_SIZE (mode
), 0x0))
2537 avr_dump (";; Found insn %d: mem:%m = 0 = r%d\n", INSN_UID (insn
),
2540 // Some insns like PUSHes don't clobber REG_CC.
2541 bool clobbers_cc
= GET_CODE (PATTERN (insn
)) == PARALLEL
;
2544 emit_valid_move_clobbercc (mem
, CONST0_RTX (mode
));
2546 emit_valid_insn (gen_rtx_SET (mem
, CONST0_RTX (mode
)));
2557 // Try to fuse two 1-byte insns .prev and .curr to one 2-byte insn (MOVW).
2558 // Returns true on success, and sets .n_new_insns, .ignore_mask etc.
2560 optimize_data_t::try_fuse (bbinfo_t
*bbi
)
2564 if (! prev
.ii
.m_size
2566 || ! comb
.combine (prev
.ii
, curr
.ii
))
2569 avr_dump (";; Working on fuse of insn %d + insn %d = 0x%04x\n",
2570 INSN_UID (prev
.insn
), INSN_UID (curr
.insn
),
2571 (unsigned) comb
.m_isrc
);
2573 bool found
= bbi
->run_find_plies (comb
, prev
.regs
);
2576 avr_dump (";; Found fuse of insns %d and %d\n",
2577 INSN_UID (prev
.insn
), INSN_UID (curr
.insn
));
2579 n_new_insns
= bbinfo_t::fpd
->solution
.emit_insns (comb
, prev
.regs
);
2580 delete_prev_p
= true;
2582 if (prev
.ii
.m_scratch
)
2583 ignore_mask
|= regmask (prev
.ii
.m_scratch
, 1);
2584 if (curr
.ii
.m_scratch
)
2585 ignore_mask
|= regmask (curr
.ii
.m_scratch
, 1);
2586 ignore_mask
&= ~regmask (comb
.m_regno
, comb
.m_size
);
2593 // Try to replace an arithmetic 1-byte insn by a reg-reg move.
2594 // Returns true on success, and sets .n_new_insns etc.
2596 optimize_data_t::try_simplify (bbinfo_t
*)
2598 if (curr
.ii
.m_size
== 1
2599 && curr
.ii
.m_old_code
!= REG
2600 && curr
.ii
.m_code
== REG
)
2602 avr_dump (";; Found simplify of insn %d\n", INSN_UID (curr
.insn
));
2604 n_new_insns
= curr
.ii
.emit_insn ();
2613 // Try to replace XEXP (*, 1) of a binary operation by a cheaper expression.
2614 // Returns true on success; sets .n_new_insns, .ignore_mask, .delete_prev_p.
2616 optimize_data_t::try_bin_arg1 (bbinfo_t
*)
2618 if (curr
.ii
.m_size
!= 1
2619 || curr
.ii
.m_new_src
.arity () != 2
2623 avr_dump (";; Working on bin_arg1 insn %d\n", INSN_UID (curr
.insn
));
2625 gcc_assert (curr
.ii
.m_src
&& BINARY_P (curr
.ii
.m_src
));
2626 rtx xarg1_old
= XEXP (curr
.ii
.m_src
, 1);
2628 const absint_byte_t
&aib
= curr
.ii
.m_new_src
;
2629 const absint_val_t
&arg0
= aib
.arg (0);
2630 const absint_val_t
&arg1
= aib
.arg (1);
2631 const absint_val_t
&arg1_old
= curr
.ii
.m_ai
[0].arg (1);
2635 if (CONSTANT_P (xarg1_old
))
2637 // Sometimes, we allow expensive constants as 2nd operand like
2638 // in R2 += 2 which produces two INCs. When we have the
2639 // constant handy in a reg, then use that instead of the constant.
2640 const rtx_code code
= aib
.get_code ();
2641 gcc_assert (arg1
.val8
== (INTVAL (xarg1_old
) & 0xff));
2643 if (AVRasm::constant_cost (code
, arg0
.regno
, arg1
.val8
) > 1)
2644 src
= aib
.to_rtx ();
2646 else if (REG_P (xarg1_old
)
2647 && dead_or_set_p (curr
.insn
, xarg1_old
))
2649 src
= aib
.to_rtx ();
2651 // The 2nd operand is a reg with a known content that dies
2652 // at the current insn. Chances are high that the register
2653 // holds a reload value only used by the current insn.
2654 if (prev
.ii
.m_size
== 1
2655 && rtx_equal_p (xarg1_old
, SET_DEST (prev
.ii
.m_set
))
2656 && CONSTANT_P (prev
.ii
.m_src
))
2658 avr_dump (";; Found dying reload insn %d\n", INSN_UID (prev
.insn
));
2660 delete_prev_p
= true;
2661 ignore_mask
= regmask (arg1_old
.regno
, 1);
2667 rtx dest
= SET_DEST (curr
.ii
.m_set
);
2669 avr_dump (";; Found bin_arg1 for insn %d: ", INSN_UID (curr
.insn
));
2670 avr_dump ("%C:%m %r", curr
.ii
.m_code
, GET_MODE (dest
), xarg1_old
);
2671 aib
.dump (" = %s\n");
2673 emit_valid_move_clobbercc (dest
, src
);
2677 return src
!= NULL_RTX
;
2681 // Try to replace a REG = CONST insn by a cheaper sequence.
2682 // Returns true on success, and sets .n_new_insns, .ignore_mask etc.
2684 optimize_data_t::try_split_ldi (bbinfo_t
*bbi
)
2686 if (! curr
.ii
.m_size
2688 || curr
.ii
.m_code
!= CONST_INT
2689 || (! bbinfo_t::try_split_any_p
2690 // Finding plys will only ever succeed when there are
2691 // regs with a known value.
2692 && ! (curr
.regs
.known
2694 && curr
.ii
.m_regno
< REG_16
&& curr
.ii
.m_size
== 4))))
2697 avr_dump (";; Working on split_ldi insn %d\n", INSN_UID (curr
.insn
));
2699 bool found
= bbi
->run_find_plies (curr
.ii
, curr
.regs
);
2702 avr_dump (";; Found split for ldi insn %d\n", INSN_UID (curr
.insn
));
2704 n_new_insns
= bbinfo_t::fpd
->solution
.emit_insns (curr
.ii
, curr
.regs
);
2706 if (curr
.ii
.m_scratch
)
2707 ignore_mask
= regmask (curr
.ii
.m_scratch
, 1);
2714 // Helper for try_split_any().
2716 optimize_data_t::fail (const char *reason
)
2721 fprintf (dump_file
, ";; Giving up split_any: %s\n", reason
);
2727 // Helper for try_split_any().
2729 optimize_data_t::emit_and_apply_move (memento_t
&memo
, rtx dest
, rtx src
)
2731 rtx_insn
*insn
= emit_valid_move_clobbercc (dest
, src
);
2733 memo
.apply_insn (insn
, false);
2739 // Set X0 and X1 so that they are operands valid for a andqi3, iorqi3, xorqi3
2740 // or addqi3 insn with destination R_DEST. The method loads X1 to
2741 // a scratch reg as needed and records the GPR effect in IOD.regs.
2742 // EXTRA_COST are extra costs in units of words of insns that cost more
2743 // than one instruction. This is a helper for try_split_any().
2746 ::get_2ary_operands (rtx_code
&code
, const absint_byte_t
&aib
,
2747 insn_optimize_data_t
&iod
, int r_dest
,
2748 absint_val_t
&x0
, absint_val_t
&x1
, int &extra_cost
)
2750 if (code
!= IOR
&& code
!= AND
&& code
!= XOR
&& code
!= PLUS
)
2751 return fail ("2ary: unknown code");
2756 if (! x0
.knows_regno ()
2758 return fail ("2ary: clueless");
2761 int val8_cost
= val8
< 0 ? 100 : AVRasm::constant_cost (code
, r_dest
, val8
);
2763 if (x0
.regno
== r_dest
2764 && (x1
.knows_regno ()
2769 && x0
.regno
>= REG_16
)
2771 // xorxi3 can only "r,0,r".
2772 // x0 ^ 0x80 <=> x0 - 0x80.
2779 const bool and_1_bit
= code
== AND
&& popcount_hwi (val8
) == 1;
2780 // andqi3 has a "r,r,Cb1" alternative where Cb1 has exactly 1 bit set.
2781 // This can accommodate bytes of higher AND Cb<N> alternatives.
2782 if (x0
.regno
!= r_dest
)
2786 extra_cost
+= 1 + (r_dest
< REG_16
);
2789 else if (x1
.regno
== r_dest
)
2794 return fail ("2ary is a 3-operand insn");
2798 // 1) r_dest = x0.regno, and
2799 // 2) x1 is val8, and
2802 const bool needs_scratch_p
= select
<bool>()
2803 : code
== XOR
? true
2804 : code
== AND
? popcount_hwi (val8
) != 7
2805 : code
== IOR
? popcount_hwi (val8
) != 1
2806 : code
== PLUS
? IN_RANGE (val8
, 3, 0xff - 3)
2807 : bad_case
<bool> ();
2809 const int r_val8
= iod
.regs
.regno_with_value (val8
, 0 /* excludes: none */);
2812 // Found a reg that already holds the constant.
2817 else if (iod
.ii
.m_scratch
)
2819 // Using the insn's scratch reg.
2820 rtx xdst
= gen_rtx_REG (QImode
, iod
.ii
.m_scratch
);
2821 rtx xsrc
= gen_int_mode (x1
.val8
, QImode
);
2822 emit_and_apply_move (iod
.regs
, xdst
, xsrc
);
2824 x1
.regno
= iod
.ii
.m_scratch
;
2829 else if (! needs_scratch_p
)
2831 // Some constants (1 and -1) can be loaded without a scratch.
2837 // This can always fall back to BST + CLR + BLD, but may be cheaper.
2838 extra_cost
+= 1 + (r_dest
< REG_16
);
2842 return fail ("2ary: expensive constant");
2847 any_shift_p (rtx_code code
)
2849 return code
== LSHIFTRT
|| code
== ASHIFTRT
|| code
== ASHIFT
;
2852 // Try to split .curr into a sequence of 1-byte insns.
2853 // Returns true on success. Sets .n_new_insns and .ignore_mask.
2855 optimize_data_t::try_split_any (bbinfo_t
*)
2857 if (curr
.ii
.m_size
< 2
2858 // Constants are split by split_ldi.
2859 || CONSTANT_P (curr
.ii
.m_src
)
2860 // Splitting requires knowledge about what to do with each byte.
2861 || curr
.ii
.m_ai
.end_knows (VALUE
) < curr
.ii
.m_size
)
2864 avr_dump (";; Working on split_any %C:%m insn %d\n", curr
.ii
.m_code
,
2865 GET_MODE (SET_DEST (curr
.ii
.m_set
)), INSN_UID (curr
.insn
));
2867 const insninfo_t
&ii
= curr
.ii
;
2868 const int n_bytes
= ii
.m_size
;
2870 int binop_cost
= -1;
2872 // For plain AND, IOR, XOR get the current cost in units of words.
2873 if (BINARY_P (curr
.ii
.m_src
))
2875 const rtx_code code
= curr
.ii
.m_code
;
2876 if ((code
== IOR
|| code
== AND
|| code
== XOR
)
2877 && REG_P (XEXP (curr
.ii
.m_src
, 0))
2878 && CONSTANT_P (XEXP (curr
.ii
.m_src
, 1)))
2880 binop_cost
= get_attr_length (curr
.insn
);
2881 avr_dump (";; Competing against %C:%m cost = %d\n", code
,
2882 GET_MODE (curr
.ii
.m_src
), binop_cost
);
2886 // Step 1: Work out conflicts and which sign extends to perform.
2888 const gprmask_t regs_dest
= regmask (ii
.m_regno
, n_bytes
);
2890 gprmask_t regs_signs
= 0;
2891 bool has_lsl
= false;
2892 bool has_lsr
= false;
2894 for (int i
= 0; i
< n_bytes
; ++i
)
2896 const absint_byte_t
&aib
= ii
.m_ai
[i
];
2897 const int r_dest
= ii
.m_regno
+ i
;
2898 const gprmask_t regs_src
= aib
.reg_mask ();
2900 // When only regs to the right are used, or only regs to the left
2901 // are used, then there's no conflict like it is arising for rotates.
2902 // For now, only implement conflict-free splits.
2903 has_lsl
|= has_bits_in (regs_src
& regs_dest
, 0, r_dest
- 1);
2904 has_lsr
|= has_bits_in (regs_src
& regs_dest
, r_dest
+ 1, 31);
2905 if (has_lsl
&& has_lsr
)
2906 return fail ("has both << and >>");
2908 if (aib
.get_code () == SIGN_EXTEND
)
2910 const absint_val_t x0
= aib
.arg (0);
2913 else if (r_sign
!= x0
.regno
)
2914 return fail ("too many signs");
2916 // Signs are handled below after all the other bytes.
2917 regs_signs
|= regmask (r_dest
, 1);
2921 // Step 2: Work on the individual bytes and emit according insns.
2924 memento_t memo
= curr
.regs
;
2926 const int step
= has_lsl
? -1 : 1;
2927 const int istart
= step
== 1 ? 0 : n_bytes
- 1;
2928 const int iend
= step
== 1 ? n_bytes
: -1;
2930 for (int i
= istart
; i
!= iend
; i
+= step
)
2932 const absint_byte_t
&aib
= ii
.m_ai
[i
];
2933 const int r_dest
= ii
.m_regno
+ i
;
2934 rtx_code code
= aib
.get_code ();
2935 rtx xsrc
= NULL_RTX
;
2936 rtx xdest
= gen_rtx_REG (QImode
, r_dest
);
2940 const int r_src
= aib
.regno (false);
2941 const int val8
= aib
.val8 (false);
2945 if (r_dest
== r_src
)
2947 // ...or an existing 16-bit constant...
2948 else if (AVR_HAVE_MOVW
2950 // Next is not a no-op.
2951 && ii
.m_ai
[i
+ step
].regno (false) != r_dest
+ step
2952 // Eligible for MOVW.
2953 && r_dest
+ step
== (r_dest
^ 1)
2954 && r_dest
% 2 == i
% 2
2955 && (r16
= ii
.m_ai
.reg16_with_value (i
, i
+ step
, memo
)))
2957 xdest
= gen_rtx_REG (HImode
, r_dest
& ~1);
2958 xsrc
= gen_rtx_REG (HImode
, r16
);
2961 // ...or a reg-reg move from a multi-byte move...
2963 // Prefer a reg-reg move over a (potential) load
2964 // of a constant, because the subsequent RTL
2965 // peephole pass may combine it to a MOVW again.
2967 && REG_P (curr
.ii
.m_src
))
2968 xsrc
= gen_rtx_REG (QImode
, r_src
);
2969 // ...or a cheap constant...
2971 && AVRasm::constant_cost (SET
, r_dest
, val8
) <= 1)
2972 xsrc
= gen_int_mode (val8
, QImode
);
2973 // ...or a reg-reg move...
2975 xsrc
= gen_rtx_REG (QImode
, r_src
);
2976 // ...or a costly constant that already exists in some reg...
2977 else if (memo
.regno_with_value (val8
, 0 /* excludes: none */))
2978 xsrc
= gen_rtx_REG (QImode
, memo
.regno_with_value (val8
, 0));
2979 // ...or a costly constant loaded into curr.insn's scratch reg...
2980 else if (ii
.m_scratch
)
2982 rtx xscratch
= gen_rtx_REG (QImode
, ii
.m_scratch
);
2983 rtx xval8
= gen_int_mode (val8
, QImode
);
2984 emit_and_apply_move (memo
, xscratch
, xval8
);
2987 // ...or a costly constant (1 or -1) that doesn't need a scratch.
2988 else if (! AVRasm::ldi_needs_scratch (r_dest
, val8
))
2991 xsrc
= gen_int_mode (val8
, QImode
);
2994 return fail ("expensive val8");
2996 else if (aib
.arity () == 1)
2998 if (aib
.get_code () == SIGN_EXTEND
)
2999 // Signs are handled after all the others.
3003 const absint_val_t x0
= aib
.arg (0);
3004 rtx xop0
= gen_rtx_REG (QImode
, x0
.regno
);
3005 xsrc
= gen_rtx_fmt_e (code
, QImode
, xop0
);
3008 else if (aib
.arity () == 2)
3012 insn_optimize_data_t
iod (memo
);
3015 if (! get_2ary_operands (code
, aib
, iod
, r_dest
, x0
, x1
, extra_cost
))
3017 rtx xop0
= gen_rtx_REG (QImode
, x0
.regno
);
3018 rtx xop1
= x1
.knows_val8 ()
3019 ? gen_int_mode (x1
.val8
, QImode
)
3020 : gen_rtx_REG (QImode
, x1
.regno
);
3022 xsrc
= gen_rtx_fmt_ee (code
, QImode
, xop0
, xop1
);
3026 return fail ("no source found");
3029 && (regmask (xdest
) & regmask (r_sign
, 1)))
3030 return fail ("clobbered r_sign");
3032 emit_and_apply_move (memo
, xdest
, xsrc
);
3035 // Step 3: Emit insns for sign extend.
3036 // No more need to track memo beyond this point.
3038 if (! emit_signs (r_sign
, regs_signs
))
3041 if (binop_cost
>= 0)
3043 avr_dump (";; Expected cost: %d + %d\n", n_new_insns
, extra_cost
);
3044 if (n_new_insns
+ extra_cost
> binop_cost
)
3045 return fail ("too expensive");
3049 ignore_mask
= regmask (ii
.m_scratch
, 1);
3055 // A helper for try_split_any() above.
3056 // Emit sign extends from R_MSB.7 to all regs in REGS_SIGNS.
3058 optimize_data_t::emit_signs (const int r_msb
, gprmask_t regs_signs
)
3063 return fail ("fatal: no r_msb given");
3065 // Pick an arbitrary reg from the sign destinations when the source
3066 // isn't one of the signs.
3067 const int r_signs
= regs_signs
& regmask (r_msb
, 1)
3069 : ctz_hwi (regs_signs
);
3071 // Set all bits in r_signs according to the sign of r_msb using the
3072 // r,r,C07 alternative of ashrqi3.
3073 rtx xsrc
= gen_rtx_fmt_ee (ASHIFTRT
, QImode
,
3074 gen_rtx_REG (QImode
, r_msb
), GEN_INT (7));
3075 emit_valid_move_clobbercc (gen_rtx_REG (QImode
, r_signs
), xsrc
);
3076 regs_signs
&= ~regmask (r_signs
, 1);
3078 // Set up a 16-bit sign register if possible.
3080 if (regs_signs
& regmask (r_signs
^ 1, 1))
3082 emit_move_mask (r_signs
^ 1, r_signs
, 1, regs_signs
);
3083 r16_signs
= r_signs
& ~1;
3086 // Handle all 16-bit signs regs provided MOVW.
3088 for (int r
= FIRST_GPR
; r
< REG_32
; r
+= 2)
3090 const gprmask_t m
= regmask (r
, 2);
3091 if ((m
& regs_signs
) == m
)
3094 emit_move_mask (r
, r16_signs
, 2, regs_signs
);
3097 emit_move_mask (r
+ 0, r_signs
, 1, regs_signs
);
3098 emit_move_mask (r
+ 1, r_signs
, 1, regs_signs
);
3104 // Handle all remaining signs.
3106 emit_move_mask (ctz_hwi (regs_signs
), r_signs
, 1, regs_signs
);
3111 // Helper for the method above. Move N_BYTES registers from R_SRC to R_DST,
3112 // keeping track of which regs are still to be done in MASK.
3114 optimize_data_t::emit_move_mask (int r_dst
, int r_src
, int n_bytes
,
3117 const gprmask_t mask_dst
= regmask (r_dst
, n_bytes
);
3118 const gprmask_t mask_src
= regmask (r_src
, n_bytes
);
3119 gcc_assert ((mask_dst
& mask
) == mask_dst
);
3120 gcc_assert ((mask_src
& mask
) == 0);
3121 rtx xdst
= gen_rtx_REG (size_to_mode (n_bytes
), r_dst
);
3122 rtx xsrc
= gen_rtx_REG (size_to_mode (n_bytes
), r_src
);
3123 emit_valid_move_clobbercc (xdst
, xsrc
);
3130 bbinfo_t::optimize_one_block (bool &changed
)
3132 memento_t prev_regs
;
3134 rtx_insn
*insn
= next_nondebug_insn_bb (bb
, BB_HEAD (bb
));
3136 for (rtx_insn
*next_insn
; insn
; insn
= next_insn
)
3138 next_insn
= next_nondebug_insn_bb (bb
, insn
);
3140 avr_dump ("\n;; Working on insn %d\n%r\n\n", INSN_UID (insn
), insn
);
3142 optimize_data_t
od (prev_regs
, regs
);
3144 od
.prev
.insn
= prev_nondebug_insn_bb (bb
, insn
);
3145 od
.curr
.insn
= insn
;
3147 od
.prev
.ii
.init1 (od
.prev
, 1, "IIprev ");
3148 od
.curr
.ii
.init1 (od
.curr
, 8, "IIcurr ");
3152 bool found
= ((bbinfo_t::try_fuse_p
&& od
.try_fuse (this))
3153 || (bbinfo_t::try_bin_arg1_p
&& od
.try_bin_arg1 (this))
3154 || (bbinfo_t::try_simplify_p
&& od
.try_simplify (this))
3155 || (bbinfo_t::try_split_ldi_p
&& od
.try_split_ldi (this))
3156 || (bbinfo_t::try_split_any_p
&& od
.try_split_any (this))
3157 || (bbinfo_t::try_mem0_p
&& od
.try_mem0 (this)));
3159 rtx_insn
*new_insns
= get_insns ();
3162 gcc_assert (found
== (od
.n_new_insns
>= 0));
3166 // This insn will become the previous one in the next loop iteration.
3167 // Just used in dumps.
3168 rtx_insn
*new_curr_insn
;
3173 avr_dump (";; Keeping old route.\n");
3174 gcc_assert (! od
.delete_prev_p
);
3177 regs
.apply_insn (insn
, false);
3179 new_curr_insn
= insn
;
3183 // We have new_insns.
3188 avr_dump ("\n;; EMIT %d new insn%s replacing ",
3189 od
.n_new_insns
, "s" + (od
.n_new_insns
== 1));
3190 if (od
.delete_prev_p
)
3191 avr_dump ("insn %d and ", INSN_UID (od
.prev
.insn
));
3192 avr_dump ("insn %d, delete_prev=%d:\n%L\n", INSN_UID (insn
),
3193 od
.delete_prev_p
, new_insns
);
3196 new_curr_insn
= od
.emit_sequence (bb
, new_insns
);
3199 if (dump_file
&& new_curr_insn
)
3203 const int d
= regs
.distance_to (prev_regs
);
3204 if (d
|| new_curr_insn
!= insn
)
3205 avr_dump (";; %d regs changed state:\n", d
);
3207 if (new_curr_insn
!= insn
)
3209 avr_dump (";; Befor insn %d", INSN_UID (new_curr_insn
));
3213 avr_dump (";; After insn %d", INSN_UID (new_curr_insn
));
3221 bbinfo_t::optimize_one_function (function
*func
)
3223 bbinfo_t::fpd
= XNEW (bbinfo_t::find_plies_data_t
);
3224 bbinfo_t::bb_info
= XCNEWVEC (bbinfo_t
, last_basic_block_for_fn (func
));
3225 int *post_order
= XNEWVEC (int, n_basic_blocks_for_fn (func
));
3227 plies_t::max_n_plies
= 0;
3229 using elt0_getter_HRS
= elt0_getter
<HARD_REG_SET
, HARD_REG_ELT_TYPE
>;
3230 memento_t::fixed_regs_mask
= (gprmask_t
) elt0_getter_HRS::get (fixed_reg_set
);
3232 // Option -mfuse-move=<0,23> provides a 3:2:2:2 mixed radix value:
3233 // -mfuse-move= 0 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 20 1 2 3 Digit
3234 // fuse 1 1 1 1 1 1 1 1 1 1 1 1 0
3235 // bin_arg1 1 1 1 1 1 1 1 1 1 1 1 1 1
3236 // split_any 1 1 1 1 1 1 1 1 1 1 1 1 2
3237 // split_ldi 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3
3238 // use arith 1 1 1 1 1 1 1 1 3
3240 // Which optimization(s) to perform.
3241 bbinfo_t::try_fuse_p
= avropt_fuse_move
& 0x1; // Digit 0 in [0, 1].
3242 bbinfo_t::try_mem0_p
= avropt_fuse_move
& 0x1; // Digit 0 in [0, 1].
3243 bbinfo_t::try_bin_arg1_p
= avropt_fuse_move
& 0x2; // Digit 1 in [0, 1].
3244 bbinfo_t::try_split_any_p
= avropt_fuse_move
& 0x4; // Digit 2 in [0, 1].
3245 bbinfo_t::try_split_ldi_p
= avropt_fuse_move
>> 3; // Digit 3 in [0, 2].
3246 bbinfo_t::use_arith_p
= (avropt_fuse_move
>> 3) >= 2; // Digit 3 in [0, 2].
3247 bbinfo_t::use_set_some_p
= bbinfo_t::try_split_ldi_p
; // Digit 3 in [0, 2].
3248 bbinfo_t::try_simplify_p
= avropt_fuse_move
!= 0;
3250 // Topologically sort BBs from last to first.
3252 const int n_post_order
= post_order_compute (post_order
, false, false);
3253 bool changed
= false;
3255 // Traverse the BBs from first to last in order to increase the chance
3256 // that register values from all incoming edges are known.
3258 for (int n
= n_post_order
- 1; n
>= 0; --n
)
3260 basic_block bb
= BASIC_BLOCK_FOR_FN (func
, post_order
[n
]);
3262 bbinfo_t::bb_info
[bb
->index
].bb
= bb
;
3263 bbinfo_t::bb_info
[bb
->index
].enter ();
3264 bbinfo_t::bb_info
[bb
->index
].optimize_one_block (changed
);
3265 bbinfo_t::bb_info
[bb
->index
].leave ();
3268 if (plies_t::max_n_plies
)
3269 avr_dump (";; max_n_plies=%d\n", (int) plies_t::max_n_plies
);
3273 df_note_add_problem ();
3277 XDELETEVEC (post_order
);
3278 XDELETEVEC (bbinfo_t::bb_info
);
3279 XDELETE (bbinfo_t::fpd
);
3282 } // anonymous namespace
3289 //////////////////////////////////////////////////////////////////////////////
3290 // Try to replace 2 cbranch insns with 1 comparison and 2 branches.
3292 static const pass_data avr_pass_data_ifelse
=
3295 "", // name (will be patched)
3296 OPTGROUP_NONE
, // optinfo_flags
3297 TV_DF_SCAN
, // tv_id
3298 0, // properties_required
3299 0, // properties_provided
3300 0, // properties_destroyed
3301 0, // todo_flags_start
3302 TODO_df_finish
| TODO_df_verify
// todo_flags_finish
3305 class avr_pass_ifelse
: public rtl_opt_pass
3308 avr_pass_ifelse (gcc::context
*ctxt
, const char *name
)
3309 : rtl_opt_pass (avr_pass_data_ifelse
, ctxt
)
3314 bool gate (function
*) final override
3316 return optimize
> 0;
3319 unsigned int execute (function
*func
) final override
;
3320 }; // avr_pass_ifelse
3323 /* Return TRUE iff comparison code CODE is explicitly signed. */
3326 avr_strict_signed_p (rtx_code code
)
3328 return code
== GT
|| code
== GE
|| code
== LT
|| code
== LE
;
3332 /* Return TRUE iff comparison code CODE is explicitly unsigned. */
3335 avr_strict_unsigned_p (rtx_code code
)
3337 return code
== GTU
|| code
== GEU
|| code
== LTU
|| code
== LEU
;
3340 #include "config/avr/ranges.h"
3342 /* Suppose the inputs represent a code like
3344 if (x <CMP1> XVAL1) goto way1;
3345 if (x <CMP2> XVAL2) goto way2;
3348 with two integer mode comparisons where XVAL1 and XVAL2 are CONST_INT.
3349 When this can be rewritten in the form
3351 if (x <cond1> xval) goto way1;
3352 if (x <cond2> xval) goto way2;
3355 then set CMP1 = cond1, CMP2 = cond2, and return xval. Else return NULL_RTX.
3356 When SWAPT is returned true, then way1 and way2 must be swapped.
3357 When the incomping SWAPT is false, the outgoing one will be false, too. */
3360 avr_2comparisons_rhs (rtx_code
&cmp1
, rtx xval1
,
3361 rtx_code
&cmp2
, rtx xval2
,
3362 machine_mode mode
, bool &swapt
)
3364 const bool may_swapt
= swapt
;
3367 //////////////////////////////////////////////////////////////////
3368 // Step 0: Decide about signedness, map xval1/2 to the range
3369 // of [un]signed machine mode.
3371 const bool signed1_p
= avr_strict_signed_p (cmp1
);
3372 const bool signed2_p
= avr_strict_signed_p (cmp2
);
3373 const bool unsigned1_p
= avr_strict_unsigned_p (cmp1
);
3374 const bool unsigned2_p
= avr_strict_unsigned_p (cmp2
);
3375 const bool signed_p
= signed1_p
|| signed2_p
;
3376 bool unsigned_p
= unsigned1_p
|| unsigned2_p
;
3378 using T
= Ranges::scalar_type
;
3379 T val1
= INTVAL (xval1
);
3380 T val2
= INTVAL (xval2
);
3382 if (signed_p
+ unsigned_p
> 1)
3384 // Don't go down that rabbit hole. When the RHSs are the
3385 // same, we can still save one comparison.
3386 return val1
== val2
? xval1
: NULL_RTX
;
3389 // Decide about signedness. When no explicit signedness is present,
3390 // then cases that are close to the unsigned boundary like EQ 0, EQ 1
3391 // can also be optimized.
3393 || (! signed_p
&& IN_RANGE (val1
, -2, 2)))
3396 val1
= UINTVAL (xval1
) & GET_MODE_MASK (mode
);
3397 val2
= UINTVAL (xval2
) & GET_MODE_MASK (mode
);
3400 // No way we can decompose the domain in a usable manner when the
3401 // RHSes are too far apart.
3402 if (! IN_RANGE (val1
- val2
, -2, 2))
3405 //////////////////////////////////////////////////////////////////
3406 // Step 1: Represent the input conditions as truth Ranges. This
3407 // establishes a decomposition / coloring of the domain.
3409 Ranges dom
= Ranges::NBitsRanges (GET_MODE_BITSIZE (mode
), unsigned_p
,
3411 Ranges r
[4] = { dom
, dom
.truth (cmp1
, val1
), dom
.truth (cmp2
, val2
), dom
};
3413 // r[1] shadows r[2] shadows r[3]. r[0] is just for nice indices.
3418 //////////////////////////////////////////////////////////////////
3419 // Step 2: Filter for cases where the domain decomposes into three
3420 // intervals: One to the left, one to the right, and one
3421 // in the middle where the latter holds exactly one value.
3423 for (int i
= 1; i
<= 3; ++i
)
3425 // Keep track of which Ranges is which.
3428 gcc_assert (r
[i
].check ());
3430 // Filter for proper intervals. Also return for the empty set,
3431 // since cases where [m_min, m_max] decomposes into two intervals
3432 // or less have been sorted out by the generic optimizers already,
3433 // and hence should not be seen here. And more than two intervals
3434 // at a time cannot be optimized of course.
3435 if (r
[i
].size () != 1)
3439 // Bubble-sort the three intervals such that:
3440 // [1] is the left interval, i.e. the one taken by LT[U].
3441 // [2] is the middle interval, i.e. the one taken by EQ.
3442 // [3] is the right interval, i.e. the one taken by GT[U].
3443 Ranges::sort2 (r
[1], r
[3]);
3444 Ranges::sort2 (r
[2], r
[3]);
3445 Ranges::sort2 (r
[1], r
[2]);
3449 ";; Decomposed: .%d=[%ld, %ld] .%d=[%ld, %ld] .%d=[%ld, %ld]\n",
3450 r
[1].tag
, (long) r
[1].ranges
[0].lo
, (long) r
[1].ranges
[0].hi
,
3451 r
[2].tag
, (long) r
[2].ranges
[0].lo
, (long) r
[2].ranges
[0].hi
,
3452 r
[3].tag
, (long) r
[3].ranges
[0].lo
, (long) r
[3].ranges
[0].hi
);
3454 // EQ / NE can handle only one value.
3455 if (r
[2].cardinality (0) != 1)
3458 // Success! This is the sought for xval.
3459 const T val
= r
[2].ranges
[0].lo
;
3461 //////////////////////////////////////////////////////////////////
3462 // Step 3: Work out which label gets which condition, trying to
3463 // avoid the expensive codes GT[U] and LE[U] if possible.
3464 // Avoiding expensive codes is always possible when labels
3465 // way1 and way2 may be swapped.
3467 // The xx1 ways have an expensive GT for cmp1 which can be avoided
3468 // by swapping way1 with way2.
3469 swapt
= may_swapt
&& r
[3].tag
== 1;
3471 std::swap (r
[3], r
[2].tag
== 2 ? r
[2] : r
[1]);
3473 // 6 = 3! ways to assign LT, EQ, GT to the three labels.
3474 const int way
= 100 * r
[1].tag
+ 10 * r
[2].tag
+ r
[3].tag
;
3477 fprintf (dump_file
, ";; Success: unsigned=%d, swapt=%d, way=%d, rhs=%ld\n",
3478 unsigned_p
, swapt
, way
, (long) val
);
3480 #define WAY(w, c1, c2) \
3482 cmp1 = unsigned_p ? unsigned_condition (c1) : c1; \
3483 cmp2 = unsigned_p ? unsigned_condition (c2) : c2; \
3491 // cmp1 gets the LT, avoid difficult branches for cmp2.
3495 // cmp1 gets the EQ, avoid difficult branches for cmp2.
3499 // cmp1 gets the difficult GT, unavoidable as we may not swap way1/2.
3506 return gen_int_mode (val
, mode
);
3510 /* A helper for the next method. Suppose we have two conditional branches
3511 with REG and CONST_INT operands
3513 if (reg <cond1> xval1) goto label1;
3514 if (reg <cond2> xval2) goto label2;
3516 If the second comparison is redundant and there are codes <cmp1>
3517 and <cmp2> such that the sequence can be performed as
3519 REG_CC = compare (reg, xval);
3520 if (REG_CC <cmp1> 0) goto label1;
3521 if (REG_CC <cmp2> 0) goto label2;
3523 then set COND1 to cmp1, COND2 to cmp2, SWAPT to true when the branch
3524 targets have to be swapped, and return XVAL. Otherwise, return NULL_RTX.
3525 This function may clobber COND1 and COND2 even when it returns NULL_RTX.
3527 REVERSE_COND1 can be set to reverse condition COND1. This is useful
3528 when the second comparison does not follow the first one, but is
3529 located after label1 like in:
3531 if (reg <cond1> xval1) goto label1;
3534 if (reg <cond2> xval2) goto label2;
3536 In such a case we cannot swap the labels, and we may end up with a
3537 difficult branch -- though one comparison can still be optimized out.
3538 Getting rid of such difficult branches would require to reorder blocks. */
3541 avr_redundant_compare (rtx xreg1
, rtx_code
&cond1
, rtx xval1
,
3542 rtx xreg2
, rtx_code
&cond2
, rtx xval2
,
3543 bool reverse_cond1
, bool &swapt
)
3545 // Make sure we have two REG <cond> CONST_INT comparisons with the same reg.
3546 if (! rtx_equal_p (xreg1
, xreg2
)
3547 || ! CONST_INT_P (xval1
)
3548 || ! CONST_INT_P (xval2
))
3552 cond1
= reverse_condition (cond1
);
3554 // Allow swapping label1 <-> label2 only when ! reverse_cond1.
3555 swapt
= ! reverse_cond1
;
3556 rtx_code c1
= cond1
;
3557 rtx_code c2
= cond2
;
3558 rtx xval
= avr_2comparisons_rhs (c1
, xval1
,
3559 c2
, xval2
, GET_MODE (xreg1
), swapt
);
3565 rtx_code a1
= reverse_cond1
? reverse_condition (cond1
) : cond1
;
3566 rtx_code b1
= reverse_cond1
? reverse_condition (c1
) : c1
;
3567 const char *s_rev1
= reverse_cond1
? " reverse_cond1" : "";
3568 avr_dump (";; cond1: %C %r%s\n", a1
, xval1
, s_rev1
);
3569 avr_dump (";; cond2: %C %r\n", cond2
, xval2
);
3570 avr_dump (";; => %C %d\n", b1
, (int) INTVAL (xval
));
3571 avr_dump (";; => %C %d\n", c2
, (int) INTVAL (xval
));
3581 /* Similar to the function above, but assume that
3583 if (xreg1 <cond1> xval1) goto label1;
3584 if (xreg2 <cond2> xval2) goto label2;
3586 are two subsequent REG-REG comparisons. When this can be represented as
3588 REG_CC = compare (reg, xval);
3589 if (REG_CC <cmp1> 0) goto label1;
3590 if (REG_CC <cmp2> 0) goto label2;
3592 then set XREG1 to reg, COND1 and COND2 accordingly, and return xval.
3593 Otherwise, return NULL_RTX. This optmization can be performed
3594 when { xreg1, xval1 } and { xreg2, xval2 } are equal as sets.
3595 It can be done in such a way that no difficult branches occur. */
3598 avr_redundant_compare_regs (rtx
&xreg1
, rtx_code
&cond1
, rtx
&xval1
,
3599 rtx
&xreg2
, rtx_code
&cond2
, rtx
&xval2
,
3604 if (! REG_P (xval1
))
3606 else if (rtx_equal_p (xreg1
, xreg2
)
3607 && rtx_equal_p (xval1
, xval2
))
3609 else if (rtx_equal_p (xreg1
, xval2
)
3610 && rtx_equal_p (xreg2
, xval1
))
3615 // Found a redundant REG-REG comparison. Assume that the incoming
3616 // representation has been canonicalized by CANONICALIZE_COMPARISON.
3617 // We can always represent this using only one comparison and in such
3618 // a way that no difficult branches are required.
3622 const char *s_rev1
= reverse_cond1
? " reverse_cond1" : "";
3623 avr_dump (";; %r %C %r%s\n", xreg1
, cond1
, xval1
, s_rev1
);
3624 avr_dump (";; %r %C %r\n", xreg2
, cond2
, xval2
);
3628 cond1
= reverse_condition (cond1
);
3632 if (cond1
== EQ
|| cond1
== NE
)
3634 avr_dump (";; case #21\n");
3635 std::swap (xreg1
, xval1
);
3639 std::swap (xreg2
, xval2
);
3640 cond2
= swap_condition (cond2
);
3642 // The swap may have introduced a difficult comparison.
3643 // In order to get of it, only a few cases need extra care.
3644 if ((cond1
== LT
&& cond2
== GT
)
3645 || (cond1
== LTU
&& cond2
== GTU
))
3647 avr_dump (";; case #22\n");
3651 avr_dump (";; case #23\n");
3655 avr_dump (";; case #20\n");
3661 /* INSN1 and INSN2 are two cbranch insns for the same integer mode.
3662 When FOLLOW_LABEL1 is false, then INSN2 is located in the fallthrough
3663 path of INSN1. When FOLLOW_LABEL1 is true, then INSN2 is located at
3664 the true edge of INSN1, INSN2 is preceded by a barrier, and no other
3665 edge leads to the basic block of INSN2.
3667 Try to replace INSN1 and INSN2 by a compare insn and two branch insns.
3668 When such a replacement has been performed, then return the insn where the
3669 caller should continue scanning the insn stream. Else, return nullptr. */
3672 avr_optimize_2ifelse (rtx_jump_insn
*insn1
,
3673 rtx_jump_insn
*insn2
, bool follow_label1
)
3675 avr_dump (";; Investigating jump_insn %d and jump_insn %d.\n",
3676 INSN_UID (insn1
), INSN_UID (insn2
));
3678 // Extract the operands of the insns:
3679 // $0 = comparison operator ($1, $2)
3681 // $2 = reg or const_int
3683 // $4 = optional SCRATCH for HI, PSI, SI cases.
3685 const auto &op
= recog_data
.operand
;
3687 extract_insn (insn1
);
3688 rtx xop1
[5] = { op
[0], op
[1], op
[2], op
[3], op
[4] };
3689 int n_operands
= recog_data
.n_operands
;
3691 extract_insn (insn2
);
3692 rtx xop2
[5] = { op
[0], op
[1], op
[2], op
[3], op
[4] };
3694 rtx_code code1
= GET_CODE (xop1
[0]);
3695 rtx_code code2
= GET_CODE (xop2
[0]);
3696 bool swap_targets
= false;
3698 // Search redundant REG-REG comparison.
3699 rtx xval
= avr_redundant_compare_regs (xop1
[1], code1
, xop1
[2],
3700 xop2
[1], code2
, xop2
[2],
3703 // Search redundant REG-CONST_INT comparison.
3705 xval
= avr_redundant_compare (xop1
[1], code1
, xop1
[2],
3706 xop2
[1], code2
, xop2
[2],
3707 follow_label1
, swap_targets
);
3710 avr_dump (";; Nothing found for jump_insn %d and jump_insn %d.\n",
3711 INSN_UID (insn1
), INSN_UID (insn2
));
3716 code1
= reverse_condition (code1
);
3718 //////////////////////////////////////////////////////
3719 // Found a replacement.
3723 avr_dump (";; => %C %r\n", code1
, xval
);
3724 avr_dump (";; => %C %r\n", code2
, xval
);
3726 fprintf (dump_file
, "\n;; Found chain of jump_insn %d and"
3727 " jump_insn %d, follow_label1=%d:\n",
3728 INSN_UID (insn1
), INSN_UID (insn2
), follow_label1
);
3729 print_rtl_single (dump_file
, PATTERN (insn1
));
3730 print_rtl_single (dump_file
, PATTERN (insn2
));
3734 = next_nonnote_nondebug_insn (follow_label1
? insn1
: insn2
);
3736 // Pop the new branch conditions and the new comparison.
3737 // Prematurely split into compare + branch so that we can drop
3738 // the 2nd comparison. The following pass, split2, splits all
3739 // insns for REG_CC, and it should still work as usual even when
3740 // there are already some REG_CC insns around.
3742 rtx xcond1
= gen_rtx_fmt_ee (code1
, VOIDmode
, cc_reg_rtx
, const0_rtx
);
3743 rtx xcond2
= gen_rtx_fmt_ee (code2
, VOIDmode
, cc_reg_rtx
, const0_rtx
);
3744 rtx xpat1
= gen_branch (xop1
[3], xcond1
);
3745 rtx xpat2
= gen_branch (xop2
[3], xcond2
);
3746 rtx xcompare
= NULL_RTX
;
3747 machine_mode mode
= GET_MODE (xop1
[1]);
3751 gcc_assert (n_operands
== 4);
3752 xcompare
= gen_cmpqi3 (xop1
[1], xval
);
3756 gcc_assert (n_operands
== 5);
3757 rtx scratch
= GET_CODE (xop1
[4]) == SCRATCH
? xop2
[4] : xop1
[4];
3758 rtx (*gen_cmp
)(rtx
,rtx
,rtx
)
3759 = mode
== HImode
? gen_gen_comparehi
3760 : mode
== PSImode
? gen_gen_comparepsi
3761 : gen_gen_comparesi
; // SImode
3762 xcompare
= gen_cmp (xop1
[1], xval
, scratch
);
3767 rtx_insn
*cmp
= emit_insn_before (xcompare
, insn1
);
3768 rtx_jump_insn
*branch1
= emit_jump_insn_after (xpat1
, insn1
);
3769 rtx_jump_insn
*branch2
= emit_jump_insn_after (xpat2
, insn2
);
3771 JUMP_LABEL (branch1
) = xop1
[3];
3772 JUMP_LABEL (branch2
) = xop2
[3];
3773 // delete_insn() decrements LABEL_NUSES when deleting a JUMP_INSN,
3774 // but when we pop a new JUMP_INSN, do it by hand.
3775 ++LABEL_NUSES (xop1
[3]);
3776 ++LABEL_NUSES (xop2
[3]);
3778 delete_insn (insn1
);
3779 delete_insn (insn2
);
3783 gcc_assert (! follow_label1
);
3785 basic_block to1
= BLOCK_FOR_INSN (xop1
[3]);
3786 basic_block to2
= BLOCK_FOR_INSN (xop2
[3]);
3787 edge e1
= find_edge (BLOCK_FOR_INSN (branch1
), to1
);
3788 edge e2
= find_edge (BLOCK_FOR_INSN (branch2
), to2
);
3791 redirect_edge_and_branch (e1
, to2
);
3792 redirect_edge_and_branch (e2
, to1
);
3795 // As a side effect, also recog the new insns.
3796 gcc_assert (valid_insn_p (cmp
));
3797 gcc_assert (valid_insn_p (branch1
));
3798 gcc_assert (valid_insn_p (branch2
));
3806 SREG = compare (reg, 1 + val);
3807 if (SREG >= 0) goto label1;
3808 SREG = compare (reg, val);
3809 if (SREG == 0) goto label2;
3813 SREG = compare (reg, val);
3814 if (SREG == 0) goto label2;
3815 if (SREG >= 0) goto label1;
3817 Almost all cases where one of the comparisons is redundant can
3818 be transformed in such a way that only one comparison is required
3819 and no difficult branches are needed. */
3822 avr_pass_ifelse::execute (function
*)
3824 rtx_insn
*next_insn
;
3826 for (rtx_insn
*insn
= get_insns (); insn
; insn
= next_insn
)
3828 next_insn
= next_nonnote_nondebug_insn (insn
);
3833 // Search for two cbranch insns. The first one is a cbranch.
3834 // Filter for "cbranch<mode>4_insn" with mode in QI, HI, PSI, SI.
3836 if (! JUMP_P (insn
))
3839 int icode1
= recog_memoized (insn
);
3841 if (icode1
!= CODE_FOR_cbranchqi4_insn
3842 && icode1
!= CODE_FOR_cbranchhi4_insn
3843 && icode1
!= CODE_FOR_cbranchpsi4_insn
3844 && icode1
!= CODE_FOR_cbranchsi4_insn
)
3847 rtx_jump_insn
*insn1
= as_a
<rtx_jump_insn
*> (insn
);
3849 // jmp[0]: We can optimize cbranches that follow cbranch insn1.
3850 rtx_insn
*jmp
[2] = { next_insn
, nullptr };
3852 // jmp[1]: A cbranch following the label of cbranch insn1.
3853 if (LABEL_NUSES (JUMP_LABEL (insn1
)) == 1)
3855 rtx_insn
*code_label1
= JUMP_LABEL_AS_INSN (insn1
);
3856 rtx_insn
*barrier
= prev_nonnote_nondebug_insn (code_label1
);
3858 // When the target label of insn1 is used exactly once and is
3859 // not a fallthrough, i.e. is preceded by a barrier, then
3860 // consider the insn following that label.
3861 if (barrier
&& BARRIER_P (barrier
))
3862 jmp
[1] = next_nonnote_nondebug_insn (code_label1
);
3865 // With almost certainty, only one of the two possible jumps can
3866 // be optimized with insn1, but it's hard to tell which one a priori.
3867 // Just try both. In the unlikely case where both could be optimized,
3868 // prefer jmp[0] because eliminating difficult branches is impeded
3869 // by following label1.
3871 for (int j
= 0; j
< 2; ++j
)
3872 if (jmp
[j
] && JUMP_P (jmp
[j
])
3873 && recog_memoized (jmp
[j
]) == icode1
)
3876 = avr_optimize_2ifelse (insn1
, as_a
<rtx_jump_insn
*> (jmp
[j
]),
3877 j
== 1 /* follow_label1 */);
3892 //////////////////////////////////////////////////////////////////////////////
3893 // Optimize results of the casesi expander for modes < SImode.
3895 static const pass_data avr_pass_data_casesi
=
3898 "", // name (will be patched)
3899 OPTGROUP_NONE
, // optinfo_flags
3900 TV_DF_SCAN
, // tv_id
3901 0, // properties_required
3902 0, // properties_provided
3903 0, // properties_destroyed
3904 0, // todo_flags_start
3905 0 // todo_flags_finish
3908 class avr_pass_casesi
: public rtl_opt_pass
3911 avr_pass_casesi (gcc::context
*ctxt
, const char *name
)
3912 : rtl_opt_pass (avr_pass_data_casesi
, ctxt
)
3917 bool gate (function
*) final override
3919 return optimize
> 0;
3922 unsigned int execute (function
*) final override
;
3923 }; // avr_pass_casesi
3926 /* Make one parallel insn with all the patterns from insns i[0]..i[5]. */
3929 avr_parallel_insn_from_insns (rtx_insn
*i
[5])
3931 rtvec vec
= gen_rtvec (5, PATTERN (i
[0]), PATTERN (i
[1]), PATTERN (i
[2]),
3932 PATTERN (i
[3]), PATTERN (i
[4]));
3934 emit (gen_rtx_PARALLEL (VOIDmode
, vec
));
3935 rtx_insn
*insn
= get_insns ();
3942 /* Return true if we see an insn stream generated by casesi expander together
3943 with an extension to SImode of the switch value.
3945 If this is the case, fill in the insns from casesi to INSNS[1..5] and
3946 the SImode extension to INSNS[0]. Moreover, extract the operands of
3947 pattern casesi_<mode>_sequence forged from the sequence to recog_data. */
3950 avr_is_casesi_sequence (basic_block bb
, rtx_insn
*insn
, rtx_insn
*insns
[5])
3954 /* A first and quick test for a casesi sequences. As a side effect of
3955 the test, harvest respective insns to INSNS[0..4]. */
3957 if (!(JUMP_P (insns
[4] = insn
)
3958 // casesi is the only insn that comes up with UNSPEC_INDEX_JMP,
3959 // hence the following test ensures that we are actually dealing
3960 // with code from casesi.
3961 && (set_4
= single_set (insns
[4]))
3962 && UNSPEC
== GET_CODE (SET_SRC (set_4
))
3963 && UNSPEC_INDEX_JMP
== XINT (SET_SRC (set_4
), 1)
3965 && (insns
[3] = prev_real_insn (insns
[4]))
3966 && (insns
[2] = prev_real_insn (insns
[3]))
3967 && (insns
[1] = prev_real_insn (insns
[2]))
3969 // Insn prior to casesi.
3970 && (insns
[0] = prev_real_insn (insns
[1]))
3971 && (set_0
= single_set (insns
[0]))
3972 && extend_operator (SET_SRC (set_0
), SImode
)))
3979 fprintf (dump_file
, ";; Sequence from casesi in "
3980 "[bb %d]:\n\n", bb
->index
);
3981 for (int i
= 0; i
< 5; i
++)
3982 print_rtl_single (dump_file
, insns
[i
]);
3985 /* We have to deal with quite some operands. Extracting them by hand
3986 would be tedious, therefore wrap the insn patterns into a parallel,
3987 run recog against it and then use insn extract to get the operands. */
3989 rtx_insn
*xinsn
= avr_parallel_insn_from_insns (insns
);
3991 INSN_CODE (xinsn
) = recog (PATTERN (xinsn
), xinsn
, NULL
/* num_clobbers */);
3993 /* Failing to recognize means that someone changed the casesi expander or
3994 that some passes prior to this one performed some unexpected changes.
3995 Gracefully drop such situations instead of aborting. */
3997 if (INSN_CODE (xinsn
) < 0)
4000 fprintf (dump_file
, ";; Sequence not recognized, giving up.\n\n");
4005 gcc_assert (CODE_FOR_casesi_qi_sequence
== INSN_CODE (xinsn
)
4006 || CODE_FOR_casesi_hi_sequence
== INSN_CODE (xinsn
));
4008 extract_insn (xinsn
);
4010 // Assert on the anatomy of xinsn's operands we are going to work with.
4012 gcc_assert (recog_data
.n_operands
== 12);
4013 gcc_assert (recog_data
.n_dups
== 3);
4017 fprintf (dump_file
, ";; Operands extracted:\n");
4018 for (int i
= 0; i
< recog_data
.n_operands
; i
++)
4019 avr_fdump (dump_file
, ";; $%d = %r\n", i
, recog_data
.operand
[i
]);
4020 fprintf (dump_file
, "\n");
4027 /* INSNS[1..4] is a sequence as generated by casesi and INSNS[0] is an
4028 extension of an 8-bit or 16-bit integer to SImode. XOP contains the
4029 operands of INSNS as extracted by insn_extract from pattern
4030 casesi_<mode>_sequence:
4032 $0: SImode reg switch value as result of $10.
4033 $1: Negative of smallest index in switch.
4034 $2: Number of entries in switch.
4036 $4: Label if out-of-bounds.
4038 $6: 3-byte PC: subreg:HI ($5) + label_ref ($3)
4039 2-byte PC: subreg:HI ($5)
4040 $7: HI reg index into table (Z or pseudo)
4041 $8: Z or scratch:HI (to be clobbered)
4042 $9: R24 or const0_rtx (to be clobbered)
4043 $10: Extension to SImode of an 8-bit or 16-bit integer register $11.
4044 $11: QImode or HImode register input of $10.
4046 Try to optimize this sequence, i.e. use the original HImode / QImode
4047 switch value instead of SImode. */
4050 avr_optimize_casesi (rtx_insn
*insns
[5], rtx
*xop
)
4052 // Original mode of the switch value; this is QImode or HImode.
4053 machine_mode mode
= GET_MODE (xop
[11]);
4055 // How the original switch value was extended to SImode; this is
4056 // SIGN_EXTEND or ZERO_EXTEND.
4057 rtx_code code
= GET_CODE (xop
[10]);
4059 // Lower index, upper index (plus one) and range of case calues.
4060 HOST_WIDE_INT low_idx
= -INTVAL (xop
[1]);
4061 HOST_WIDE_INT num_idx
= INTVAL (xop
[2]);
4062 HOST_WIDE_INT hig_idx
= low_idx
+ num_idx
;
4064 // Maximum ranges of (un)signed QImode resp. HImode.
4065 unsigned umax
= QImode
== mode
? 0xff : 0xffff;
4066 int imax
= QImode
== mode
? 0x7f : 0x7fff;
4067 int imin
= -imax
- 1;
4069 // Testing the case range and whether it fits into the range of the
4070 // (un)signed mode. This test should actually always pass because it
4071 // makes no sense to have case values outside the mode range. Notice
4072 // that case labels which are unreachable because they are outside the
4073 // mode of the switch value (e.g. "case -1" for uint8_t) have already
4074 // been thrown away by the middle-end.
4076 if (SIGN_EXTEND
== code
4082 else if (ZERO_EXTEND
== code
4084 && (unsigned) hig_idx
<= umax
)
4091 fprintf (dump_file
, ";; Case ranges too big, giving up.\n\n");
4095 // Do normalization of switch value $10 and out-of-bound check in its
4096 // original mode instead of in SImode. Use a newly created pseudo.
4097 // This will replace insns[1..2].
4101 rtx reg
= copy_to_mode_reg (mode
, xop
[11]);
4103 rtx (*gen_add
)(rtx
,rtx
,rtx
) = QImode
== mode
? gen_addqi3
: gen_addhi3
;
4104 rtx (*gen_cbranch
)(rtx
,rtx
,rtx
,rtx
)
4105 = QImode
== mode
? gen_cbranchqi4
: gen_cbranchhi4
;
4107 emit_insn (gen_add (reg
, reg
, gen_int_mode (-low_idx
, mode
)));
4108 rtx op0
= reg
; rtx op1
= gen_int_mode (num_idx
, mode
);
4109 rtx labelref
= copy_rtx (xop
[4]);
4110 rtx xbranch
= gen_cbranch (gen_rtx_fmt_ee (GTU
, VOIDmode
, op0
, op1
),
4111 op0
, op1
, labelref
);
4112 rtx_insn
*cbranch
= emit_jump_insn (xbranch
);
4113 JUMP_LABEL (cbranch
) = xop
[4];
4114 ++LABEL_NUSES (xop
[4]);
4116 rtx_insn
*seq1
= get_insns ();
4117 rtx_insn
*last1
= get_last_insn ();
4120 emit_insn_after (seq1
, insns
[2]);
4122 // After the out-of-bounds test and corresponding branch, use a
4123 // 16-bit index. If QImode is used, extend it to HImode first.
4124 // This will replace insns[4].
4129 reg
= force_reg (HImode
, gen_rtx_fmt_e (code
, HImode
, reg
));
4131 rtx pat_4
= AVR_3_BYTE_PC
4132 ? gen_movhi (xop
[7], reg
)
4133 : gen_addhi3 (xop
[7], reg
, gen_rtx_LABEL_REF (VOIDmode
, xop
[3]));
4137 rtx_insn
*seq2
= get_insns ();
4138 rtx_insn
*last2
= get_last_insn ();
4141 emit_insn_after (seq2
, insns
[3]);
4145 fprintf (dump_file
, ";; New insns: ");
4147 for (rtx_insn
*insn
= seq1
; ; insn
= NEXT_INSN (insn
))
4149 fprintf (dump_file
, "%d, ", INSN_UID (insn
));
4153 for (rtx_insn
*insn
= seq2
; ; insn
= NEXT_INSN (insn
))
4155 fprintf (dump_file
, "%d%s", INSN_UID (insn
),
4156 insn
== last2
? ".\n\n" : ", ");
4161 fprintf (dump_file
, ";; Deleting insns: %d, %d, %d.\n\n",
4162 INSN_UID (insns
[1]), INSN_UID (insns
[2]), INSN_UID (insns
[3]));
4165 // Pseudodelete the SImode and subreg of SImode insns. We don't care
4166 // about the extension insns[0]: Its result is now unused and other
4167 // passes will clean it up.
4169 SET_INSN_DELETED (insns
[1]);
4170 SET_INSN_DELETED (insns
[2]);
4171 SET_INSN_DELETED (insns
[3]);
4176 avr_pass_casesi::execute (function
*func
)
4180 FOR_EACH_BB_FN (bb
, func
)
4182 rtx_insn
*insn
, *insns
[5];
4184 FOR_BB_INSNS (bb
, insn
)
4186 if (avr_is_casesi_sequence (bb
, insn
, insns
))
4188 avr_optimize_casesi (insns
, recog_data
.operand
);
4196 } // anonymous namespace
4198 /* Perform some extra checks on operands of casesi_<mode>_sequence.
4199 Not all operand dependencies can be described by means of predicates.
4200 This function performs left over checks and should always return true.
4201 Returning false means that someone changed the casesi expander but did
4202 not adjust casesi_<mode>_sequence. */
4205 avr_casei_sequence_check_operands (rtx
*xop
)
4207 rtx sub_5
= NULL_RTX
;
4209 if (AVR_HAVE_EIJMP_EICALL
4210 // The last clobber op of the tablejump.
4211 && xop
[9] == all_regs_rtx
[REG_24
])
4213 // $6 is: (subreg:SI ($5) 0)
4217 if (!AVR_HAVE_EIJMP_EICALL
4218 // $6 is: (plus:HI (subreg:SI ($5) 0)
4219 // (label_ref ($3)))
4220 && PLUS
== GET_CODE (xop
[6])
4221 && LABEL_REF
== GET_CODE (XEXP (xop
[6], 1))
4222 && rtx_equal_p (xop
[3], XEXP (XEXP (xop
[6], 1), 0))
4223 // The last clobber op of the tablejump.
4224 && xop
[9] == const0_rtx
)
4226 sub_5
= XEXP (xop
[6], 0);
4231 && SUBREG_BYTE (sub_5
) == 0
4232 && rtx_equal_p (xop
[5], SUBREG_REG (sub_5
)))
4236 fprintf (dump_file
, "\n;; Failed condition for casesi_<mode>_sequence\n\n");
4245 //////////////////////////////////////////////////////////////////////////////
4246 // Find more POST_INC and PRE_DEC cases.
4248 static const pass_data avr_pass_data_fuse_add
=
4251 "", // name (will be patched)
4252 OPTGROUP_NONE
, // optinfo_flags
4253 TV_MACH_DEP
, // tv_id
4254 0, // properties_required
4255 0, // properties_provided
4256 0, // properties_destroyed
4257 0, // todo_flags_start
4258 TODO_df_finish
// todo_flags_finish
4261 class avr_pass_fuse_add
: public rtl_opt_pass
4264 avr_pass_fuse_add (gcc::context
*ctxt
, const char *name
)
4265 : rtl_opt_pass (avr_pass_data_fuse_add
, ctxt
)
4270 // Cloning is required because we are running one instance of the pass
4271 // before peephole2. and a second one after cprop_hardreg.
4272 opt_pass
* clone () final override
4274 return make_avr_pass_fuse_add (m_ctxt
);
4277 unsigned int execute (function
*func
) final override
4279 func
->machine
->n_avr_fuse_add_executed
+= 1;
4280 n_avr_fuse_add_executed
= func
->machine
->n_avr_fuse_add_executed
;
4282 if (optimize
&& avropt_fuse_add
> 0)
4283 return execute1 (func
);
4287 unsigned int execute1 (function
*);
4291 rtx_insn
*insn
= nullptr;
4293 bool valid () const { return insn
!= nullptr; }
4297 SET_INSN_DELETED (insn
);
4302 // If .insn is not NULL, then this is a reg:HI += const_int
4303 // of an address register.
4304 struct Add_Insn
: Some_Insn
4309 Add_Insn (rtx_insn
*insn
);
4312 // If .insn is not NULL, then this sets an address register
4313 // to a constant value.
4314 struct Ldi_Insn
: Some_Insn
4318 Ldi_Insn (rtx_insn
*insn
);
4321 // If .insn is not NULL, then this is a load or store insn where the
4322 // address is REG or POST_INC with an address register.
4323 struct Mem_Insn
: Some_Insn
4325 rtx reg_or_0
, mem
, addr
, addr_reg
;
4329 addr_space_t addr_space
;
4330 bool store_p
, volatile_p
;
4332 Mem_Insn (rtx_insn
*insn
);
4335 rtx_insn
*fuse_ldi_add (Ldi_Insn
&prev_ldi
, Add_Insn
&add
);
4336 rtx_insn
*fuse_add_add (Add_Insn
&prev_add
, Add_Insn
&add
);
4337 rtx_insn
*fuse_add_mem (Add_Insn
&prev_add
, Mem_Insn
&mem
);
4338 rtx_insn
*fuse_mem_add (Mem_Insn
&prev_mem
, Add_Insn
&add
);
4339 }; // avr_pass_fuse_add
4342 /* Describe properties of AVR's indirect load and store instructions
4343 LD, LDD, ST, STD, LPM, ELPM depending on register number, volatility etc.
4344 Rules for "volatile" accesses are:
4347 ------+-----------------+----------------
4348 load | read LSB first | read LSB first
4349 store | write LSB first | write MSB first
4352 struct AVR_LdSt_Props
4354 bool has_postinc
, has_predec
, has_ldd
;
4355 // The insn printers will use POST_INC or PRE_DEC addressing, no matter
4356 // what adressing modes we are feeding into them.
4357 bool want_postinc
, want_predec
;
4359 AVR_LdSt_Props (int regno
, bool store_p
, bool volatile_p
, addr_space_t as
)
4361 bool generic_p
= ADDR_SPACE_GENERIC_P (as
);
4362 bool flashx_p
= (! generic_p
4363 && as
!= ADDR_SPACE_MEMX
&& as
!= ADDR_SPACE_FLASHX
);
4364 has_postinc
= generic_p
|| (flashx_p
&& regno
== REG_Z
);
4365 has_predec
= generic_p
;
4366 has_ldd
= ! AVR_TINY
&& generic_p
&& (regno
== REG_Y
|| regno
== REG_Z
);
4367 want_predec
= volatile_p
&& generic_p
&& ! AVR_XMEGA
&& store_p
;
4368 want_postinc
= volatile_p
&& generic_p
&& (AVR_XMEGA
|| ! store_p
);
4369 want_postinc
|= flashx_p
&& regno
== REG_Z
;
4372 AVR_LdSt_Props (const avr_pass_fuse_add::Mem_Insn
&m
)
4373 : AVR_LdSt_Props (m
.addr_regno
, m
.store_p
, m
.volatile_p
, m
.addr_space
)
4375 gcc_assert (m
.valid ());
4380 /* Emit a single_set that clobbers REG_CC. */
4383 emit_move_ccc (rtx dest
, rtx src
)
4385 return emit_insn (gen_gen_move_clobbercc (dest
, src
));
4389 /* Emit a single_set that clobbers REG_CC after insn AFTER. */
4392 emit_move_ccc_after (rtx dest
, rtx src
, rtx_insn
*after
)
4394 return emit_insn_after (gen_gen_move_clobbercc (dest
, src
), after
);
4398 reg_seen_between_p (const_rtx reg
, const rtx_insn
*from
, const rtx_insn
*to
)
4400 return (reg_used_between_p (reg
, from
, to
)
4401 || reg_set_between_p (reg
, from
, to
));
4406 avr_maybe_adjust_cfa (rtx_insn
*insn
, rtx reg
, int addend
)
4409 && frame_pointer_needed
4410 && REGNO (reg
) == FRAME_POINTER_REGNUM
4411 && avropt_fuse_add
== 3)
4413 rtx plus
= plus_constant (Pmode
, reg
, addend
);
4414 RTX_FRAME_RELATED_P (insn
) = 1;
4415 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, gen_rtx_SET (reg
, plus
));
// If successful, this represents a SET of a pointer register to a constant.
avr_pass_fuse_add::Ldi_Insn::Ldi_Insn (rtx_insn *insn)

  rtx set = single_set (insn);

  src = SET_SRC (set);
  dest = SET_DEST (set);

      && GET_MODE (dest) == Pmode
      && IN_RANGE (regno = REGNO (dest), REG_X, REG_Z)
      && CONSTANT_P (src))
// If successful, this represents a PLUS with CONST_INT of a pointer
// register X, Y or Z.  Otherwise, the object is not valid().
avr_pass_fuse_add::Add_Insn::Add_Insn (rtx_insn *insn)

  rtx set = single_set (insn);

  src = SET_SRC (set);
  dest = SET_DEST (set);

      // We are only interested in PLUSes that change address regs.
      && GET_MODE (dest) == Pmode
      && IN_RANGE (regno = REGNO (dest), REG_X, REG_Z)
      && PLUS == GET_CODE (src)
      && rtx_equal_p (XEXP (src, 0), dest)
      && CONST_INT_P (XEXP (src, 1)))

      // This is reg:HI += const_int.
      addend = XEXP (src, 1);
// If successful, this represents a load or store insn where the addressing
// mode uses pointer register X, Y or Z.  Otherwise, the object is not valid().
avr_pass_fuse_add::avr_pass_fuse_add::Mem_Insn::Mem_Insn (rtx_insn *insn)

  rtx set = single_set (insn);

  src = SET_SRC (set);
  dest = SET_DEST (set);
  mode = GET_MODE (dest);

      && (REG_P (src) || src == CONST0_RTX (mode)))

  else if (REG_P (dest) && MEM_P (src))

  if (avr_mem_memx_p (mem)
      || avr_load_libgcc_p (mem))

  addr = XEXP (mem, 0);
  addr_code = GET_CODE (addr);

  if (addr_code == REG)

  else if (addr_code == POST_INC || addr_code == PRE_DEC)
    addr_reg = XEXP (addr, 0);

  addr_regno = REGNO (addr_reg);

  if (avropt_fuse_add == 2
      && frame_pointer_needed
      && addr_regno == FRAME_POINTER_REGNUM)
    MEM_VOLATILE_P (mem) = 0;

  if (reg_overlap_mentioned_p (reg_or_0, addr) // Can handle CONSTANT_P.
      || addr_regno > REG_Z
      || avr_mem_memx_p (mem)
      // The following optimizations only handle REG and POST_INC,
      // so that's all we allow here.
      || (addr_code != REG && addr_code != POST_INC))

  addr_space = MEM_ADDR_SPACE (mem);
  volatile_p = MEM_VOLATILE_P (mem);
  store_p = MEM_P (dest);

  // Turn this "valid".
/* Try to combine a Ldi insn with a PLUS CONST_INT addend to one Ldi insn.
   If LDI is valid, then it precedes ADD in the same block.
   When a replacement is found, a new insn is emitted and the old insns
   are pseudo-deleted.  The returned insn is the point where the calling
   scanner should continue.  When no replacement is found, nullptr is
   returned and nothing changed.  */
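// A hedged sketch of the transformation (hypothetical RTL; the pointer
// register is only an example):
//     (set (reg:HI REG_Z) (symbol_ref "x"))                        ; Ldi
//     (set (reg:HI REG_Z) (plus:HI (reg:HI REG_Z) (const_int 2)))  ; Add
// is replaced by the single insn
//     (set (reg:HI REG_Z) (const (plus (symbol_ref "x") (const_int 2))))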
avr_pass_fuse_add::fuse_ldi_add (Ldi_Insn &ldi, Add_Insn &add)

      || reg_seen_between_p (ldi.dest, ldi.insn, add.insn))

      // If something is between the Ldi and the current insn, we can
      // set the Ldi invalid to speed future scans.
      return ldi.insn = nullptr;

  // Found a Ldi with a constant and a PLUS insn in the same BB,
  // and with no interfering insns between them.

  // Emit new Ldi with the sum of the original offsets after the old Ldi.
  rtx xval = plus_constant (Pmode, ldi.src, INTVAL (add.addend));

  rtx_insn *insn = emit_move_ccc_after (ldi.dest, xval, ldi.insn);
  avr_dump (";; new Ldi[%d] insn %d after %d: R%d = %r\n\n", ldi.regno,
            INSN_UID (insn), INSN_UID (ldi.insn), ldi.regno, xval);

  rtx_insn *next = NEXT_INSN (add.insn);
/* Try to combine two PLUS insns with CONST_INT addend to one such insn.
   If PREV_ADD is valid, then it precedes ADD in the same basic block.
   When a replacement is found, a new insn is emitted and the old insns
   are pseudo-deleted.  The returned insn is the point where the calling
   scanner should continue.  When no replacement is found, nullptr is
   returned and nothing changed.  */
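// Rough illustration (register and addends are made up):
//     Z += 2     ; prev_add
//     Z += 3     ; add
// becomes a single  Z += 5.  When the combined addend is 0, the two adds
// cancel and no new insn is needed (see the "annihilates" dump below).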
avr_pass_fuse_add::fuse_add_add (Add_Insn &prev_add, Add_Insn &add)

  if (! prev_add.valid ()
      || reg_seen_between_p (add.dest, prev_add.insn, add.insn))

      // If something is between the previous Add and the current insn,
      // we can set the previous Add invalid to speed future scans.
      return prev_add.insn = nullptr;

  // Found two PLUS insns in the same BB, and with no interfering
  // insns between them.
  rtx plus = plus_constant (Pmode, add.src, INTVAL (prev_add.addend));

      avr_dump (";; Add[%d] from %d annihilates %d\n\n", add.regno,
                INSN_UID (prev_add.insn), INSN_UID (add.insn));
      next = NEXT_INSN (add.insn);

      // Emit after the current insn, so that it will be picked
      // up as next valid Add insn.
      next = emit_move_ccc_after (add.dest, plus, add.insn);
      avr_dump (";; #1 new Add[%d] insn %d after %d: R%d += %d\n\n",
                add.regno, INSN_UID (next), INSN_UID (add.insn),
                add.regno, (int) INTVAL (XEXP (plus, 1)));
      gcc_assert (GET_CODE (plus) == PLUS);

  prev_add.set_deleted ();
/* Try to combine a PLUS of the address register with a load or store insn.
   If ADD is valid, then it precedes MEM in the same basic block.
   When a replacement is found, a new insn is emitted and the old insns
   are pseudo-deleted.  The returned insn is the point where the calling
   scanner should continue.  When no replacement is found, nullptr is
   returned and nothing changed.  */
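// Rough sketch of the PRE_DEC case (hypothetical register and mode):
//     Z += -2           ; Add_Insn
//     r24:HI = *Z       ; Mem_Insn with REG addressing
// becomes
//     r24:HI = *--Z     ; PRE_DEC consumes the addend
// plus, when needed, a compensating pointer adjustment around the access.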
avr_pass_fuse_add::fuse_add_mem (Add_Insn &add, Mem_Insn &mem)

      || reg_seen_between_p (add.dest, add.insn, mem.insn))

      // If something is between the Add and the current insn, we can
      // set the Add invalid to speed future scans.
      return add.insn = nullptr;

  AVR_LdSt_Props ap { mem };

  int msize = GET_MODE_SIZE (mem.mode);

  // The mem insn really wants PRE_DEC.
  bool case1 = ((mem.addr_code == REG || mem.addr_code == POST_INC)
                && msize > 1 && ap.want_predec && ! ap.has_ldd);

  // The offset can be consumed by a PRE_DEC.
  bool case2 = (- INTVAL (add.addend) == msize
                && (mem.addr_code == REG || mem.addr_code == POST_INC)
                && ap.has_predec && ! ap.want_postinc);

  if (! case1 && ! case2)

  // Change from REG or POST_INC to PRE_DEC.
  rtx xmem = change_address (mem.mem, mem.mode,
                             gen_rtx_PRE_DEC (Pmode, mem.addr_reg));
  rtx dest = mem.store_p ? xmem : mem.reg_or_0;
  rtx src = mem.store_p ? mem.reg_or_0 : xmem;

  rtx_insn *next = emit_move_ccc_after (dest, src, mem.insn);
  add_reg_note (next, REG_INC, mem.addr_reg);
  avr_dump (";; new Mem[%d] insn %d after %d: %r = %r\n\n", mem.addr_regno,
            INSN_UID (next), INSN_UID (mem.insn), dest, src);

  // Changing REG or POST_INC -> PRE_DEC means that the addend before
  // the memory access must be increased by the size of the access.
  rtx plus = plus_constant (Pmode, add.src, msize);

      rtx_insn *insn = emit_move_ccc_after (add.dest, plus, add.insn);
      avr_dump (";; #2 new Add[%d] insn %d after %d: R%d += %d\n\n",
                add.regno, INSN_UID (insn), INSN_UID (add.insn),
                add.regno, (int) INTVAL (XEXP (plus, 1)));
      gcc_assert (GET_CODE (plus) == PLUS);

      avr_dump (";; Add[%d] insn %d consumed into %d\n\n",
                add.regno, INSN_UID (add.insn), INSN_UID (next));

  // Changing POST_INC -> PRE_DEC means that the addend after the mem has to
  // be the size of the access.  The hope is that this new add insn may be
  // unused.
  if (mem.addr_code == POST_INC)

      plus = plus_constant (Pmode, add.dest, msize);
      rtx_insn *next2 = emit_move_ccc_after (add.dest, plus, next);
      avr_dump (";; #3 new Add[%d] insn %d after %d: R%d += %d\n\n", add.regno,
                INSN_UID (next2), INSN_UID (next), add.regno, msize);
/* Try to combine a load or store insn with a PLUS of the address register.
   If MEM is valid, then it precedes ADD in the same basic block.
   When a replacement is found, a new insn is emitted and the old insns
   are pseudo-deleted.  The returned insn is the point where the calling
   scanner should continue.  When no replacement is found, nullptr is
   returned and nothing changed.  */
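// Rough sketch of the POST_INC case (hypothetical register and mode):
//     r24:HI = *Z       ; Mem_Insn with REG addressing
//     Z += 2            ; Add_Insn, addend == access size
// becomes
//     r24:HI = *Z++     ; POST_INC consumes the add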
avr_pass_fuse_add::fuse_mem_add (Mem_Insn &mem, Add_Insn &add)

      || reg_seen_between_p (add.dest, mem.insn, add.insn))

      // If something is between the Mem and the current insn, we can
      // set the Mem invalid to speed future scans.
      return mem.insn = nullptr;

  AVR_LdSt_Props ap { mem };

  int msize = GET_MODE_SIZE (mem.mode);

  // The add insn can be consumed by a POST_INC.
  bool case1 = (mem.addr_code == REG
                && INTVAL (add.addend) == msize
                && ap.has_postinc && ! ap.want_predec);

  // There are cases where even a partial consumption of the offset is better.
  // These are the cases where no LD+offset addressing is available, because
  // the address register is obviously used after the mem insn, and a mem insn
  // with REG addressing mode will have to restore the address.
  bool case2 = (mem.addr_code == REG
                && msize > 1 && ap.want_postinc && ! ap.has_ldd);

  if (! case1 && ! case2)

  // Change addressing mode from REG to POST_INC.
  rtx xmem = change_address (mem.mem, mem.mode,
                             gen_rtx_POST_INC (Pmode, mem.addr_reg));
  rtx dest = mem.store_p ? xmem : mem.reg_or_0;
  rtx src = mem.store_p ? mem.reg_or_0 : xmem;

  rtx_insn *insn = emit_move_ccc_after (dest, src, mem.insn);
  add_reg_note (insn, REG_INC, mem.addr_reg);
  avr_dump (";; new Mem[%d] insn %d after %d: %r = %r\n\n", add.regno,
            INSN_UID (insn), INSN_UID (mem.insn), dest, src);

  rtx_insn *next = NEXT_INSN (add.insn);

  // Changing REG -> POST_INC means that the post addend must be
  // decreased by the size of the access.
  rtx plus = plus_constant (Pmode, add.src, -msize);

      next = emit_move_ccc_after (mem.addr_reg, plus, add.insn);
      avr_dump (";; #4 new Add[%d] insn %d after %d: R%d += %d\n\n",
                add.regno, INSN_UID (next), INSN_UID (add.insn),
                add.regno, (int) INTVAL (XEXP (plus, 1)));
      gcc_assert (GET_CODE (plus) == PLUS);

      avr_dump (";; Add[%d] insn %d consumed into %d\n\n",
                add.regno, INSN_UID (add.insn), INSN_UID (insn));
/* Try to post-reload combine PLUS with CONST_INT of pointer registers with:
   - Sets to a constant address.
   - PLUS insn of that kind.
   - Indirect loads and stores.
   In almost all cases, combine opportunities arise from the preparation
   done by `avr_split_fake_addressing_move', but in some rare cases
   combinations are found for the ordinary cores, too.
   As we consider at most one Mem insn per try, there may still be missed
   optimizations, e.g. a POST_INC + PLUS + POST_INC sequence might be
   performed as PRE_DEC + PRE_DEC for two adjacent locations.  */
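// Implementation note (summarizing the loop below): per basic block, the
// scan keeps, for every pointer register, the most recent candidate Ldi,
// Add and Mem insn, and tries to fuse the current insn with the remembered
// one of the matching kind.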
avr_pass_fuse_add::execute1 (function *func)

  df_note_add_problem ();

  int n_add = 0, n_mem = 0, n_ldi = 0;

  FOR_EACH_BB_FN (bb, func)

      Ldi_Insn prev_ldi_insns[REG_32];
      Add_Insn prev_add_insns[REG_32];
      Mem_Insn prev_mem_insns[REG_32];
      rtx_insn *insn, *curr;

      avr_dump ("\n;; basic block %d\n\n", bb->index);

      FOR_BB_INSNS_SAFE (bb, insn, curr)

          rtx_insn *next = nullptr;
          Ldi_Insn ldi_insn { insn };
          Add_Insn add_insn { insn };
          Mem_Insn mem_insn { insn };

          if (add_insn.valid ())

              // Found reg:HI += const_int
              avr_dump (";; insn %d: Add[%d]: R%d += %d\n\n",
                        INSN_UID (add_insn.insn), add_insn.regno,
                        add_insn.regno, (int) INTVAL (add_insn.addend));
              Ldi_Insn &prev_ldi_insn = prev_ldi_insns[add_insn.regno];
              Add_Insn &prev_add_insn = prev_add_insns[add_insn.regno];
              Mem_Insn &prev_mem_insn = prev_mem_insns[add_insn.regno];
              if ((next = fuse_ldi_add (prev_ldi_insn, add_insn)))
                curr = next, n_ldi += 1;
              else if ((next = fuse_add_add (prev_add_insn, add_insn)))
                curr = next, n_add += 1;
              else if ((next = fuse_mem_add (prev_mem_insn, add_insn)))
                curr = next, n_mem += 1;

              prev_add_insn = add_insn;

          else if (mem_insn.valid ())

              int addr_regno = REGNO (mem_insn.addr_reg);
              avr_dump (";; insn %d: Mem[%d]: %r = %r\n\n",
                        INSN_UID (mem_insn.insn), addr_regno,
                        mem_insn.dest, mem_insn.src);
              Add_Insn &prev_add_insn = prev_add_insns[addr_regno];
              if ((next = fuse_add_mem (prev_add_insn, mem_insn)))
                curr = next, n_mem += 1;

              prev_mem_insns[addr_regno] = mem_insn;

          else if (ldi_insn.valid ())

              if (! CONST_INT_P (ldi_insn.src))
                avr_dump (";; insn %d: Ldi[%d]: R%d = %r\n\n",
                          INSN_UID (ldi_insn.insn), ldi_insn.regno,
                          ldi_insn.regno, ldi_insn.src);
              prev_ldi_insns[ldi_insn.regno] = ldi_insn;

  avr_dump (";; Function %f: Found %d changes: %d ldi, %d add, %d mem.\n",
            n_ldi + n_add + n_mem, n_ldi, n_add, n_mem);
//////////////////////////////////////////////////////////////////////////////
// Split shift insns after peephole2 / before avr-fuse-move.

static const pass_data avr_pass_data_split_after_peephole2 =

  "", // name (will be patched)
  OPTGROUP_NONE, // optinfo_flags
  TV_DF_SCAN, // tv_id
  0, // properties_required
  0, // properties_provided
  0, // properties_destroyed
  0, // todo_flags_start
  0 // todo_flags_finish

class avr_pass_split_after_peephole2 : public rtl_opt_pass

  avr_pass_split_after_peephole2 (gcc::context *ctxt, const char *name)
    : rtl_opt_pass (avr_pass_data_split_after_peephole2, ctxt)

  unsigned int execute (function *) final override

      if (avr_shift_is_3op ())

}; // avr_pass_split_after_peephole2

} // anonymous namespace
/* Whether some shift insn alternatives are a `3op' 3-operand insn.
   These 3op alternatives allow the source and the destination register
   of the shift to be different right from the start, because the splitter
   will split the 3op shift into a 3-operand byte shift and a 2-operand
   residual bit shift.  (When the residual shift has an offset of one
   less than the bitsize, then the residual shift is also a 3op insn.)  */
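// For example (an illustrative sketch with made-up register numbers),
// a 16-bit  dest = src << 9  can be emitted as a byte shift that already
// targets DEST plus a 1-bit residual shift:
//     mov  r25, r22   ; dest.hi = src.lo   (byte shift by 8)
//     clr  r24        ; dest.lo = 0
//     lsl  r25        ; residual shift by 1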
  // Don't split for OPTIMIZE_SIZE_MAX (-Oz).
  // For OPTIMIZE_SIZE_BALANCED (-Os), we still split because
  // the size overhead (if at all) is marginal.

  return (avropt_split_bit_shift
          && avr_optimize_size_level () < OPTIMIZE_SIZE_MAX);
/* Implement constraints `C2a', `C2l', `C2r' ... `C4a', `C4l', `C4r'.
   Whether we split an N_BYTES shift of code CODE in { ASHIFTRT,
   LSHIFTRT, ASHIFT } into a byte shift and a residual bit shift.  */
avr_split_shift_p (int n_bytes, int offset, rtx_code code)

  gcc_assert (n_bytes == 4 || n_bytes == 3 || n_bytes == 2);

  if (! avr_shift_is_3op ()

    return select<bool>()
      : code == ASHIFT ? IN_RANGE (offset, 9, 31) && offset != 15
      : code == ASHIFTRT ? IN_RANGE (offset, 9, 29) && offset != 15
      : code == LSHIFTRT ? IN_RANGE (offset, 9, 31) && offset != 15
      : bad_case<bool> ();

    return select<bool>()
      : code == ASHIFT ? IN_RANGE (offset, 9, 23) && offset != 15
      : code == ASHIFTRT ? IN_RANGE (offset, 9, 21) && offset != 15
      : code == LSHIFTRT ? IN_RANGE (offset, 9, 23) && offset != 15
      : bad_case<bool> ();

    return select<bool>()
      : code == ASHIFT ? IN_RANGE (offset, 9, 15)
      : code == ASHIFTRT ? IN_RANGE (offset, 9, 13)
      : code == LSHIFTRT ? IN_RANGE (offset, 9, 15)
      : bad_case<bool> ();
/* Emit a DEST = SRC <code> OFF shift of QImode, HImode or PSImode.
   SCRATCH is a QImode d-register, scratch:QI, or NULL_RTX.
   This function is used to emit shifts that have been split into
   a byte shift and a residual bit shift that operates on a mode
   strictly smaller than the original shift.  */
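// Hypothetical usage from one of the avr_split_shift* workers below,
// emitting  dest:HI = src:HI << 3  with an optional QImode scratch:
//     avr_emit_shift (ASHIFT, dest, src, 3, scratch);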
avr_emit_shift (rtx_code code, rtx dest, rtx src, int off, rtx scratch)

  const machine_mode mode = GET_MODE (dest);
  const int n_bits = GET_MODE_BITSIZE (mode);
  rtx xoff = GEN_INT (off);

  // Work out which alternatives can handle 3 operands independent

  const bool b8_is_3op = off == 6;

  const bool b16_is_3op = select<bool>()
    : code == ASHIFT ? (satisfies_constraint_C7c (xoff) // 7...12
                        // The "C05 C06" alternative of *ashlhi3_const.
                        || (AVR_HAVE_MUL && scratch && (off == 5 || off == 6)))
    : code == LSHIFTRT ? satisfies_constraint_C7c (xoff)
    : code == ASHIFTRT ? off == 7
    : bad_case<bool> ();

  const bool b24_is_3op = select<bool>()
    : code == ASHIFT ? off == 15
    : code == LSHIFTRT ? off == 15
    : code == ASHIFTRT ? false
    : bad_case<bool> ();

  const bool is_3op = (off % 8 == 0
                       || off == n_bits - 1
                       || (code == ASHIFTRT && off == n_bits - 2)
                       || (n_bits == 8 && b8_is_3op)
                       || (n_bits == 16 && b16_is_3op)
                       || (n_bits == 24 && b24_is_3op));

      shift = gen_rtx_fmt_ee (code, mode, src, xoff);

      if (REGNO (dest) != REGNO (src))
        emit_valid_move_clobbercc (dest, src);
      shift = gen_rtx_fmt_ee (code, mode, dest, xoff);

    // 8-bit shifts don't have a scratch operand.

  else if (! scratch && n_bits == 24)
    // 24-bit shifts always have a scratch operand.
    scratch = gen_rtx_SCRATCH (QImode);

  emit_valid_move_clobbercc (dest, shift, scratch);
/* Handle the 4-byte case of avr_split_shift below:
   Split 4-byte shift  DEST = SRC <code> IOFF  into a 3-operand
   byte shift and a residual shift in a smaller mode if possible.
   SCRATCH is a QImode upper scratch register or NULL_RTX.  */
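// Rough illustration for a logical right shift (cf. the 17...23 case below):
//     dest:SI = src:SI >> 18
// is emitted as a 16-bit shift into the low word plus clearing the high word:
//     dest.lo16 = src.hi16 >> 2
//     dest.hi16 = 0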
avr_split_shift4 (rtx dest, rtx src, int ioff, rtx scratch, rtx_code code)

  gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 4);

      if (IN_RANGE (ioff, 25, 31))

          rtx dst8 = avr_byte (dest, 3);
          rtx src8 = avr_byte (src, 0);
          avr_emit_shift (code, dst8, src8, ioff - 24, NULL_RTX);
          emit_valid_move_clobbercc (avr_byte (dest, 2), const0_rtx);
          emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx);

      else if (IN_RANGE (ioff, 17, 23))

          rtx dst16 = avr_word (dest, 2);
          rtx src16 = avr_word (src, 0);
          avr_emit_shift (code, dst16, src16, ioff - 16, scratch);
          emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx);

      // ...the 9...14 cases are only handled by define_split because
      // for now, we don't exploit that the low byte is zero.

  else if (code == ASHIFTRT
           || code == LSHIFTRT)

      if (IN_RANGE (ioff, 25, 30 + (code == LSHIFTRT)))

          rtx dst8 = avr_byte (dest, 0);
          rtx src8 = avr_byte (src, 3);
          avr_emit_shift (code, dst8, src8, ioff - 24, NULL_RTX);
          if (code == ASHIFTRT)

              rtx signs = avr_byte (dest, 1);
              avr_emit_shift (code, signs, src8, 7, NULL_RTX);
              emit_valid_move_clobbercc (avr_byte (dest, 2), signs);
              emit_valid_move_clobbercc (avr_byte (dest, 3), signs);

              emit_valid_move_clobbercc (avr_byte (dest, 1), const0_rtx);
              emit_valid_move_clobbercc (avr_word (dest, 2), const0_rtx);

      else if (IN_RANGE (ioff, 17, 23))

          rtx dst16 = avr_word (dest, 0);
          rtx src16 = avr_word (src, 2);
          avr_emit_shift (code, dst16, src16, ioff - 16, scratch);
          if (code == ASHIFTRT)

              rtx msb = avr_byte (src, 3);
              rtx signs = avr_byte (dest, 2);
              avr_emit_shift (code, signs, msb, 7, NULL_RTX);
              emit_valid_move_clobbercc (avr_byte (dest, 3), signs);

          emit_valid_move_clobbercc (avr_word (dest, 2), const0_rtx);

      else if (IN_RANGE (ioff, 9, 15))

          avr_emit_shift (code, dest, src, 8, scratch);
          rtx dst24 = avr_chunk (PSImode, dest, 0);
          rtx src24 = avr_chunk (PSImode, dest, 0);
          avr_emit_shift (code, dst24, src24, ioff - 8, scratch);
/* Handle the 3-byte case of avr_split_shift below:
   Split 3-byte shift  DEST = SRC <code> IOFF  into a 3-operand
   byte shift and a residual shift in a smaller mode if possible.
   SCRATCH is a QImode upper scratch register or NULL_RTX.  */
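// Rough illustration for an ASHIFT (cf. the 17...23 case below):
//     dest:PSI = src:PSI << 18
// is emitted as a 2-bit shift of the low byte into the high byte plus
// clearing the low word:
//     dest.byte2 = src.byte0 << 2
//     dest.lo16  = 0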
avr_split_shift3 (rtx dest, rtx src, int ioff, rtx scratch, rtx_code code)

  gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 3);

      if (IN_RANGE (ioff, 17, 23))

          rtx dst8 = avr_byte (dest, 2);
          rtx src8 = avr_byte (src, 0);
          avr_emit_shift (code, dst8, src8, ioff - 16, NULL_RTX);
          emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx);

      // ...the 9...14 cases are only handled by define_split because
      // for now, we don't exploit that the low byte is zero.

  else if (code == ASHIFTRT
           || code == LSHIFTRT)

      if (IN_RANGE (ioff, 17, 22 + (code == LSHIFTRT)))

          rtx dst8 = avr_byte (dest, 0);
          rtx src8 = avr_byte (src, 2);
          avr_emit_shift (code, dst8, src8, ioff - 16, NULL_RTX);
          if (code == ASHIFTRT)

              rtx signs = avr_byte (dest, 1);
              avr_emit_shift (code, signs, src8, 7, NULL_RTX);
              emit_valid_move_clobbercc (avr_byte (dest, 2), signs);

              emit_valid_move_clobbercc (avr_byte (dest, 1), const0_rtx);
              emit_valid_move_clobbercc (avr_byte (dest, 2), const0_rtx);

      else if (IN_RANGE (ioff, 9, 15))

          avr_emit_shift (code, dest, src, 8, scratch);
          rtx dst16 = avr_chunk (HImode, dest, 0);
          rtx src16 = avr_chunk (HImode, dest, 0);
          avr_emit_shift (code, dst16, src16, ioff - 8, scratch);
/* Handle the 2-byte case of avr_split_shift below:
   Split 2-byte shift  DEST = SRC <code> IOFF  into a 3-operand
   byte shift and a residual shift in a smaller mode if possible.
   SCRATCH is a QImode upper scratch register or NULL_RTX.  */
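// Rough illustration (cf. the 9...15 ASHIFT case below):
//     dest:HI = src:HI << 9
// is emitted as
//     dest.hi8 = src.lo8 << 1
//     dest.lo8 = 0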
avr_split_shift2 (rtx dest, rtx src, int ioff, rtx /*scratch*/, rtx_code code)

  gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 2);

      if (IN_RANGE (ioff, 9, 15))

          rtx dst8 = avr_byte (dest, 1);
          rtx src8 = avr_byte (src, 0);
          avr_emit_shift (code, dst8, src8, ioff - 8, NULL_RTX);
          emit_valid_move_clobbercc (avr_byte (dest, 0), const0_rtx);

  else if (code == ASHIFTRT
           || code == LSHIFTRT)

      if (IN_RANGE (ioff, 9, 14 + (code == LSHIFTRT)))

          rtx dst8 = avr_byte (dest, 0);
          rtx src8 = avr_byte (src, 1);
          rtx signs = const0_rtx;
          avr_emit_shift (code, dst8, src8, ioff - 8, NULL_RTX);
          if (code == ASHIFTRT)

              signs = avr_byte (dest, 1);
              avr_emit_shift (code, signs, src8, 7, NULL_RTX);

          emit_valid_move_clobbercc (avr_byte (dest, 1), signs);
/* Worker for a define_split that runs when -msplit-bit-shift is on.
   Split a shift of code CODE into a 3op byte shift and a residual bit shift.
   Return 'true' when a split has been performed and insns have been emitted.
   Otherwise, return 'false'.  */
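// Hypothetical use from a splitter in the machine description, assuming
// operands[0..2] hold dest, src and the shift offset, and operands[3] may
// hold a scratch:
//     if (avr_split_shift (operands, operands[3], ASHIFT))
//       DONE;
//     FAIL;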
avr_split_shift (rtx xop[], rtx scratch, rtx_code code)

  scratch = scratch && REG_P (scratch) ? scratch : NULL_RTX;

  int ioff = INTVAL (xop[2]);
  int n_bytes = GET_MODE_SIZE (GET_MODE (dest));

  return select<bool>()
    : n_bytes == 2 ? avr_split_shift2 (dest, src, ioff, scratch, code)
    : n_bytes == 3 ? avr_split_shift3 (dest, src, ioff, scratch, code)
    : n_bytes == 4 ? avr_split_shift4 (dest, src, ioff, scratch, code)
    : bad_case<bool> ();
//////////////////////////////////////////////////////////////////////////////
// Determine whether an ISR may use the __gcc_isr pseudo-instruction.

static const pass_data avr_pass_data_pre_proep =

  "", // name (will be patched)
  OPTGROUP_NONE, // optinfo_flags
  TV_DF_SCAN, // tv_id
  0, // properties_required
  0, // properties_provided
  0, // properties_destroyed
  0, // todo_flags_start
  0 // todo_flags_finish

class avr_pass_pre_proep : public rtl_opt_pass

  avr_pass_pre_proep (gcc::context *ctxt, const char *name)
    : rtl_opt_pass (avr_pass_data_pre_proep, ctxt)

  void compute_maybe_gasisr (function *);

  unsigned int execute (function *fun) final override

      if (avropt_gasisr_prologues
          // Whether this function is an ISR worth scanning at all.
          && !fun->machine->is_no_gccisr
          && (fun->machine->is_interrupt
              || fun->machine->is_signal)
          && !cfun->machine->is_naked
          // Paranoia: Non-local gotos and labels that might escape.
          && !cfun->calls_setjmp
          && !cfun->has_nonlocal_label
          && !cfun->has_forced_label_in_static)

        compute_maybe_gasisr (fun);

}; // avr_pass_pre_proep

/* Set fun->machine->gasisr.maybe provided we don't find anything that
   prohibits GAS generating parts of ISR prologues / epilogues for us.  */

avr_pass_pre_proep::compute_maybe_gasisr (function *fun)

  // Don't use BB iterators so that we see JUMP_TABLE_DATA.

  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))

      // Transparent calls always use [R]CALL and are filtered out by GAS.
      // ISRs don't use -mcall-prologues, hence what remains to be filtered
      // out are open coded (tail) calls.

      // __tablejump2__ clobbers something and is targeted by JMP so
      // that GAS won't see its usage.

      if (AVR_HAVE_JMP_CALL
          && JUMP_TABLE_DATA_P (insn))

      // Non-local gotos not seen in *FUN.

          && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))

  fun->machine->gasisr.maybe = 1;
//////////////////////////////////////////////////////////////////////////////
// Late recomputation of notes so we can use `reg_unused_after()' and friends.

static const pass_data avr_pass_data_recompute_notes =

  "", // name (will be patched)
  OPTGROUP_NONE, // optinfo_flags
  TV_DF_SCAN, // tv_id
  0, // properties_required
  0, // properties_provided
  0, // properties_destroyed
  0, // todo_flags_start
  TODO_df_finish | TODO_df_verify // todo_flags_finish

class avr_pass_recompute_notes : public rtl_opt_pass

  avr_pass_recompute_notes (gcc::context *ctxt, const char *name)
    : rtl_opt_pass (avr_pass_data_recompute_notes, ctxt)

  unsigned int execute (function *) final override

      df_note_add_problem ();

}; // avr_pass_recompute_notes

} // anonymous namespace
//////////////////////////////////////////////////////////////////////////////
// Functions visible and used outside this module.

/* During reload, we allow many more addresses than Reduced Tiny actually
   supports.  Split them after reload in order to get closer to the
   core's capabilities.  This sets the stage for pass .avr-fuse-add.  */
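// Rough sketch on Reduced Tiny (illustrative register and offset): a load
//     r24:HI = *(Y + 3)
// that reload was allowed to form although the core has no LDD is split into
//     Y += 3
//     r24:HI = *Y
//     Y += -3
// and pass .avr-fuse-add may later fuse or drop the pointer adjustments.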
avr_split_fake_addressing_move (rtx_insn * /*insn*/, rtx *xop)

  bool store_p = false;

  if (REG_P (xop[0]) && MEM_P (xop[1]))

  else if (MEM_P (xop[0])
           || xop[1] == CONST0_RTX (GET_MODE (xop[0]))))

  machine_mode mode = GET_MODE (mem);
  rtx base, addr = XEXP (mem, 0);
  rtx_code addr_code = GET_CODE (addr);

  if (REG_P (reg_or_0)
      && reg_overlap_mentioned_p (reg_or_0, addr))

  else if (addr_code == PLUS || addr_code == PRE_DEC || addr_code == POST_INC)
    base = XEXP (addr, 0);
  else if (addr_code == REG)

  if (REGNO (base) > REG_Z)

      // Only keep base registers that can't do PLUS addressing.
      && ((REGNO (base) != REG_X
           && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem)))
          || avr_load_libgcc_p (mem)
          || avr_mem_memx_p (mem)))

  bool volatile_p = MEM_VOLATILE_P (mem);
  bool mem_volatile_p = false;
  if (frame_pointer_needed
      && REGNO (base) == FRAME_POINTER_REGNUM)

      if (avropt_fuse_add < 2
          // Be a projection (we always split PLUS).
          || (avropt_fuse_add == 2 && volatile_p && addr_code != PLUS))

      // Changing the frame pointer locally may confuse later passes
      // like .dse2 which don't track changes of FP, not even when
      // respective CFA notes are present.  An example is pr22141-1.c.
      if (avropt_fuse_add == 2)
        mem_volatile_p = true;

  rtx_code new_code = UNKNOWN;
  HOST_WIDE_INT add = 0, sub = 0;
  int msize = GET_MODE_SIZE (mode);

  AVR_LdSt_Props ap { (int) REGNO (base), store_p, volatile_p,
                      ADDR_SPACE_GENERIC };

      add = INTVAL (XEXP (addr, 1));

      else if (ap.want_predec)

          // volatile stores prefer PRE_DEC (MSB first)

          new_code = POST_INC;

      // volatile stores prefer PRE_DEC (MSB first)
      if (msize > 1 && ap.want_predec)

      // volatile loads prefer POST_INC (LSB first)
      if (msize > 1 && ap.want_postinc)

          new_code = POST_INC;

      new_code = POST_INC;

    } // switch addr_code

      insn = emit_move_ccc (base, plus_constant (Pmode, base, add));
      avr_maybe_adjust_cfa (insn, base, add);

  rtx new_addr = new_code == REG
    : gen_rtx_fmt_e (new_code, Pmode, base);

  rtx new_mem = change_address (mem, mode, new_addr);

    MEM_VOLATILE_P (new_mem) = 1;

  insn = emit_move_ccc (store_p ? new_mem : reg_or_0,
                        store_p ? reg_or_0 : new_mem);
  if (auto_inc_p (new_addr))

      add_reg_note (insn, REG_INC, base);
      int off = new_code == POST_INC ? msize : -msize;
      avr_maybe_adjust_cfa (insn, base, off);

      insn = emit_move_ccc (base, plus_constant (Pmode, base, sub));
      avr_maybe_adjust_cfa (insn, base, sub);
/* Given memory reference mem(ADDR), return true when it can be split into
   single-byte moves, and all resulting addresses are natively supported.
   ADDR is in addr-space generic.  */
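// For example, with a Y or Z base register: for a 4-byte access, Y+60 is
// splittable (bytes at Y+60 ... Y+63 are all reachable by LDD/STD), while
// Y+61 is not because Y+64 would be out of range.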
splittable_address_p (rtx addr, int n_bytes)

  if (CONSTANT_ADDRESS_P (addr)
      || GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC)

  rtx base = select<rtx>()
    : REG_P (addr) ? addr
    : GET_CODE (addr) == PLUS ? XEXP (addr, 0)

  int off = select<int>()
    : GET_CODE (addr) == PLUS ? (int) INTVAL (XEXP (addr, 1))

  return (base && REG_P (base)
          && (REGNO (base) == REG_Y || REGNO (base) == REG_Z)
          && IN_RANGE (off, 0, 64 - n_bytes));
/* Like avr_byte(), but also knows how to split POST_INC and PRE_DEC
   memory references.  */
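// For instance, byte N of a POST_INC memory reference is again a QImode
// POST_INC reference (the pointer advances by one for each byte moved);
// other addresses are delegated to avr_byte().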
avr_byte_maybe_mem (rtx x, int n)

      && (GET_CODE (addr = XEXP (x, 0)) == POST_INC
          || GET_CODE (addr) == PRE_DEC))
    b = gen_rtx_MEM (QImode, copy_rtx (addr));

    b = avr_byte (x, n);

  gcc_assert (MEM_P (b));
/* Split multi-byte loads / stores into 1-byte such insns
   provided non-volatile, addr-space = generic, no reg-overlap
   and the resulting addressings are all natively supported.
   Returns true when the XOP[0] = XOP[1] insn has been split and
   false otherwise.  */
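// Rough illustration (hypothetical operands): a HImode load
//     r24:HI = *(Y + 2)
// is split into the two natively supported byte loads
//     r24:QI = *(Y + 2)
//     r25:QI = *(Y + 3)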
avr_split_ldst (rtx *xop)

  machine_mode mode = GET_MODE (dest);
  int n_bytes = GET_MODE_SIZE (mode);

  if (MEM_P (dest) && reg_or_0_operand (src, mode))

  else if (register_operand (dest, mode) && MEM_P (src))

  rtx addr = XEXP (mem, 0);

  if (MEM_VOLATILE_P (mem)
      || ! ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem))
      || ! IN_RANGE (n_bytes, 2, 4)
      || ! splittable_address_p (addr, n_bytes)
      || reg_overlap_mentioned_p (reg_or_0, addr))

  const int step = GET_CODE (addr) == PRE_DEC ? -1 : 1;
  const int istart = step > 0 ? 0 : n_bytes - 1;
  const int iend = istart + step * n_bytes;

  for (int i = istart; i != iend; i += step)

      rtx di = avr_byte_maybe_mem (dest, i);
      rtx si = avr_byte_maybe_mem (src, i);
      emit_move_ccc (di, si);
// Functions  make_<pass-name> (gcc::context*)  where <pass-name> is
// according to the pass declaration in avr-passes.def.  GCC's pass
// manager uses these functions to create the respective pass object.

// Optimize results of the casesi expander for modes < SImode.

make_avr_pass_casesi (gcc::context *ctxt)

  return new avr_pass_casesi (ctxt, "avr-casesi");

// Try to replace 2 cbranch insns with 1 comparison and 2 branches.

make_avr_pass_ifelse (gcc::context *ctxt)

  return new avr_pass_ifelse (ctxt, "avr-ifelse");

// Determine whether an ISR may use the __gcc_isr pseudo-instruction.

make_avr_pass_pre_proep (gcc::context *ctxt)

  return new avr_pass_pre_proep (ctxt, "avr-pre-proep");

// Find more POST_INC and PRE_DEC cases.

make_avr_pass_fuse_add (gcc::context *ctxt)

  return new avr_pass_fuse_add (ctxt, "avr-fuse-add");

// Late recomputation of notes so we can use `reg_unused_after()' and friends.

make_avr_pass_recompute_notes (gcc::context *ctxt)

  return new avr_pass_recompute_notes (ctxt, "avr-notes-free-cfg");

// Optimize moves after reload.

make_avr_pass_fuse_move (gcc::context *ctxt)

  return new avr_pass_fuse_move (ctxt, "avr-fuse-move");

// Split insns after peephole2 / before avr-fuse-move.

make_avr_pass_split_after_peephole2 (gcc::context *ctxt)

  return new avr_pass_split_after_peephole2 (ctxt, "avr-split-after-peephole2");