gcc/config/avr/avr-passes.cc
/* Support for avr-passes.def for AVR 8-bit microcontrollers.
   Copyright (C) 2024-2025 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#define INCLUDE_ARRAY
#define INCLUDE_VECTOR
#include "config.h"
#include "system.h"
#include "intl.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "cfghooks.h"
#include "cfganal.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "explow.h"
#include "cfgrtl.h"
#include "context.h"
#include "tree-pass.h"
#include "insn-attr.h"
#include "tm-constrs.h"
#define CONST_INT_OR_FIXED_P(X) (CONST_INT_P (X) || CONST_FIXED_P (X))

#define FIRST_GPR (AVR_TINY ? REG_18 : REG_2)
// Emit pattern PAT, and ICE when the insn is not valid / not recognized.

static rtx_insn *
emit_valid_insn (rtx pat)
{
  rtx_insn *insn = emit_insn (pat);

  if (! valid_insn_p (insn)) // Also runs recog().
    fatal_insn ("emit unrecognizable insn", insn);

  return insn;
}
// Emit a single_set with an optional scratch operand.  This function
// asserts that the new insn is valid and recognized.

static rtx_insn *
emit_valid_move_clobbercc (rtx dest, rtx src, rtx scratch = NULL_RTX)
{
  rtx pat = scratch
    ? gen_gen_move_clobbercc_scratch (dest, src, scratch)
    : gen_gen_move_clobbercc (dest, src);

  return emit_valid_insn (pat);
}
namespace
{

/////////////////////////////////////////////////////////////////////////////
// Before we start with the actual code, introduce some helpers that are
// quite generic, though up to now only avr-fuse-add makes use of them.
/* Get the next / previous NONDEBUG_INSN_P after INSN in basic block BB.
   This assumes we are in CFG layout mode so that BLOCK_FOR_INSN()
   can be used.  */

static rtx_insn *
next_nondebug_insn_bb (basic_block bb, rtx_insn *insn, bool forward = true)
{
  while (insn)
    {
      insn = forward ? NEXT_INSN (insn) : PREV_INSN (insn);

      if (insn && NONDEBUG_INSN_P (insn))
	return BLOCK_FOR_INSN (insn) == bb ? insn : nullptr;
    }

  return insn;
}

static rtx_insn *
prev_nondebug_insn_bb (basic_block bb, rtx_insn *insn)
{
  return next_nondebug_insn_bb (bb, insn, false);
}
/* Like `single_set' with the addition that it sets REGNO_SCRATCH when the
   insn is a single_set with a QImode scratch register.  When the insn has
   no QImode scratch or just a scratch:QI, then set REGNO_SCRATCH = 0.
   The assumption is that the function is only used after the splits for
   REG_CC, so that the pattern is a parallel with 2 elements (INSN has no
   scratch operand) or 3 elements (INSN does have a scratch operand).  */

static rtx
single_set_with_scratch (rtx_insn *insn, int &regno_scratch)
{
  regno_scratch = 0;

  if (! INSN_P (insn))
    return NULL_RTX;

  rtx set, clo, reg, pat = PATTERN (insn);

  // Search for SET + CLOBBER(QI) + CLOBBER(CC).
  if (GET_CODE (pat) == PARALLEL
      && XVECLEN (pat, 0) == 3
      && GET_CODE (set = XVECEXP (pat, 0, 0)) == SET
      // At this pass, all insns are endowed with clobber(CC).
      && GET_CODE (clo = XVECEXP (pat, 0, 2)) == CLOBBER
      && GET_MODE (XEXP (clo, 0)) == CCmode
      && GET_CODE (clo = XVECEXP (pat, 0, 1)) == CLOBBER
      && REG_P (reg = XEXP (clo, 0))
      && GET_MODE (reg) == QImode)
    {
      regno_scratch = REGNO (reg);
      return set;
    }

  return single_set (insn);
}
// One bit for each GPR in REG_0 ... REG_31.
using gprmask_t = uint32_t;
// True when this is a valid GPR number for ordinary code, e.g.
// registers wider than 2 bytes have to start at an even regno.
// TMP_REG and ZERO_REG are not considered valid, even though
// the C source can use register vars with them.
static inline bool
gpr_regno_p (int regno, int n_bytes = 1)
{
  return (IN_RANGE (regno, FIRST_GPR, REG_32 - n_bytes)
	  // Size in { 1, 2, 3, 4, 8 } bytes.
	  && ((1u << n_bytes) & 0x11e)
	  // Registers >= 2 bytes start at an even regno.
	  && (n_bytes == 1 || regno % 2 == 0));
}
// There are cases where the C source defines local reg vars
// for R1 etc.  The assumption is that this is handled before
// calling this function, e.g. by skipping code when a register
// overlaps with a fixed register.
static inline gprmask_t
regmask (int regno, int size)
{
  gcc_checking_assert (gpr_regno_p (regno, size));
  gprmask_t bits = (1u << size) - 1;

  return bits << regno;
}
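
// Example (for illustration): regmask (REG_24, 2) has exactly bits
// 24 and 25 set, one mask bit for each of R24 and R25.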
// Mask for hard register X that's some GPR, including fixed regs like R0.
static gprmask_t
regmask (rtx x)
{
  gcc_assert (REG_P (x));
  gprmask_t bits = (1u << GET_MODE_SIZE (GET_MODE (x))) - 1;

  return bits << REGNO (x);
}
// Whether X has bits in the range [B0 ... B1].
static inline bool
has_bits_in (gprmask_t x, int b0, int b1)
{
  if (b0 > b1 || b0 > 31 || b1 < 0)
    return false;

  const gprmask_t m = (2u << (b1 - b0)) - 1;
  return x & (m << b0);
}
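
// Example (for illustration): has_bits_in (regmask (REG_24, 2), 16, 24)
// is true since bit 24 lies in the closed range [16, 24]; out-of-range
// values of B0 / B1 simply yield false.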
template<typename T>
T bad_case ()
{
  gcc_unreachable ();
}

#define select false ? bad_case
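
// For illustration: with the macro above, a chain like
//
//   int x = select<int>()
//     : cond1 ? val1
//     : cond2 ? val2
//     : bad_case<int> ();
//
// expands to "false ? bad_case<int> () : cond1 ? val1 : ...", i.e. a
// plain chain of conditional operators.  The leading arm is never
// evaluated and only fixes the result type; a final bad_case arm ICEs
// when no condition matched.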
namespace AVRasm
{
// Returns true when a scratch reg is needed in order to get the
// (signed or unsigned) 8-bit value VAL in some GPR.
// When it's about costs rather than the sheer requirement for a
// scratch, see also AVRasm::constant_cost.
static inline bool ldi_needs_scratch (int regno, int val)
{
  return regno < REG_16 && IN_RANGE (val & 0xff, 2, 254);
}
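
// Note (assumption, not from the original sources): VAL in { 0, 1, 255 }
// is exempt presumably because any GPR, including R2 ... R15, can reach
// these values without LDI, via CLR, CLR + INC and CLR + DEC.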
// Return a byte value x >= 0 such that x <code> y == y for all y, or -1.
static inline int neutral_val (rtx_code code)
{
  return select<int>()
    : code == AND ? 0xff
    : code == IOR ? 0x00
    : code == XOR ? 0x00
    : code == PLUS ? 0
    : -1;
}
// When there exists a value x such that the image of the function
// y -> y <code> x has order 1, then return that x.  Else return -1.
static inline int image1_val (rtx_code code)
{
  return select<int>()
    : code == AND ? 0x00
    : code == IOR ? 0xff
    : -1;
}
// Cost of 8-bit binary operation x o= VAL provided a scratch is
// available as needed.
static int constant_cost (rtx_code code, int regno, uint8_t val)
{
  bool needs_scratch_p = select<bool>()
    : code == PLUS ? regno < REG_16 && val != 1 && val != 0xff
    : code == XOR ? val != 0xff && (regno < REG_16 || val != 0x80)
    : code == IOR ? regno < REG_16
    : code == AND ? regno < REG_16 && val != 0
    : code == SET ? regno < REG_16 && val != 0
    : bad_case<bool> ();

  return val == AVRasm::neutral_val (code)
    ? 0
    : 1 + needs_scratch_p;
}
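
// Example (for illustration): constant_cost (AND, REG_18, 0xff) is 0
// since 0xff is AND's neutral value, whereas constant_cost (SET, REG_2,
// 0x42) is 2 because a lower register needs an LDI into a scratch plus
// a MOV.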
}; // AVRasm
// Returns the mode mask for a mode size of SIZE bytes.
static uint64_t size_to_mask (int size)
{
  return ((uint64_t) 2 << (8 * size - 1)) - 1;
}
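
// Example (for illustration): size_to_mask (2) == 0xffff.  Writing
// 2 << (8 * size - 1) instead of 1 << (8 * size) keeps the shift count
// below 64 when SIZE is 8, where the computation wraps to the desired
// all-ones mask.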
// Return the scalar int mode for a modesize of 1, 2, 3, 4 or 8 bytes.
static machine_mode size_to_mode (int size)
{
  return select<machine_mode>()
    : size == 1 ? QImode
    : size == 2 ? HImode
    : size == 3 ? PSImode
    : size == 4 ? SImode
    : size == 8 ? DImode
    : bad_case<machine_mode> ();
}
//////////////////////////////////////////////////////////////////////////////
// Optimize moves after reload: -mfuse-move=<0,23>

/* The purpose of this pass is to perform optimizations after reload
   like the following ones:

   Without optimization            |  With optimization
   ====================            |  =================

   long long fn_zero (void)    (1)
   {
     return 0;
   }

   ldi r18, 0  ; movqi_insn        |  ldi r18, 0    ; movqi_insn
   ldi r19, 0  ; movqi_insn        |  ldi r19, 0    ; movqi_insn
   ldi r20, 0  ; movqi_insn        |  movw r20, r18 ; *movhi
   ldi r21, 0  ; movqi_insn        |
   ldi r22, 0  ; movqi_insn        |  movw r22, r18 ; *movhi
   ldi r23, 0  ; movqi_insn        |
   ldi r24, 0  ; movqi_insn        |  movw r24, r18 ; *movhi
   ldi r25, 0  ; movqi_insn        |
   ret                             |  ret

   int fn_eq0 (char c)         (2)
   {
     return c == 0;
   }

   mov r18, r24   ; movqi_insn     |  mov r18, r24   ; movqi_insn
   ldi r24, 1     ; *movhi         |  ldi r24, 1     ; *movhi
   ldi r25, 0                      |  ldi r25, 0
   cp r18, ZERO   ; cmpqi3         |  cpse r18, ZERO ; peephole
   breq .+4       ; branch         |
   ldi r24, 0     ; *movhi         |  ldi r24, 0     ; movqi_insn
   ldi r25, 0                      |
   ret                             |  ret

   int a, b;                   (3)

   void fn_store_ab (void)
   {
     a = 1;
     b = -1;
   }

   ldi r24, 1     ; *movhi         |  ldi r24, 1     ; *movhi
   ldi r25, 0                      |  ldi r25, 0
   sts a+1, r25   ; *movhi         |  sts a+1, r25   ; *movhi
   sts a, r24                      |  sts a, r24
   ldi r24, -1    ; *movhi         |  sbiw r24, 2    ; *addhi3
   ldi r25, -1                     |
   sts b+1, r25   ; *movhi         |  sts b+1, r25   ; *movhi
   sts b, r24                      |  sts b, r24
   ret                             |  ret

   unsigned fn_crc (unsigned x, unsigned y)   (4)
   {
     for (char i = 8; i--; x <<= 1)
       y ^= (x ^ y) & 0x80 ? 79U : 0U;
     return y;
   }

   movw r18, r24  ; *movhi         |  movw r18, r24  ; *movhi
   movw r24, r22  ; *movhi         |  movw r24, r22  ; *movhi
   ldi r22, 8     ; movqi_insn     |  ldi r22, 8     ; movqi_insn
  .L13:                            | .L13:
   movw r30, r18  ; *movhi         |  movw r30, r18  ; *movhi
   eor r30, r24   ; *xorqi3        |  eor r30, r24   ; *xorqi3
   eor r31, r25   ; *xorqi3        |  eor r31, r25   ; *xorqi3
   mov r20, r30   ; *andhi3        |  mov r20, r30   ; *andqi3
   andi r20, 1<<7                  |  andi r20, 1<<7
   clr r21                         |
   sbrs r30, 7    ; *sbrx_branchhi |  sbrc r30, 7    ; *sbrx_branchhi
   rjmp .+4                        |
   ldi r20, 79    ; movqi_insn     |  ldi r20, 79    ; movqi_insn
   ldi r21, 0     ; movqi_insn     |
   eor r24, r20   ; *xorqi3        |  eor r24, r20   ; *xorqi3
   eor r25, r21   ; *xorqi3        |
   lsl r18        ; *ashlhi3_const |  lsl r18        ; *ashlhi3_const
   rol r19                         |  rol r19
   subi r22, 1  ; *op8.for.cczn.plus  |  subi r22, 1  ; *op8.for.cczn.plus
   brne .L13      ; branch_ZN      |  brne .L13      ; branch_ZN
   ret                             |  ret

   #define SPDR (*(uint8_t volatile*) 0x2c)   (5)

   void fn_PR49807 (long big)
   {
     SPDR = big >> 24;
     SPDR = big >> 16;
     SPDR = big >> 8;
     SPDR = big;
   }

   movw r20, r22  ; *movhi         |  movw r20, r22  ; *movhi
   movw r22, r24  ; *movhi         |  movw r22, r24  ; *movhi
   mov r24, r23   ; *ashrsi3_const |
   clr r27                         |
   sbrc r24, 7                     |
   com r27                         |
   mov r25, r27                    |
   mov r26, r27                    |
   out 0xc, r24   ; movqi_insn     |  out 0xc, r23   ; movqi_insn
   movw r24, r22  ; *ashrsi3_const |
   clr r27                         |
   sbrc r25, 7                     |
   com r27                         |
   mov r26, r27                    |
   out 0xc, r24   ; movqi_insn     |  out 0xc, r24   ; movqi_insn
   clr r27        ; *ashrsi3_const |
   sbrc r23, 7                     |
   dec r27                         |
   mov r26, r23                    |
   mov r25, r22                    |
   mov r24, r21                    |
   out 0xc, r24   ; movqi_insn     |  out 0xc, r21   ; movqi_insn
   out 0xc, r20   ; movqi_insn     |  out 0xc, r20   ; movqi_insn
   ret                             |  ret

   The insns of each basic block are traversed from first to last.
   Each insn is optimized on its own, or may be fused with the
   previous insn like in example (1).
   As the insns are traversed, memento_t keeps track of known values
   held in the GPRs (general purpose registers) R2 ... R31 by simulating
   the effect of the current insn in memento_t.apply_insn().
   The basic blocks are traversed in reverse post order so as to
   maximize the chance that GPRs from all preceding blocks are known,
   which is the case in example (2).  The traversal of the basic block
   is performed by bbinfo_t.optimize_one_function().
   bbinfo_t.optimize_one_block() traverses the insns of a BB and tries
   the following optimizations:

   bbinfo_t::try_fuse_p
      Try to fuse two 8-bit insns to one MOVW like in (1).

   bbinfo_t::try_simplify_p
      Only perform the simplest optimizations that don't impede the
      traceability of the generated code, which are:
      - Transform operations like Rn = Rn=0 ^ Rm to Rn = Rm.
      - Remove insns that are no-ops like Rn = Rn ^ Rm=0.

   bbinfo_t::try_bin_arg1_p
      In insns like EOR Rn, arg1 where arg1 is known or is a reg that
      dies in the insn, *and* there is a different register Rm that's
      known to contain the same value, then arg1 is replaced with Rm.

   bbinfo_t::try_split_ldi_p
      Tries to simplify loads of constants like in examples (1), (2)
      and (3).  It may use arithmetic instructions like AND with
      registers that are holding known values when this is profitable.

   bbinfo_t::try_split_any_p
      Split all insns where the operation can be performed on individual
      bytes, like andsi3.  In example (4) the andhi3 can be optimized
      to an andqi3.

   bbinfo_t::try_mem0_p
      Try to fuse a mem = reg insn to mem = __zero_reg__.
      This should only occur when -msplit-ldst is on, but may
      also occur with pushes since push<mode>1 splits them.  */
// A basic block with additional information like the GPR state.
// The main entry point for the pass.  Runs various strategies
// like try_fuse, try_simplify, try_bin_arg1, try_split_ldi, try_split_any
// depending on -mfuse-move=<0,23>.
struct bbinfo_t;
// Additional insn information on a REG = non-memory single_set insn
// for quick access.  Only valid when the m_size member is non-zero.
struct insninfo_t;

// Helper classes with data needed by the try_xxx optimizers.
struct optimize_data_t;
struct insn_optimize_data_t;

// Records which GPRs R0 ... R31 are holding a known value,
// and which values these are.
struct memento_t;

// Abstract Interpretation of expressions.
// absint_val_t represents an 8-bit value that equals the content of
// some GPR, or equals some known value (or both, or none of them).
// absint_byte_t represents an 8-bit entity that is equivalent to
// an absint_val_t, or is equivalent to some (unary or binary) operation
// on absint_val_t's like NOT, AND, IOR, XOR that operate bit-wise (and
// hence also byte-wise).
// absint_t represents an array of absint_byte_t's.  When some insn is
// applied to a GPR state, then memento_t.apply_insn() represents the RHS
// of a single_set as an absint_t, and then applies that result to the
// GPRs.  For example, in int y = x << 8 the representation is
// x = [r25; r24] and RHS = [r24; 00].
struct absint_val_t;
class absint_byte_t;
struct absint_t;
// A ply_t is a potential step towards an optimal sequence to load a constant
// value into a multi-byte register.  A ply_t loosely relates to one AVR
// instruction, but it may also represent a sequence of instructions.
// For example, loading a constant into a lower register when no scratch reg
// is available may take up to 4 instructions.  There is no 1:1 correspondence
// to insns, either.
// try_split_ldi determines the best sequence of ply_t's by means of a
// brute-force search with tree pruning: It's much too complicated to
// construct a good sequence directly, but there are many conditions that
// a good sequence will satisfy, implemented in bbinfo_t::find_plies.
struct ply_t;
struct plies_t;
// The maximal number of ply_t's in any conceivable optimal solution
// that is better than what a vanilla mov<mode> generates.
// This is 6 for modes <= 4 and 8 for modes == 8.
static constexpr int N_BEST_PLYS = 8;

#define FUSE_MOVE_MAX_MODESIZE 8

#include "avr-passes-fuse-move.h"

// Static members.

gprmask_t memento_t::fixed_regs_mask;

// Statistics.
int ply_t::n_ply_ts;
int ply_t::max_n_ply_ts;
int plies_t::max_n_plies;

bbinfo_t *bbinfo_t::bb_info;
int bbinfo_t::tick;
bbinfo_t::find_plies_data_t *bbinfo_t::fpd;

// Which optimizations should be performed.
bool bbinfo_t::try_fuse_p;
bool bbinfo_t::try_bin_arg1_p;
bool bbinfo_t::try_split_ldi_p;
bool bbinfo_t::try_split_any_p;
bool bbinfo_t::try_simplify_p;
bool bbinfo_t::use_arith_p;
bool bbinfo_t::use_set_some_p;
bool bbinfo_t::try_mem0_p;
// Abstract Interpretation of expressions.
// A bunch of absint_byte_t's.

struct absint_t
{
  static constexpr int eq_size = FUSE_MOVE_MAX_MODESIZE;
  std::array<absint_byte_t, eq_size> eq;

  rtx xexp = NULL_RTX;
  rtx xexp_new = NULL_RTX;

  absint_byte_t &operator[] (int i)
  {
    gcc_assert (IN_RANGE (i, 0, absint_t::eq_size - 1));
    return eq[i];
  }

  const absint_byte_t &operator[] (int i) const
  {
    gcc_assert (IN_RANGE (i, 0, absint_t::eq_size - 1));
    return eq[i];
  }

  absint_t () {}

  absint_t (rtx xold)
    : xexp(xold)
  {}

  absint_t (rtx xold, rtx xnew, int n_bytes)
    : xexp(xold), xexp_new(xnew)
  {
    gcc_assert (n_bytes <= eq_size);
    if (xnew)
      for (int i = 0; i < n_bytes; ++i)
	eq[i].learn_val8 (avr_uint8 (xnew, i));
  }
  // CODE != UNKNOWN: Maximal index of a byte with code CODE, or -1.
  // CODE == UNKNOWN: Maximal index of a byte with known CODE, or -1.
  int max_knows (rtx_code code = UNKNOWN) const
  {
    for (int i = eq_size - 1; i >= 0; --i)
      if ((code == UNKNOWN && ! eq[i].can (UNKNOWN))
	  || (code != UNKNOWN && eq[i].can (code)))
	return i;
    return -1;
  }

  // CODE != UNKNOWN: Maximal i such that all bytes < i have code CODE.
  // CODE == UNKNOWN: Maximal i such that all bytes < i have code != UNKNOWN.
  int end_knows (rtx_code code = UNKNOWN) const
  {
    for (int i = 0; i < eq_size; ++i)
      if ((code == UNKNOWN && eq[i].can (UNKNOWN))
	  || (code != UNKNOWN && ! eq[i].can (code)))
	return i;
    return eq_size;
  }

  // Number of bytes for which there is usable information.
  int popcount () const
  {
    int pop = 0;
    for (int i = 0; i < eq_size; ++i)
      pop += ! eq[i].can (UNKNOWN);
    return pop;
  }
  // Get the value under the assumption that all eq[].val8 are known.
  uint64_t get_value (int n_bytes, bool strict = true) const
  {
    gcc_assert (IN_RANGE (n_bytes, 1, eq_size));
    gcc_assert (! strict || end_knows (CONST_INT) >= n_bytes);

    uint64_t val = 0;
    for (int i = n_bytes - 1; i >= 0; --i)
      val = 256 * val + eq[i].val8 (strict);
    return val;
  }
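
  // Example (for illustration): the eq[] bytes are little-endian, so
  // with eq[0].val8() == 0x34 and eq[1].val8() == 0x12,
  // get_value (2) returns 0x1234.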
  // Get n-byte value as a const_int, or NULL_RTX when (partially) unknown.
  rtx get_value_as_const_int (int n_bytes) const
  {
    gcc_checking_assert (gpr_regno_p (REG_24, n_bytes));

    if (end_knows (CONST_INT) < n_bytes)
      return NULL_RTX;

    const uint64_t val = get_value (n_bytes);
    const machine_mode mode = size_to_mode (n_bytes);

    return gen_int_mode (val, mode);
  }
  // Find a 16-bit register that contains the same value as held
  // in positions I1 and I2 (if any).  Return 0 when nothing appropriate
  // for a MOVW is found.
  int reg16_with_value (int i1, int i2, const memento_t &memo) const
  {
    if (i1 == (i2 ^ 1))
      {
	const int lo8 = eq[i1 & ~1].val8 (false);
	const int hi8 = eq[i1 | 1].val8 (false);
	if (lo8 >= 0 && hi8 >= 0)
	  return memo.reg16_with_value (lo8, hi8, 0);
      }

    return 0;
  }
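
  // Note (for illustration): i1 == (i2 ^ 1) requires I1 and I2 to be the
  // two halves of an aligned 16-bit pair in either order, e.g. (0, 1)
  // or (3, 2).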
  // When X is a REG rtx with a known content as of MEMO, then return
  // the respective value as a constant for mode MODE.
  // If X is NULL_RTX, or not a REG, or not known, then return NULL_RTX.
  static rtx maybe_fold (rtx x, const memento_t &memo)
  {
    int n_bytes;

    if (x != NULL_RTX
	&& REG_P (x)
	&& (n_bytes = GET_MODE_SIZE (GET_MODE (x))) <= FUSE_MOVE_MAX_MODESIZE
	&& gpr_regno_p (REGNO (x), n_bytes))
      {
	rtx xval = memo.get_value_as_const_int (REGNO (x), n_bytes);
	if (xval)
	  return avr_chunk (GET_MODE (x), xval, 0);
      }

    return NULL_RTX;
  }
  // Try to conclude about the bytes that comprise X.  DEST_MODE is the
  // context mode that is used when X is CONST_INT and has VOIDmode.
  static absint_t explore (rtx x, const memento_t &memo,
			   machine_mode dest_mode = VOIDmode)
  {
    const rtx_code code = GET_CODE (x);
    bool worth_dumping = dump_file && (dump_flags & TDF_FOLDING);

    const machine_mode mode = GET_MODE (x) == VOIDmode
      ? dest_mode
      : GET_MODE (x);

    const int n_bytes = mode == VOIDmode && CONST_INT_P (x)
      ? absint_t::eq_size
      : GET_MODE_SIZE (mode);

    if (! IN_RANGE (n_bytes, 1, absint_t::eq_size))
      return absint_t (x);

    // Eat our own dog food as produced by try_split_ldi.

    rtx xop0 = BINARY_P (x) || UNARY_P (x) ? XEXP (x, 0) : NULL_RTX;
    rtx xval0 = xop0 && CONST_INT_OR_FIXED_P (xop0)
      ? xop0
      : absint_t::maybe_fold (xop0, memo);

    if (UNARY_P (x)
	&& REG_P (xop0)
	&& GET_MODE (xop0) == mode
	&& xval0)
      {
	rtx y = simplify_unary_operation (code, mode, xval0, mode);
	if (y && CONST_INT_OR_FIXED_P (y))
	  return absint_t (x, y, n_bytes);
      }

    rtx xop1 = BINARY_P (x) ? XEXP (x, 1) : NULL_RTX;
    rtx xval1 = xop1 && CONST_INT_OR_FIXED_P (xop1)
      ? xop1
      : absint_t::maybe_fold (xop1, memo);

    if (BINARY_P (x)
	&& xval0 && xval1)
      {
	rtx y = simplify_binary_operation (code, mode, xval0, xval1);
	if (y && CONST_INT_OR_FIXED_P (y))
	  return absint_t (x, y, n_bytes);
      }

    // No fold to a constant value was found:
    // Look at the individual bytes more closely.

    absint_t ai (x);

    switch (code)
      {
      default:
	worth_dumping = false;
	break;

      case REG:
	if (END_REGNO (x) <= REG_32
	    && ! (regmask (x) & memento_t::fixed_regs_mask))
	  for (unsigned r = REGNO (x); r < END_REGNO (x); ++r)
	    {
	      ai[r - REGNO (x)].learn_regno (r);
	      if (memo.knows (r))
		ai[r - REGNO (x)].learn_val8 (memo.value (r));
	    }
	break;

      CASE_CONST_UNIQUE:
	ai = absint_t (x, x, n_bytes);
	break;

      case ASHIFT:
      case ASHIFTRT:
      case LSHIFTRT:
      case ROTATE:
      case ROTATERT:
	if ((CONST_INT_P (xop1) && INTVAL (xop1) >= 8)
	    // DImode shift offsets for transparent calls are shipped in R16.
	    || n_bytes == 8)
	  ai = explore_shift (x, memo);
	break;

      case AND:
      case IOR:
      case XOR:
	{
	  const absint_t ai0 = absint_t::explore (xop0, memo, mode);
	  const absint_t ai1 = absint_t::explore (xop1, memo, mode);
	  for (int i = 0; i < n_bytes; ++i)
	    ai[i] = absint_byte_t (code, ai0[i], ai1[i]);
	}
	break;

      case NOT:
	{
	  const absint_t ai0 = absint_t::explore (xop0, memo);
	  for (int i = 0; i < n_bytes; ++i)
	    ai[i] = absint_byte_t (NOT, ai0[i]);
	}
	break;

      case ZERO_EXTEND:
      case SIGN_EXTEND:
	{
	  const absint_t ai0 = absint_t::explore (xop0, memo);
	  const int ai0_size = GET_MODE_SIZE (GET_MODE (xop0));
	  const absint_byte_t b_signs = ai0[ai0_size - 1].get_signs (code);
	  for (int i = 0; i < n_bytes; ++i)
	    ai[i] = i < ai0_size ? ai0[i] : b_signs;
	}
	break;

      case PLUS:
      case MINUS:
	if (SCALAR_INT_MODE_P (mode)
	    || ALL_SCALAR_FIXED_POINT_MODE_P (mode))
	  {
	    const absint_t ai0 = absint_t::explore (xop0, memo, mode);
	    const absint_t ai1 = absint_t::explore (xop1, memo, mode);
	    if (code == MINUS)
	      for (int i = 0; i < n_bytes && ai1[i].val8 (false) == 0; ++i)
		ai[i] = ai0[i];

	    if (code == PLUS)
	      for (int i = 0; i < n_bytes; ++i)
		{
		  if (ai0[i].val8 (false) == 0)
		    ai[i] = ai1[i];
		  else if (ai1[i].val8 (false) == 0)
		    ai[i] = ai0[i];
		  else
		    {
		      ai[i] = absint_byte_t (code, ai0[i], ai1[i]);
		      break;
		    }
		}

	    if (code == PLUS
		&& GET_CODE (xop0) == ZERO_EXTEND
		&& CONST_INT_P (xop1))
	      {
		rtx exop = XEXP (xop0, 0);
		int exsize = GET_MODE_SIZE (GET_MODE (exop));
		rtx lo_xop1 = avr_chunk (GET_MODE (exop), xop1, 0);
		if (lo_xop1 == const0_rtx)
		  for (int i = exsize; i < n_bytes; ++i)
		    ai[i] = ai1[i];
	      }
	  }
	break; // PLUS, MINUS

      case MULT:
	if (GET_MODE (xop0) == mode
	    && SCALAR_INT_MODE_P (mode))
	  {
	    // The constant may be located in xop0's zero_extend...
	    const absint_t ai0 = absint_t::explore (xop0, memo, mode);
	    const absint_t ai1 = absint_t::explore (xop1, memo, mode);
	    const int end0 = ai0.end_knows (CONST_INT);
	    const int end1 = ai1.end_knows (CONST_INT);
	    const uint64_t mul0 = end0 > 0 ? ai0.get_value (end0) : 1;
	    const uint64_t mul1 = end1 > 0 ? ai1.get_value (end1) : 1;
	    // Shifting in off/8 zero bytes from the right.
	    const int off = mul0 * mul1 != 0 ? ctz_hwi (mul0 * mul1) : 0;
	    for (int i = 0; i < off / 8; ++i)
	      ai[i].learn_val8 (0);
	  }
	break; // MULT

      case BSWAP:
	if (GET_MODE (xop0) == mode)
	  {
	    const absint_t ai0 = absint_t::explore (xop0, memo);
	    for (int i = 0; i < n_bytes; ++i)
	      ai[i] = ai0[n_bytes - 1 - i];
	  }
	break;
      } // switch code

    if (worth_dumping)
      {
	avr_dump (";; AI.explore %C:%m ", code, mode);
	ai.dump ();
      }

    for (int i = 0; i < n_bytes; ++i)
      gcc_assert (ai[i].check ());

    return ai;
  }
  // Helper for the method above.
  static absint_t explore_shift (rtx x, const memento_t &memo)
  {
    absint_t ai (x);

    const rtx_code code = GET_CODE (x);
    const machine_mode mode = GET_MODE (x);
    const int n_bytes = GET_MODE_SIZE (mode);

    if (! BINARY_P (x))
      return ai;

    rtx xop0 = XEXP (x, 0);
    rtx xop1 = XEXP (x, 1);

    // Look at shift offsets of DImode more closely;
    // they are in R16 for __lshrdi3 etc.  Patch xop1 on success.
    if (n_bytes == 8
	&& ! CONST_INT_P (xop1)
	&& GET_MODE (xop0) == mode)
      {
	const int n_off = GET_MODE_SIZE (GET_MODE (xop1));
	const absint_t aoff = absint_t::explore (xop1, memo);
	xop1 = aoff.get_value_as_const_int (n_off);
      }

    if (! xop1
	|| GET_MODE (xop0) != mode
	|| ! IN_RANGE (n_bytes, 1, FUSE_MOVE_MAX_MODESIZE)
	|| ! CONST_INT_P (xop1)
	|| ! IN_RANGE (INTVAL (xop1), 8, 8 * n_bytes - 1))
      return ai;

    const int off = INTVAL (xop1);
    const absint_t ai0 = absint_t::explore (xop0, memo);

    switch (GET_CODE (x))
      {
      default:
	break;

      case ASHIFT:
	// Shifting in 0x00's from the right.
	for (int i = 0; i < off / 8; ++i)
	  ai[i].learn_val8 (0);
	break;

      case LSHIFTRT:
      case ASHIFTRT:
	{
	  // Shifting in 0x00's or signs from the left.
	  absint_byte_t b_signs = ai0[n_bytes - 1].get_signs (GET_CODE (x));
	  for (int i = n_bytes - off / 8; i < n_bytes; ++i)
	    ai[i] = b_signs;
	  if (off == 8 * n_bytes - 1)
	    if (code == ASHIFTRT)
	      ai[0] = b_signs;
	}
	break;
      }

    if (off % 8 != 0
	|| ai0.popcount () == 0)
      return ai;

    // For shift offsets that are a multiple of 8, record the
    // action on the constituent bytes.

    // Bytes are moving left by this offset (or zero for "none").
    const int boffL = select<int>()
      : code == ROTATE || code == ASHIFT ? off / 8
      : code == ROTATERT ? n_bytes - off / 8
      : 0;

    // Bytes are moving right by this offset (or zero for "none").
    const int boffR = select<int>()
      : code == ROTATERT || code == ASHIFTRT || code == LSHIFTRT ? off / 8
      : code == ROTATE ? n_bytes - off / 8
      : 0;

    if (dump_flags & TDF_FOLDING)
      {
	avr_dump (";; AI.explore_shift %C:%m ", code, mode);
	if (boffL)
	  avr_dump ("<< %d%s", 8 * boffL, boffL && boffR ? ", " : "");
	if (boffR)
	  avr_dump (">> %d", 8 * boffR);
	avr_dump ("\n");
      }

    if (boffL)
      for (int i = 0; i < n_bytes - boffL; ++i)
	ai[i + boffL] = ai0[i];

    if (boffR)
      for (int i = boffR; i < n_bytes; ++i)
	ai[i - boffR] = ai0[i];

    return ai;
  }
  void dump (const char *msg = nullptr, FILE *f = dump_file) const
  {
    if (f)
      dump (NULL_RTX, msg, f);
  }

  void dump (rtx dest, const char *msg = nullptr, FILE *f = dump_file) const
  {
    if (f)
      {
	int regno = dest && REG_P (dest) ? REGNO (dest) : 0;

	msg = msg && msg[0] ? msg : "AI=[%s]\n";
	const char *const xs = strstr (msg, "%s");
	gcc_assert (xs);

	fprintf (f, "%.*s", (int) (xs - msg), msg);
	for (int i = max_knows (); i >= 0; --i)
	  {
	    const int sub_regno = eq[i].regno (false /*nonstrict*/);
	    const bool nop = regno && sub_regno == regno + i;
	    eq[i].dump (nop ? "%s=nop" : "%s", f);
	    fprintf (f, "%s", i ? "; " : "");
	  }
	fprintf (f, "%s", xs + strlen ("%s"));
      }
  }
}; // absint_t
// Information for a REG = non-memory single_set.

struct insninfo_t
{
  // This is an insn that sets the m_size bytes of m_regno to either
  // - A compile time constant m_isrc (m_code = CONST_INT), or
  // - The contents of register number m_rsrc (m_code = REG).
  int m_size = 0;
  int m_regno;
  int m_rsrc;
  rtx_code m_code;
  uint64_t m_isrc;
  rtx_insn *m_insn = nullptr;
  rtx m_set = NULL_RTX;
  rtx m_src = NULL_RTX;
  int m_scratch = 0; // 0 or the register number of a QImode scratch.
  rtx_code m_old_code = UNKNOWN;

  // Knowledge about the bytes of the SET_SRC: A byte may have a known
  // value, may be known to equal some register (e.g. with BSWAP),
  // or both, or may be unknown.
  absint_t m_ai;

  // May be set for binary operations.
  absint_byte_t m_new_src;

  bool init1 (insn_optimize_data_t &, int max_size, const char *purpose);

  // Upper bound for the cost (in words) of a move<mode> insn that
  // performs a REG = CONST_XXX = .m_isrc move of modesize .m_size.
  int cost () const;
  bool combine (const insninfo_t &prev, const insninfo_t &curr);
  int emit_insn () const;

  bool needs_scratch () const
  {
    gcc_assert (m_code == CONST_INT);

    for (int i = 0; i < m_size; ++i)
      if (AVRasm::ldi_needs_scratch (m_regno, m_isrc >> (8 * i)))
	return true;

    return false;
  }

  int hamming (const memento_t &memo) const
  {
    gcc_assert (m_code == CONST_INT);

    int h = 0;
    for (int i = 0; i < m_size; ++i)
      h += ! memo.have_value (m_regno + i, 1, 0xff & (m_isrc >> (8 * i)));

    return h;
  }

  // Upper bound for the number of ply_t's of a solution, given a Hamming
  // distance of HAMM (-1 for unknown).
  int n_best_plys (int hamm = -1) const
  {
    gcc_assert (m_code == CONST_INT);

    if (m_size == 8)
      return (hamm >= 0 ? hamm : m_size);
    else if (hamm <= 4)
      return (hamm >= 0 ? hamm : m_size)
	// The following terms give the max number of MOVWs with a
	// Hamming difference of less than 2.
	+ (AVR_HAVE_MOVW && m_regno < REG_14) * m_size / 2
	+ (AVR_HAVE_MOVW && m_regno == REG_14) * std::max (0, m_size - 2)
	- (AVR_HAVE_MOVW && hamm == 4 && (uint32_t) m_isrc % 0x10001 == 0);
    else
      gcc_unreachable ();
  }
}; // insninfo_t
struct insn_optimize_data_t
{
  // Known values held in GPRs prior to the action of .insn / .ii.
  memento_t &regs;
  rtx_insn *insn;
  insninfo_t ii;
  bool unused;

  insn_optimize_data_t () = delete;

  insn_optimize_data_t (memento_t &memo)
    : regs(memo)
  {}
}; // insn_optimize_data_t

struct optimize_data_t
{
  insn_optimize_data_t prev;
  insn_optimize_data_t curr;

  // Number >= 0 of new insns that replace the curr insn and maybe also the
  // prev insn.  -1 when no replacement has been found.
  int n_new_insns = -1;

  // .prev will be removed provided we have (potentially zero) new insns.
  bool delete_prev_p = false;

  // Ignore these GPRs when comparing the simulation results of
  // old and new insn sequences.  Usually some scratch reg(s).
  gprmask_t ignore_mask = 0;

  optimize_data_t () = delete;

  optimize_data_t (memento_t &prev_regs, memento_t &curr_regs)
    : prev(prev_regs), curr(curr_regs)
  {}

  bool try_fuse (bbinfo_t *);
  bool try_mem0 (bbinfo_t *);
  bool try_bin_arg1 (bbinfo_t *);
  bool try_simplify (bbinfo_t *);
  bool try_split_ldi (bbinfo_t *);
  bool try_split_any (bbinfo_t *);
  bool fail (const char *reason);
  bool emit_signs (int r_sign, gprmask_t);
  void emit_move_mask (int dest, int src, int n_bytes, gprmask_t &);
  rtx_insn *emit_sequence (basic_block, rtx_insn *);
  bool get_2ary_operands (rtx_code &, const absint_byte_t &,
			  insn_optimize_data_t &, int r_dest,
			  absint_val_t &, absint_val_t &, int &ex_cost);
  rtx_insn *emit_and_apply_move (memento_t &, rtx dest, rtx src);

  // M2 is the state of GPRs as the sequence starts; M1 is the state
  // one before.
  static void apply_sequence (const std::vector<rtx_insn *> &insns,
			      memento_t &m1, memento_t &m2)
  {
    gcc_assert (insns.size () >= 1);

    for (auto &i : insns)
      {
	m1 = m2;
	m2.apply_insn (i, false);
      }
  }
}; // optimize_data_t
// Emit INSNS before .curr.insn, replacing .curr.insn and also .prev.insn
// when .delete_prev_p is on.  Adjusts .curr.regs and .prev.regs accordingly.
rtx_insn *
optimize_data_t::emit_sequence (basic_block bb, rtx_insn *insns)
{
  gcc_assert (n_new_insns >= 0);

  // The old insns will be replaced by and simulated...
  const std::vector<rtx_insn *> old_insns = delete_prev_p
    ? std::vector<rtx_insn *> { prev.insn, curr.insn }
    : std::vector<rtx_insn *> { curr.insn };

  // ...against the new insns.
  std::vector<rtx_insn *> new_insns;
  for (rtx_insn *i = insns; i; i = NEXT_INSN (i))
    new_insns.push_back (i);

  rtx_insn *new_curr_insn;

  memento_t &m1 = prev.regs;
  memento_t &m2 = curr.regs;

  if (new_insns.empty ())
    {
      if (delete_prev_p)
	{
	  m2 = m1;
	  m1.known = 0;
	  new_curr_insn = prev_nondebug_insn_bb (bb, prev.insn);
	}
      else
	new_curr_insn = prev.insn;
    }
  else
    {
      // We are going to emit at least one new insn.  Simulate the effect of
      // the new sequence and compare it against the effect of the old one.
      // Both effects must be the same (modulo scratch regs).

      memento_t n1 = m1;
      memento_t n2 = m2;

      if (delete_prev_p)
	{
	  m2 = m1, m1.known = 0;
	  n2 = n1, n1.known = 0;
	}

      avr_dump (";; Applying new route...\n");
      optimize_data_t::apply_sequence (new_insns, n1, n2);

      avr_dump (";; Applying old route...\n");
      optimize_data_t::apply_sequence (old_insns, m1, m2);
      avr_dump ("\n");

      if (! m2.equals (n2, ignore_mask))
	{
	  // When we come here, then
	  // - We have a genuine bug, and/or
	  // - We did produce insns that are opaque to absint_t's explore().
	  avr_dump ("INCOMPLETE APPLICATION:\n");
	  m2.dump ("regs old route=%s\n\n");
	  n2.dump ("regs new route=%s\n\n");
	  avr_dump ("The new insns are:\n%L", insns);

	  fatal_insn ("incomplete application of insn", insns);
	}

      // Use N1 and N2 as the new GPR states.  Even though they are equal
      // modulo ignore_mask, N2 may know more about GPRs when it doesn't
      // clobber the scratch reg.
      m1 = n1;
      m2 = n2;

      emit_insn_before (insns, curr.insn);

      new_curr_insn = new_insns.back ();
    }

  if (delete_prev_p)
    SET_INSN_DELETED (prev.insn);

  SET_INSN_DELETED (curr.insn);

  return new_curr_insn;
}
const pass_data avr_pass_data_fuse_move =
{
  RTL_PASS,	  // type
  "",		  // name (will be patched)
  OPTGROUP_NONE,  // optinfo_flags
  TV_MACH_DEP,	  // tv_id
  0,		  // properties_required
  0,		  // properties_provided
  0,		  // properties_destroyed
  0,		  // todo_flags_start
  TODO_df_finish | TODO_df_verify // todo_flags_finish
};

class avr_pass_fuse_move : public rtl_opt_pass
{
public:
  avr_pass_fuse_move (gcc::context *ctxt, const char *name)
    : rtl_opt_pass (avr_pass_data_fuse_move, ctxt)
  {
    this->name = name;
  }

  unsigned int execute (function *func) final override
  {
    if (optimize > 0 && avropt_fuse_move > 0)
      {
	df_note_add_problem ();
	df_analyze ();

	bbinfo_t::optimize_one_function (func);
      }

    return 0;
  }
}; // avr_pass_fuse_move
// Append PLY to .plies[].  A SET or BLD ply may start a new sequence of
// SETs or BLDs and gets assigned the overhead of the sequence, like for an
// initial SET or CLT instruction.  A SET ply may be added in two flavours:
// One that starts a sequence of single_sets, and one that represents the
// payload of a set_some insn.  MEMO is the GPR state prior to PLY.
void
plies_t::add (ply_t ply, const ply_t *prev, const memento_t &memo,
	      bool maybe_set_some)
{
  if (ply.code == SET)
    {
      if (prev && prev->code == SET)
	{
	  // Proceed with the SET sequence flavour.
	  ply.in_set_some = prev->in_set_some;

	  if (ply.in_set_some)
	    ply.scratch = 0;
	  else if (! ply.scratch && ply.needs_scratch ())
	    ply.cost += 2;
	}
      else
	{
	  // The 1st SET in a sequence.  May use set_some to set
	  // all bytes in one insn, or a bunch of single_sets.

	  // Route 1: Bunch of single_sets.
	  const int ply_cost = ply.cost;
	  if (! ply.scratch && ply.needs_scratch ())
	    ply.cost += 2;
	  ply.in_set_some = false;

	  add (ply);

	  if (maybe_set_some)
	    {
	      // Route 2: One set_some: The 1st SET gets all the overhead.
	      ply.scratch = 0;
	      ply.cost = ply_cost + 1 + ! memo.known_dregno ();
	      ply.in_set_some = true;
	    }
	}
    } // SET
  else if (ply.is_bld ())
    {
      // The first BLD in a series of BLDs gets the extra costs
      // for the SET / CLT that precedes the BLDs.
      ply.cost += ! ply.is_same_bld (prev);
    }

  add (ply);
}
// Emit insns for .plies[] and return the number of emitted insns.
// The emitted insns represent the effect of II with MEMO, which
// is the GPR knowledge before II is executed.
int
plies_t::emit_insns (const insninfo_t &ii, const memento_t &memo) const
{
  int n_insns = 0;

  for (int i = 0; i < n_plies; ++i)
    {
      const ply_t &p = plies[i];

      // SETs and BLDs are dumped by their emit_xxxs().
      if (p.code != SET && ! p.is_bld ())
	p.dump ();

      rtx src1 = NULL_RTX;
      rtx src2 = NULL_RTX;
      rtx dest = NULL_RTX;
      rtx xscratch = NULL_RTX;
      rtx_code code = p.code;

      switch (p.code)
	{
	default:
	  avr_dump ("\n\n;; Bad ply_t:\n");
	  p.dump (i + 1);
	  gcc_unreachable ();
	  break;

	case REG: // *movhi = MOVW; movqi_insn = MOV
	  dest = gen_rtx_REG (p.size == 1 ? QImode : HImode, p.regno);
	  src1 = gen_rtx_REG (p.size == 1 ? QImode : HImode, p.arg);
	  break;

	case SET: // movqi_insn = LDI, CLR; set_some = (LDI + MOV) ** size.
	  i += emit_sets (ii, n_insns, memo, i) - 1;
	  continue;

	case MOD: // *ior<mode>3, *and<mode>3 = SET + BLD... / CLT + BLD...
	  i += emit_blds (ii, n_insns, i) - 1;
	  continue;

	case MINUS: // *subqi3 = SUB
	case PLUS:  // *addqi3 = ADD
	case AND:   // *andqi3 = AND
	case IOR:   // *iorqi3 = OR
	case XOR:   // *xorqi3 = EOR
	  dest = gen_rtx_REG (QImode, p.regno);
	  src2 = gen_rtx_REG (QImode, p.arg);
	  break;

	case PRE_INC: // *addqi3 = INC
	case PRE_DEC: // *addqi3 = DEC
	  code = PLUS;
	  dest = gen_rtx_REG (QImode, p.regno);
	  src2 = p.code == PRE_INC ? const1_rtx : constm1_rtx;
	  break;

	case NEG: // *negqi2 = NEG
	case NOT: // *one_cmplqi2 = COM
	  dest = gen_rtx_REG (QImode, p.regno);
	  src1 = dest;
	  break;

	case ROTATE:   // *rotlqi3 = SWAP
	case ASHIFT:   // *ashlqi3 = LSL
	case ASHIFTRT: // *ashrqi3 = ASR
	case LSHIFTRT: // *lshrqi3 = LSR
	  dest = gen_rtx_REG (QImode, p.regno);
	  src2 = GEN_INT (code == ROTATE ? 4 : 1);
	  break;

	case SS_PLUS: // *addhi3 = ADIW, SBIW
	  code = PLUS;
	  dest = gen_rtx_REG (HImode, p.regno);
	  src2 = gen_int_mode (p.arg, HImode);
	  break;
	} // switch p.code

      gcc_assert (dest && (! src1) + (! src2) == 1);

      rtx src = code == REG || code == SET
	? src1
	: (src2
	   ? gen_rtx_fmt_ee (code, GET_MODE (dest), dest, src2)
	   : gen_rtx_fmt_e (code, GET_MODE (dest), src1));

      emit_valid_move_clobbercc (dest, src, xscratch);
      n_insns += 1;
    }

  return n_insns;
}
// Helper for .emit_insns().  Emit an ior<mode>3 or and<mode>3 insn
// that's equivalent to a sequence of contiguous BLDs starting at
// .plies[ISTART].  Updates N_INSNS according to the number of insns
// emitted and returns the number of consumed plys in .plies[].
int
plies_t::emit_blds (const insninfo_t &ii, int &n_insns, int istart) const
{
  const ply_t &first = plies[istart];

  gcc_assert (ii.m_size <= 4);
  gcc_assert (first.is_bld ());

  const rtx_code code = first.is_setbld () ? IOR : AND;
  const machine_mode mode = size_to_mode (ii.m_size);

  // Determine mask and number of BLDs.

  uint32_t mask = 0;
  int n_blds = 0;

  for (int i = istart; i < n_plies; ++i, ++n_blds)
    {
      const ply_t &p = plies[i];
      if (! p.is_bld () || ! p.is_same_bld (& first))
	break;

      // For AND, work on the 1-complement of the mask,
      // i.e. 1's specify which bits to clear.
      uint8_t mask8 = code == IOR ? p.arg : ~p.arg;
      mask |= mask8 << (8 * (p.regno - ii.m_regno));
    }

  mask = GET_MODE_MASK (mode) & (code == IOR ? mask : ~mask);

  if (dump_file)
    {
      fprintf (dump_file, ";; emit_blds[%d...%d] R%d[%d]%s=%0*x\n",
	       istart, istart + n_blds - 1, ii.m_regno, ii.m_size,
	       code == IOR ? "|" : "&", 2 * ii.m_size, (int) mask);
    }

  for (int i = 0; i < n_blds; ++i)
    plies[i + istart].dump ();

  rtx dest = gen_rtx_REG (mode, ii.m_regno);
  rtx src = gen_rtx_fmt_ee (code, mode, dest, gen_int_mode (mask, mode));
  rtx xscratch = mode == QImode ? NULL_RTX : gen_rtx_SCRATCH (QImode);

  emit_valid_move_clobbercc (dest, src, xscratch);
  n_insns += 1;

  return n_blds;
}
// Emit insns for a contiguous sequence of SET ply_t's starting at
// .plies[ISTART].  Advances N_INSNS by the number of emitted insns.
// MEMO is the state of the GPRs before II is executed, where II
// represents the insn under optimization.
// The emitted insns are "movqi_insn" or "*reload_inqi"
// when .plies[ISTART].in_set_some is not set, and one "set_some" insn
// when .plies[ISTART].in_set_some is set.
int
plies_t::emit_sets (const insninfo_t &ii, int &n_insns, const memento_t &memo,
		    int istart) const
{
  gcc_assert (plies[istart].code == SET);

  const bool in_set_some = plies[istart].in_set_some;

  // Some d-regno that holds a compile-time constant, or 0.
  const int known_dregno = memo.known_dregno ();

  // Determine the number of contiguous SETs,
  // and sort them in ps[] such that smaller regnos come first.

  const ply_t *ps[FUSE_MOVE_MAX_MODESIZE];
  int n_sets = 0;

  for (int i = istart; i < n_plies && plies[i].code == SET; ++i)
    ps[n_sets++] = & plies[i];

  if (dump_file)
    {
      fprintf (dump_file, ";; emit_sets[%d...%d] R%d[%d]=%0*" PRIx64,
	       istart, istart + n_sets - 1, ii.m_regno, ii.m_size,
	       2 * ii.m_size, ii.m_isrc);
      fprintf (dump_file, ", scratch=%s%d", "R" + ! ii.m_scratch,
	       ii.m_scratch);
      fprintf (dump_file, ", known_dreg=%s%d, set_some=%d\n",
	       "R" + ! known_dregno, known_dregno, in_set_some);
    }

  for (int i = 0; i < n_sets; ++i)
    ps[i]->dump ();

  // Sort.  This is most useful on regs like (reg:SI REG_14).
  for (int i = 0; i < n_sets - 1; ++i)
    for (int j = i + 1; j < n_sets; ++j)
      if (ps[i]->regno > ps[j]->regno)
	std::swap (ps[i], ps[j]);

  // Prepare operands.
  rtx dst[FUSE_MOVE_MAX_MODESIZE];
  rtx src[FUSE_MOVE_MAX_MODESIZE];
  for (int i = 0; i < n_sets; ++i)
    {
      dst[i] = gen_rtx_REG (QImode, ps[i]->regno);
      src[i] = gen_int_mode (ps[i]->arg, QImode);
    }

  if (in_set_some)
    {
      // Emit a "set_some" insn that sets all of the collected 8-bit SETs.
      // This is a parallel with n_sets QImode SETs as payload.

      gcc_assert (! known_dregno || memo.knows (known_dregno));

      // A scratch reg...
      rtx op1 = known_dregno
	? gen_rtx_REG (QImode, known_dregno)
	: const0_rtx;
      // ...with a known content, so it can be restored without saving.
      rtx op2 = known_dregno
	? gen_int_mode (memo.values[known_dregno], QImode)
	: const0_rtx;
      // Target register envelope.
      rtx op3 = GEN_INT (ii.m_regno);
      rtx op4 = GEN_INT (ii.m_size);

      // Payload.
      for (int i = 0; i < n_sets; ++i)
	dst[i] = gen_rtx_SET (dst[i], src[i]);

      rtvec vec = gen_rtvec (5 + n_sets,
			     gen_rtx_USE (VOIDmode, op1),
			     gen_rtx_USE (VOIDmode, op2),
			     gen_rtx_USE (VOIDmode, op3),
			     gen_rtx_USE (VOIDmode, op4),
			     gen_rtx_CLOBBER (VOIDmode, cc_reg_rtx),
			     dst[0], dst[1], dst[2], dst[3]);
      rtx pattern = gen_rtx_PARALLEL (VOIDmode, vec);

      emit_valid_insn (pattern);
      n_insns += 1;
    }
  else
    {
      // Emit a bunch of movqi_insn / *reload_inqi insns.

      for (int i = 0; i < n_sets; ++i)
	if (ii.m_scratch
	    && AVRasm::constant_cost (SET, ps[i]->regno, ps[i]->arg) > 1)
	  {
	    rtx scratch = gen_rtx_REG (QImode, ii.m_scratch);
	    bool use_reload_inqi = true;
	    if (use_reload_inqi)
	      {
		emit_valid_move_clobbercc (dst[i], src[i], scratch);
		n_insns += 1;
	      }
	    else
	      {
		emit_valid_move_clobbercc (scratch, src[i]);
		emit_valid_move_clobbercc (dst[i], scratch);
		n_insns += 2;
	      }
	  }
	else
	  {
	    emit_valid_move_clobbercc (dst[i], src[i]);
	    n_insns += 1;
	  }
    }

  return n_sets;
}
// Try to find an operation such that Y = op (X).
// Shifts and rotates are regarded as unary operations with
// an implied 2nd operand of 1 or 4, respectively.
static rtx_code
find_arith (uint8_t y, uint8_t x)
{
#define RETIF(ex, code) y == (0xff & (ex)) ? code
  return select<rtx_code>()
    : RETIF (x + 1, PRE_INC)
    : RETIF (x - 1, PRE_DEC)
    : RETIF ((x << 4) | (x >> 4), ROTATE)
    : RETIF (-x, NEG)
    : RETIF (~x, NOT)
    : RETIF (x >> 1, LSHIFTRT)
    : RETIF (x << 1, ASHIFT)
    : RETIF ((x >> 1) | (x & 0x80), ASHIFTRT)
    : UNKNOWN;
#undef RETIF
}
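
// Example (for illustration): find_arith (0x10, 0x01) == ROTATE since a
// SWAP of 0x01 gives 0x10, and find_arith (0xfe, 0x01) == NOT.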
// Try to find an operation such that Z = X op Y.
static rtx_code
find_arith2 (uint8_t z, uint8_t x, uint8_t y)
{
#define RETIF(ex, code) z == (0xff & (ex)) ? code
  return select<rtx_code>()
    : RETIF (x + y, PLUS)
    : RETIF (x - y, MINUS)
    : RETIF (x & y, AND)
    : RETIF (x | y, IOR)
    : RETIF (x ^ y, XOR)
    : UNKNOWN;
#undef RETIF
}
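
// Example (for illustration): find_arith2 (0x30, 0x10, 0x20) == PLUS,
// and find_arith2 (0x00, 0x55, 0x55) == MINUS, matched before XOR which
// would also yield 0x00.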
// Add plies to .plies[] that represent a MOVW, but only ones that reduce
// the Hamming distance from REGNO[SIZE] to VAL by exactly DHAMM.
void
plies_t::add_plies_movw (int regno, int size, uint64_t val,
			 int dhamm, const memento_t &memo)
{
  if (! AVR_HAVE_MOVW || size < 2)
    return;

  for (int i = 0; i < size - 1; i += 2)
    {
      // A MOVW that sets less than 2 regs to the target value is
      // not needed for the upper regs.
      if (dhamm != 2 && regno + i >= REG_16)
	continue;

      const uint16_t val16 = val >> (8 * i);
      const uint8_t lo8 = val16;
      const uint8_t hi8 = val16 >> 8;

      // When one of the target bytes is already as expected, then
      // no MOVW is needed for an optimal sequence.
      if (memo.have_value (regno + i, 1, lo8)
	  || memo.have_value (regno + i + 1, 1, hi8))
	continue;

      const int h_old = memo.hamming (regno + i, 2, val16);

      // Record MOVWs that reduce the Hamming distance by DHAMM as requested.
      for (int j = FIRST_GPR; j < REG_32; j += 2)
	if (j != regno + i
	    && memo.knows (j, 2))
	  {
	    const int h_new = memo.hamming (j, 2, val16);
	    if (h_new == h_old - dhamm)
	      add (ply_t { regno + i, 2, REG, j, 1, dhamm });
	  }
    }
}
// Set PS to plys that reduce the Hamming distance from II.m_regno to
// compile-time constant II.m_isrc by 2, 1 or 0.  PREV is NULL or points
// to a previous ply_t.  MEMO is the GPR state after PREV and prior to the
// added plys.
void
bbinfo_t::get_plies (plies_t &ps, const insninfo_t &ii, const memento_t &memo,
		     const ply_t *prev)
{
  ps.reset ();

  fpd->n_get_plies += 1;

  const bool maybe_set_some = (bbinfo_t::use_set_some_p
			       && ii.needs_scratch ());

  // Start with cheap plies, then continue to more expensive ones.
  const int regno = ii.m_regno;
  const int size = ii.m_size;
  const uint64_t val = ii.m_isrc;

  // Find MOVW with a Hamming delta of 2.
  ps.add_plies_movw (regno, size, val, 2, memo);

  // Find ADIW / SBIW.
  if (AVR_HAVE_ADIW && size >= 2)
    for (int i = 0; i < size - 1; i += 2)
      if (regno + i >= REG_24
	  && memo.knows (regno + i, 2))
	{
	  const int16_t value16 = memo[regno + i] + 256 * memo[regno + i + 1];
	  const int16_t lo16 = val >> (8 * i);
	  const int16_t delta = lo16 - value16;
	  const uint8_t lo8 = val >> (8 * i);
	  const uint8_t hi8 = val >> (8 * i + 8);
	  if (IN_RANGE (delta, -63, 63)
	      && lo8 != memo[regno + i]
	      && hi8 != memo[regno + i + 1])
	    {
	      ps.add (ply_t { regno + i, 2, SS_PLUS, delta, 1, 2 });
	    }
	}

  // Find 1-reg plies.  In an optimal sequence, each 1-reg ply will decrease
  // the Hamming distance.  Thus we only have to consider plies that set
  // one of the target bytes to the target value VAL.  Start with the
  // high registers since that is the canonical order when two plies commute.

  for (int i = size - 1; i >= 0; --i)
    {
      const uint8_t val8 = val >> (8 * i);

      // Nothing to do for this byte when its value is already as desired.
      if (memo.have_value (regno + i, 1, val8))
	continue;

      // LDI or CLR.
      if (regno + i >= REG_16 || val8 == 0)
	ps.add (ply_t { regno + i, 1, SET, val8, 1 }, prev, memo,
		maybe_set_some);

      // We only may need to MOV non-zero values since there is CLR,
      // and only when there is no LDI.
      if (val8 != 0
	  && regno + i < REG_16)
	{
	  // MOV where the source register is one of the target regs.
	  for (int j = 0; j < size; ++j)
	    if (j != i)
	      if (memo.have_value (regno + j, 1, val8))
		ps.add (ply_t { regno + i, 1, REG, regno + j, 1 });

	  // MOV where the source register is not a target reg.
	  // FIXME: ticks.
	  for (int j = FIRST_GPR; j < REG_32; ++j)
	    if (! IN_RANGE (j, regno, regno + size - 1))
	      if (memo.have_value (j, 1, val8))
		ps.add (ply_t { regno + i, 1, REG, j, 1 });

	  // LDI + MOV.
	  if (regno + i < REG_16 && val8 != 0)
	    {
	      ply_t p { regno + i, 1, SET, val8, 2 };
	      p.scratch = ii.m_scratch;
	      ps.add (p, prev, memo, maybe_set_some);
	    }
	}
    }

  // Arithmetic like INC, DEC or ASHIFT.
  for (int i = size - 1; i >= 0; --i)
    if (bbinfo_t::use_arith_p
	&& regno + i < REG_16
	&& memo.knows (regno + i))
      {
	const uint8_t y = val >> (8 * i);
	const uint8_t x = memo[regno + i];
	rtx_code code;

	if (y == 0 || y == x)
	  continue;

	// INC, DEC, SWAP, LSL, NEG, ...
	if (UNKNOWN != (code = find_arith (y, x)))
	  {
	    ps.add (ply_t { regno + i, 1, code, x /* dummy */, 1 });
	    continue;
	  }

	// ADD, AND, ...
	for (int r = FIRST_GPR; r < REG_32; ++r)
	  if (r != regno + i
	      && memo.knows (r)
	      && memo[r] != 0
	      && UNKNOWN != (code = find_arith2 (y, x, memo[r])))
	    {
	      ps.add (ply_t { regno + i, 1, code, r, 1 });
	    }

	if (size < 2 || size > 4)
	  continue;

	// SET + BLD.
	if ((x & y) == x && popcount_hwi (x ^ y) == 1)
	  ps.add (ply_t { regno + i, 1, MOD, x ^ y, 1 },
		  prev, memo, maybe_set_some);

	// CLT + BLD.
	if ((x & y) == y && popcount_hwi (x ^ y) == 1)
	  ps.add (ply_t { regno + i, 1, MOD, x ^ y ^ 0xff, 1 },
		  prev, memo, maybe_set_some);
      }

  if (bbinfo_t::use_arith_p
      // For 8-byte values, don't use ply_t's with only a partial reduction
      // of the Hamming distance.
      && size <= 4)
    {
      // Find MOVW with a Hamming delta of 1, then 0.
      ps.add_plies_movw (regno, size, val, 1, memo);
      ps.add_plies_movw (regno, size, val, 0, memo);
    }

  plies_t::max_n_plies = std::max (plies_t::max_n_plies, ps.n_plies);
}
// Try to combine two 8-bit insns PREV and CURR that (effectively)
// are REG = CONST_INT to one 16-bit such insn.  Returns true on success.
bool
insninfo_t::combine (const insninfo_t &prev, const insninfo_t &curr)
{
  if (prev.m_size == 1 && curr.m_size == 1
      && prev.m_regno == (1 ^ curr.m_regno)
      && curr.m_code == CONST_INT
      && prev.m_code == CONST_INT)
    {
      m_regno = curr.m_regno & ~1;
      m_code = CONST_INT;
      m_size = 2;
      m_scratch = std::max (curr.m_scratch, prev.m_scratch);
      m_isrc = m_regno == prev.m_regno
	? (uint8_t) prev.m_isrc + 256 * (uint8_t) curr.m_isrc
	: (uint8_t) curr.m_isrc + 256 * (uint8_t) prev.m_isrc;

      return true;
    }

  return false;
}
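
// Example (for illustration): a PREV insn "R24 = 0x34" followed by a
// CURR insn "R25 = 0x12" combines to one insninfo_t that describes
// R24[2] = 0x1234.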
// Return the cost (in terms of words) of the respective mov<mode> insn.
// This can be used as an upper bound for the ply_t's cost.
int
insninfo_t::cost () const
{
  if (m_code != CONST_INT)
    return m_size;

  if (m_regno >= REG_16 || m_isrc == 0)
    return m_size
      // MOVW can save one instruction.
      - (AVR_HAVE_MOVW && m_size == 4 && (uint32_t) m_isrc % 0x10001 == 0);

  // LDI + MOV to a lower reg.
  if (m_scratch && m_size == 1)
    return 2;

  if (m_size == 8)
    {
      int len = m_size;
      for (int i = 0; i < m_size; ++i)
	len += m_regno + i < REG_16 && (0xff & (m_isrc >> (8 * i))) != 0;
      return len;
    }

  // All other cases are complicated.  Ask the output oracle.
  const machine_mode mode = size_to_mode (m_size);
  rtx xscratch = m_scratch ? all_regs_rtx[m_scratch] : NULL_RTX;
  rtx xop[] = { gen_rtx_REG (mode, m_regno), gen_int_mode (m_isrc, mode) };
  int len;
  if (m_size == 4)
    output_reload_insisf (xop, xscratch, &len);
  else
    output_reload_in_const (xop, xscratch, &len, false);

  return len;
}
// Emit the corresponding REG = REG-or-CONST_INT insn.  Returns 1, or aborts
// when the insn is not of that form.
int
insninfo_t::emit_insn () const
{
  int n_insns = 0;

  machine_mode mode = size_to_mode (m_size);
  rtx xsrc = NULL_RTX;
  rtx xscratch = NULL_RTX;

  gcc_assert (m_size > 0);

  switch (m_code)
    {
    default:
      gcc_unreachable ();

    case CONST_INT:
      xsrc = gen_int_mode (m_isrc, mode);
      if (m_scratch && m_regno < REG_16)
	xscratch = gen_rtx_REG (QImode, m_scratch);
      break;

    case REG:
      gcc_assert (gpr_regno_p (m_rsrc, m_size));
      if (m_regno != m_rsrc)
	xsrc = gen_rtx_REG (mode, m_rsrc);
      break;
    }

  if (xsrc)
    {
      rtx dest = gen_rtx_REG (mode, m_regno);
      emit_valid_move_clobbercc (dest, xsrc, xscratch);
      n_insns += 1;
    }

  return n_insns;
}
// Entering a basic block means combining known register values from
// all incoming BBs.
void
bbinfo_t::enter ()
{
  avr_dump ("\n;; Entering [bb %d]\n", bb->index);

  gcc_assert (! done);

  edge e;
  edge_iterator ei;
  gprmask_t pred_known_mask = ~0u;
  bbinfo_t *bbi = nullptr;

  // A quick iteration over all predecessors / incoming edges to reveal
  // whether this BB is worth a closer look.
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      basic_block pred = e->src;
      bbi = & bb_info[pred->index];

      pred_known_mask &= bbi->regs.known;

      if (dump_file)
	{
	  avr_dump (";; [bb %d] <- [bb %d] ", e->dest->index, e->src->index);
	  if (bbi->done)
	    bbi->regs.dump ();
	  else
	    avr_dump (" (unknown)\n");
	}
    }

  // Only if all predecessors have already been handled, we can
  // have known values as we are entering the current BB.
  if (pred_known_mask != 0
      && bbi != nullptr)
    {
      // Initialize current BB info from BBI, an arbitrary predecessor.

      regs = bbi->regs;

      // Coalesce the output values from all preceding BBs.  At the
      // start of the current BB, a value is only known if it is known
      // in *all* predecessors and *all* these values are the same.
      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  regs.coalesce (bb_info[e->src->index].regs);
	}
    }

  if (dump_file)
    {
      avr_dump (";; [bb %d] known at start: ", bb->index);
      if (regs.known)
	regs.dump ();
      else
	avr_dump (" (none)\n");
      avr_dump ("\n");
    }
}
void
bbinfo_t::leave ()
{
  done = true;

  if (dump_file)
    fprintf (dump_file, ";; Leaving [bb %d]\n\n", bb->index);
}
/* Initialize according to INSN which is a 1-byte single_set that's
   (effectively) a reg = reg or reg = const move.  INSN may be the result
   of the current pass's optimization, e.g. something like INC R2 where R2
   has a known content.  MEMO is the state prior to INSN.  Only CONST
   cases are recorded; plus cases that are non-trivial, for example when
   an XOR decays to a move.  */

bool
insninfo_t::init1 (insn_optimize_data_t &iod, int max_size,
		   const char *purpose = "")
{
  m_size = 0;
  m_insn = iod.insn;
  m_old_code = UNKNOWN;
  iod.unused = false;

  if (! iod.insn
      || ! (m_set = single_set_with_scratch (iod.insn, m_scratch)))
    return false;

  rtx dest = SET_DEST (m_set);
  machine_mode mode = GET_MODE (dest);
  const int n_bytes = GET_MODE_SIZE (mode);
  max_size = std::min (max_size, FUSE_MOVE_MAX_MODESIZE);

  if (! REG_P (dest)
      || END_REGNO (dest) > REG_32
      || n_bytes > max_size)
    return false;

  // Omit insns that (explicitly) touch fixed GPRs in any way.
  using elt0_getter_HRS = elt0_getter<HARD_REG_SET, HARD_REG_ELT_TYPE>;
  HARD_REG_SET hregs;
  CLEAR_HARD_REG_SET (hregs);
  find_all_hard_regs (PATTERN (iod.insn), & hregs);
  if (memento_t::fixed_regs_mask & (gprmask_t) elt0_getter_HRS::get (hregs))
    {
      avr_dump (";; %sinit1 has fixed GPRs\n", purpose);
      return false;
    }

  if ((iod.unused = find_reg_note (iod.insn, REG_UNUSED, dest)))
    return false;

  m_src = SET_SRC (m_set);
  m_regno = REGNO (dest);
  const rtx_code src_code = GET_CODE (m_src);

  m_ai = absint_t::explore (m_src, iod.regs, mode);

  if (m_ai.popcount ())
    {
      if (m_ai.end_knows (CONST_INT) >= n_bytes)
	{
	  m_code = CONST_INT;
	  m_old_code = CONSTANT_P (m_src) ? UNKNOWN : src_code;
	  m_isrc = m_ai.get_value (n_bytes);
	  m_size = n_bytes;
	}
      else if (! REG_P (m_src)
	       && n_bytes == 1
	       && m_ai.end_knows (REG) >= n_bytes)
	{
	  m_code = REG;
	  m_old_code = src_code;
	  m_rsrc = m_ai[0].regno ();
	  m_size = n_bytes;
	}
      else if (n_bytes == 1)
	{
	  absint_byte_t &aib = m_new_src;
	  aib = m_ai[0].find_alternative_binary (iod.regs);

	  if (aib.arity () == 2
	      && aib.arg (0).regno == m_regno)
	    {
	      m_old_code = src_code;
	      m_code = aib.get_code ();
	      m_size = n_bytes;
	    }
	}
      else if (n_bytes >= 2
	       && m_ai.end_knows (VALUE) >= n_bytes)
	{
	  m_code = src_code;
	  m_size = n_bytes;
	}
    }

  if (dump_file && m_size != 0)
    {
      avr_dump (";; %sinit1 (%C", purpose,
		m_old_code ? m_old_code : m_code);
      if (m_old_code)
	avr_dump ("-> %C", m_code);
      avr_dump (") insn %d to R%d[%d] := %C:%m = ", INSN_UID (iod.insn),
		m_regno, n_bytes, src_code, mode);

      m_ai.dump (dest);

      if (dump_flags & TDF_FOLDING)
	avr_dump ("\n");
    }

  return m_size != 0;
}
2088 // The private worker for .apply_insn().
2089 void
2090 memento_t::apply_insn1 (rtx_insn *insn, bool unused)
2092 gcc_assert (NONDEBUG_INSN_P (insn));
2094 if (INSN_CODE (insn) == CODE_FOR_set_some)
2096 // This insn only sets some selected bytes of register $3 of
2097 // modesize $4. If non-0, then $1 is a QImode scratch d-reg with
2098 // a known value of $2.
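// For example (an illustrative case), a set_some insn may load bytes 0
// and 2 of a 4-byte register with new values while bytes 1 and 3 keep
// their previously known contents; the loop below records exactly the
// bytes that are set.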
2100 const auto &xop = recog_data.operand;
2101 extract_insn (insn);
2102 gcc_assert (recog_data.n_operands == 7);
2103 gcc_assert (set_some_operation (xop[0], VOIDmode));
2105 const rtx &xscratch = xop[1];
2106 const rtx &xscratch_value = xop[2];
2107 const int sets_start = 5;
2109 for (int i = sets_start; i < XVECLEN (xop[0], 0); ++i)
2111 rtx xset = XVECEXP (xop[0], 0, i);
2112 avr_dump (";; set_some %r = %r\n", XEXP (xset, 0), XEXP (xset, 1));
2113 set_values (XEXP (xset, 0), XEXP (xset, 1));
2116 if (REG_P (xscratch))
2118 avr_dump (";; set_some %r = %r restore\n", xscratch, xscratch_value);
2119 set_values (xscratch, xscratch_value);
2122 return;
2123 } // CODE_FOR_set_some
2125 memento_t mold = *this;
2127 // When insn changes a register in whatever way, set it to "unknown".
2129 HARD_REG_SET rset;
2130 find_all_hard_reg_sets (insn, &rset, true /* implicit */);
2131 (*this) &= ~rset;
2133 rtx set = single_set (insn);
2134 rtx dest;
2136 if (! set
2137 || ! REG_P (dest = SET_DEST (set))
2138 || END_REGNO (dest) > REG_32
2139 || (regmask (dest) & memento_t::fixed_regs_mask))
2140 return;
2142 rtx src = SET_SRC (set);
2143 const rtx_code src_code = GET_CODE (src);
2144 const machine_mode mode = GET_MODE (dest);
2145 const int n_bytes = GET_MODE_SIZE (mode);
2147 // Insns that are too complicated or have a poor yield.
2148 // Just record which regs are clobbered / changed.
2149 if (n_bytes > FUSE_MOVE_MAX_MODESIZE
2150 || MEM_P (src)
2151 || (REG_P (src) && END_REGNO (src) > REG_32))
2153 // Comparisons may clobber the compared reg when it is unused afterwards.
2154 if (src_code == COMPARE
2155 && REG_P (XEXP (src, 0))
2156 && CONSTANT_P (XEXP (src, 1)))
2158 rtx reg = XEXP (src, 0);
2159 for (unsigned r = REGNO (reg); r < END_REGNO (reg); ++r)
2160 set_unknown (r);
2162 return;
2165 if (unused)
2166 return;
2168 // Simulate the effect of some selected insns that are likely to produce
2169 // or propagate known values.
2171 // Get an abstract representation of src. Bytes may be unknown,
2172 // known to equal some 8-bit compile-time constant (CTC) value,
2173 // or known to equal some 8-bit register.
2174 // TODO: Currently, only the ai[].val8 knowledge is used.
2175 // What's the best way to make use of ai[].regno ?
2177 absint_t ai = absint_t::explore (src, mold, mode);
2179 if (ai.popcount ())
2181 avr_dump (";; apply_insn %d R%d[%d] := %C:%m = ", INSN_UID (insn),
2182 REGNO (dest), n_bytes, src_code, mode);
2183 ai.dump ();
2185 for (int i = 0; i < n_bytes; ++i)
2186 if (ai[i].can (CONST_INT))
2187 set_value (i + REGNO (dest), ai[i].val8 ());
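// Apply the effect of ply P to this GPR state.  A worked example with
// illustrative values: when R24 = 0xFF and R25 = 0x01 are known, then
// an ADIW ply with arg = 1 computes 0x01FF + 1 = 0x0200, so that
// R24 = 0x00 and R25 = 0x02 are known afterwards.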
2192 void
2193 memento_t::apply (const ply_t &p)
2195 if (p.is_movw ())
2197 copy_value (p.regno, p.arg);
2198 copy_value (p.regno + 1, p.arg + 1);
2200 else if (p.is_adiw ())
2202 int val = p.arg + values[p.regno] + 256 * values[1 + p.regno];
2203 set_value (p.regno, val);
2204 set_value (p.regno + 1, val >> 8);
2206 else if (p.size == 1)
2208 int x = values[p.regno];
2209 int y = values[p.arg];
2211 switch (p.code)
2213 default:
2214 gcc_unreachable ();
2215 break;
2217 case REG:
2218 copy_value (p.regno, p.arg);
2219 break;
2221 case SET:
2222 set_value (p.regno, p.arg);
2223 if (p.scratch >= REG_16)
2224 set_unknown (p.scratch);
2225 break;
2227 case MOD: // BLD
2228 gcc_assert (knows (p.regno));
2229 if (popcount_hwi (p.arg) == 1)
2230 values[p.regno] |= p.arg;
2231 else if (popcount_hwi (p.arg) == 7)
2232 values[p.regno] &= p.arg;
2233 else
2234 gcc_unreachable ();
2235 break;
2237 #define DO_ARITH(n_args, code, expr) \
2238 case code: \
2239 gcc_assert (knows (p.regno)); \
2240 if (n_args == 2) \
2241 gcc_assert (knows (p.arg)); \
2242 set_value (p.regno, expr); \
2243 break
2245 DO_ARITH (1, NEG, -x);
2246 DO_ARITH (1, NOT, ~x);
2247 DO_ARITH (1, PRE_INC, x + 1);
2248 DO_ARITH (1, PRE_DEC, x - 1);
2249 DO_ARITH (1, ROTATE, (x << 4) | (x >> 4));
2250 DO_ARITH (1, ASHIFT, x << 1);
2251 DO_ARITH (1, LSHIFTRT, x >> 1);
2252 DO_ARITH (1, ASHIFTRT, (x >> 1) | (x & 0x80));
2254 DO_ARITH (2, AND, x & y);
2255 DO_ARITH (2, IOR, x | y);
2256 DO_ARITH (2, XOR, x ^ y);
2257 DO_ARITH (2, PLUS, x + y);
2258 DO_ARITH (2, MINUS, x - y);
2259 #undef DO_ARITH
2261 } // size == 1
2262 else
2263 gcc_unreachable ();
2267 // Try to find a sequence of ply_t's that represents an II.m_regno = II.m_isrc
2268 // insn that sets a reg to a compile-time constant, and that is more
2269 // efficient than just a move insn.  (When try_split_any_p is on,
2270 // solutions that perform equally well as a move insn are also allowed.)
2271 // MEMO0 is the GPR state before II runs. A solution has been found
2272 // when .fpd->solution has at least one entry. LEN specifies the
2273 // depth of recursion, which works on the LEN-th ply_t.
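// The search is a depth-first enumeration of ply_t candidates with
// branch-and-bound pruning: a partial sequence is abandoned as soon as
// its scaled cost plus a lower bound on the remaining cost (derived
// from the Hamming distance) is no better than the best solution found
// so far.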
2274 void
2275 bbinfo_t::find_plies (int len, const insninfo_t &ii, const memento_t &memo0)
2277 if (len > fpd->n_best_plys)
2278 return;
2280 memento_t memo = memo0;
2281 bool ply_applied_p = false;
2284 const bool extra = dump_file && (dump_flags & TDF_FOLDING);
2286 if (extra)
2288 fprintf (dump_file, ";; #%d (HAM=%d): get_plies R%d[%d] = ", len,
2289 ii.hamming (fpd->regs0), ii.m_regno, ii.m_size);
2290 fprintf (dump_file, "0x%0*" PRIx64 "\n",
2291 2 * ii.m_size, ii.m_isrc & size_to_mask (ii.m_size));
2294 plies_t &ps = fpd->plies[len - 1];
2296 const ply_t *const prev = len >= 2 ? fpd->ply_stack[len - 2] : nullptr;
2297 const ply_t *const prev2 = len >= 3 ? fpd->ply_stack[len - 3] : nullptr;
2299 bbinfo_t::get_plies (ps, ii, memo0, prev);
2301 #define NEXT(reason) \
2302 do { \
2303 if (extra) \
2304 fprintf (dump_file, ";; cont=%s\n", reason); \
2305 goto next; \
2306 } while (0)
2308 for (int ip = 0; ip < ps.n_plies; ++ip)
2310 const ply_t &p = ps.plies[ip];
2312 fpd->ply_stack[len - 1] = &p;
2314 if (0)
2315 next: continue;
2317 if (extra)
2318 ply_t::dump_plys (dump_file, len, 1, fpd->ply_stack + len - 1, memo0);
2320 // A MOVW with a Hamming distance of < 2 requires more plys.
2321 if (p.is_movw () && len + (2 - p.dhamming) > fpd->n_best_plys)
2322 NEXT ("movw.plys");
2324 if (len >= 2)
2326 // Destroying (parts of) the results of the previous ply
2327 // won't yield an optimal sequence.
2328 if (p.overrides (prev))
2329 NEXT ("overrides");
2331 // When two plys are independent of each other, only investigate
2332 // sequences that operate on the higher reg first.  This
2333 // canonicalization reduces the number of candidates.
2334 if (p.commutes_with (prev, ii.m_scratch)
2335 && p.regno > prev->regno)
2336 NEXT ("noncanonic");
2338 // Two subsequent BLDs touching the same register.
2339 if (p.is_bld ()
2340 && prev->is_bld ()
2341 && p.changes_result_of (prev))
2342 NEXT ("2bld");
2344 // When there is a BLD, then at least 2 of the same kind
2345 // shall occur in a row.
2346 if (prev->is_bld ()
2347 && ! p.is_bld ()
2348 && (len == 2
2349 || (prev->is_setbld () && ! prev2->is_setbld ())
2350 || (prev->is_cltbld () && ! prev2->is_cltbld ())))
2351 NEXT ("1bld");
2354 // The Hamming delta of a MOVW may be less than 2, namely 0 or 1.
2355 // When the latter is the case, then a reasonable sequence must
2356 // modify the result of the MOVW.
2357 if (len >= 2
2358 && prev->is_movw ()
2359 && prev->dhamming == 1
2360 && ! p.changes_result_of (prev))
2361 NEXT ("movw.dh=1");
2363 if (len >= 3
2364 && prev2->is_movw ()
2365 && prev2->dhamming == 0
2366 && ! p.changes_result_of (prev2))
2367 NEXT ("movw.dh=0");
2369 // When setting an n-byte destination, at most n/2 MOVWs will
2370 // occur in an optimal sequence.
2371 int n_movw = 0;
2372 for (int i = 0; i < len; ++i)
2373 n_movw += fpd->ply_stack[i]->is_movw ();
2374 if (n_movw > ii.m_size / 2)
2375 NEXT ("movws");
2377 if (ply_applied_p)
2378 memo = memo0;
2380 memo.apply (p);
2382 ply_applied_p = true;
2384 // Calculate the cost of the sequence we have so far. Scale by some
2385 // factor so that we can express that ADIW is more expensive than MOVW
2386 // because it is slower, but without defeating MOVW.
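// For instance, with SCALE = 4 a 1-word MOVW contributes 4 cost units,
// while a 1-word ADIW contributes 4 + 1.  For short sequences the +1
// penalties cannot sum up to a whole word, so they act as a
// tie-breaker that disfavors ADIW without changing the word count.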
2387 const int SCALE = 4;
2389 int penal = 0;
2390 int cost = SCALE * 0;
2392 bool movw_p = false;
2393 for (int i = 0; i < len; ++i)
2395 bool adiw_p = fpd->ply_stack[i]->is_adiw ();
2396 cost += SCALE * fpd->ply_stack[i]->cost + adiw_p;
2397 penal += adiw_p;
2398 movw_p |= fpd->ply_stack[i]->is_movw ();
2400 penal += movw_p;
2402 const int hamm = ii.hamming (memo);
2404 // The current Hamming distance yields a lower bound on how many
2405 // plys are still required.  Account for that future cost already now.
2406 int future_cost = AVR_HAVE_MOVW || (AVR_HAVE_ADIW && ii.m_regno >= REG_22)
2407 ? (1 + hamm) / 2
2408 : hamm;
2410 // Similarly, when MOVW doesn't decrease the Hamming distance by 2,
2411 // then we know that at least 2 - dhamming plys must follow in the
2412 // future. (MOVW + ADIW will not occur.)
2413 if (p.is_movw ())
2414 future_cost = std::max (future_cost, 2 - p.dhamming);
2416 if (extra && future_cost)
2417 avr_dump (";; future cost = %d, dh=%d\n", future_cost, hamm);
2419 cost += SCALE * future_cost;
2421 bool profitable = (cost < SCALE * fpd->max_ply_cost
2422 || (bbinfo_t::try_split_any_p
2423 && fpd->solution.n_plies == 0
2424 && cost / SCALE <= fpd->max_ply_cost
2425 && cost / SCALE == fpd->movmode_cost));
2426 if (! profitable)
2428 if (extra)
2429 avr_dump (";; cont=cost %d+%d/%d\n", cost / SCALE, penal, SCALE);
2430 continue;
2433 if (hamm)
2435 // Go down that rabbit hole.
2436 gcc_assert (ply_applied_p);
2437 bbinfo_t::find_plies (1 + len, ii, memo);
2438 continue;
2441 // Found a solution that's better than everything so far.
2443 // Reduce the upper cost bound according to the found solution.
2444 // No future solution will be more expensive.
2445 fpd->max_ply_cost = cost / SCALE;
2447 fpd->solution = plies_t (len, fpd->ply_stack);
2449 if (dump_file)
2451 avr_dump (";; #%d FOUND COST = %d%s\n", len, cost / SCALE,
2452 penal ? " with penalty" : "");
2453 ply_t::dump_plys (dump_file, 0, len, fpd->ply_stack, fpd->regs0);
2454 if (extra)
2455 avr_dump (";; END\n");
2457 } // for ply_t's
2459 #undef NEXT
2463 // Run .find_plies() and return true when .fpd->solution is a sequence of ply_t's
2464 // that represents II, a REG = CONST insn. MEMO is the GPR state prior to II.
2465 bool
2466 bbinfo_t::run_find_plies (const insninfo_t &ii, const memento_t &memo) const
2468 fpd->solution.reset ();
2469 fpd->regs0 = memo;
2470 fpd->n_get_plies = 0;
2472 const int hamm = ii.hamming (memo);
2474 if (hamm == 0)
2476 avr_dump (";; Found redundant insn %d\n",
2477 ii.m_insn ? INSN_UID (ii.m_insn) : 0);
2478 return true;
2481 // Upper bound (in words) for any solution that's better than mov<mode>.
2482 // Will be decreased by find_plies as it finds better solutions.
2483 fpd->movmode_cost = ii.cost ();
2484 fpd->max_ply_cost = fpd->movmode_cost;
2486 // With a non-zero Hamming distance, this insn will require at least one
2487 // instruction. When the upper bound for required instructions is that
2488 // small, then the current insn is good enough.
2489 if (fpd->max_ply_cost <= 1)
2490 return false;
2492 fpd->n_best_plys = ii.n_best_plys (hamm);
2493 gcc_assert (fpd->n_best_plys <= N_BEST_PLYS);
2495 if (dump_file)
2497 const uint64_t mask = size_to_mask (ii.m_size);
2498 fprintf (dump_file, ";; find_plies R%d[%d] = 0x%0*" PRIx64,
2499 ii.m_regno, ii.m_size, 2 * ii.m_size, ii.m_isrc & mask);
2500 if (ii.m_scratch)
2501 fprintf (dump_file, ", scratch=r%d", ii.m_scratch);
2502 memo.dump ("\n;; regs%s\n");
2505 avr_dump (";; mov<mode> cost = %d\n", fpd->max_ply_cost);
2506 avr_dump (";; max plys = %d\n", fpd->n_best_plys);
2507 ply_t::n_ply_ts = 0;
2509 find_plies (1, ii, memo);
2511 avr_dump (";; get_plies called %d times\n", fpd->n_get_plies);
2512 avr_dump (";; n_ply_ts = %d\n", ply_t::n_ply_ts);
2513 ply_t::max_n_ply_ts = std::max (ply_t::max_n_ply_ts, ply_t::n_ply_ts);
2515 return fpd->solution.n_plies != 0;
2519 // Try to propagate __zero_reg__ to a mem = reg insn's source.
2520 // Returns true on success and sets .n_new_insns.
2521 bool
2522 optimize_data_t::try_mem0 (bbinfo_t *)
2524 rtx_insn *insn = curr.ii.m_insn;
2525 rtx set, mem, reg;
2526 machine_mode mode;
2528 if (insn
2529 && (set = single_set (insn))
2530 && MEM_P (mem = SET_DEST (set))
2531 && REG_P (reg = SET_SRC (set))
2532 && GET_MODE_SIZE (mode = GET_MODE (mem)) <= 4
2533 && END_REGNO (reg) <= REG_32
2534 && ! (regmask (reg) & memento_t::fixed_regs_mask)
2535 && curr.regs.have_value (REGNO (reg), GET_MODE_SIZE (mode), 0x0))
2537 avr_dump (";; Found insn %d: mem:%m = 0 = r%d\n", INSN_UID (insn),
2538 mode, REGNO (reg));
2540 // Some insns like PUSHes don't clobber REG_CC.
2541 bool clobbers_cc = GET_CODE (PATTERN (insn)) == PARALLEL;
2543 if (clobbers_cc)
2544 emit_valid_move_clobbercc (mem, CONST0_RTX (mode));
2545 else
2546 emit_valid_insn (gen_rtx_SET (mem, CONST0_RTX (mode)));
2548 n_new_insns = 1;
2550 return true;
2553 return false;
2557 // Try to fuse two 1-byte insns .prev and .curr to one 2-byte insn (MOVW).
2558 // Returns true on success, and sets .n_new_insns, .ignore_mask etc.
2559 bool
2560 optimize_data_t::try_fuse (bbinfo_t *bbi)
2562 insninfo_t comb;
2564 if (! prev.ii.m_size
2565 || ! curr.ii.m_size
2566 || ! comb.combine (prev.ii, curr.ii))
2567 return false;
2569 avr_dump (";; Working on fuse of insn %d + insn %d = 0x%04x\n",
2570 INSN_UID (prev.insn), INSN_UID (curr.insn),
2571 (unsigned) comb.m_isrc);
2573 bool found = bbi->run_find_plies (comb, prev.regs);
2574 if (found)
2576 avr_dump (";; Found fuse of insns %d and %d\n",
2577 INSN_UID (prev.insn), INSN_UID (curr.insn));
2579 n_new_insns = bbinfo_t::fpd->solution.emit_insns (comb, prev.regs);
2580 delete_prev_p = true;
2582 if (prev.ii.m_scratch)
2583 ignore_mask |= regmask (prev.ii.m_scratch, 1);
2584 if (curr.ii.m_scratch)
2585 ignore_mask |= regmask (curr.ii.m_scratch, 1);
2586 ignore_mask &= ~regmask (comb.m_regno, comb.m_size);
2589 return found;
2593 // Try to replace an arithmetic 1-byte insn by a reg-reg move.
2594 // Returns true on success, and sets .n_new_insns etc.
2595 bool
2596 optimize_data_t::try_simplify (bbinfo_t *)
2598 if (curr.ii.m_size == 1
2599 && curr.ii.m_old_code != REG
2600 && curr.ii.m_code == REG)
2602 avr_dump (";; Found simplify of insn %d\n", INSN_UID (curr.insn));
2604 n_new_insns = curr.ii.emit_insn ();
2606 return true;
2609 return false;
2613 // Try to replace XEXP (*, 1) of a binary operation by a cheaper expression.
2614 // Returns true on success; sets .n_new_insns, .ignore_mask, .delete_prev_p.
2615 bool
2616 optimize_data_t::try_bin_arg1 (bbinfo_t *)
2618 if (curr.ii.m_size != 1
2619 || curr.ii.m_new_src.arity () != 2
2620 || curr.unused)
2621 return false;
2623 avr_dump (";; Working on bin_arg1 insn %d\n", INSN_UID (curr.insn));
2625 gcc_assert (curr.ii.m_src && BINARY_P (curr.ii.m_src));
2626 rtx xarg1_old = XEXP (curr.ii.m_src, 1);
2628 const absint_byte_t &aib = curr.ii.m_new_src;
2629 const absint_val_t &arg0 = aib.arg (0);
2630 const absint_val_t &arg1 = aib.arg (1);
2631 const absint_val_t &arg1_old = curr.ii.m_ai[0].arg (1);
2633 rtx src = NULL_RTX;
2635 if (CONSTANT_P (xarg1_old))
2637 // Sometimes, we allow expensive constants as the 2nd operand, like
2638 // in R2 += 2 which produces two INCs. When we have the
2639 // constant handy in a reg, then use that instead of the constant.
2640 const rtx_code code = aib.get_code ();
2641 gcc_assert (arg1.val8 == (INTVAL (xarg1_old) & 0xff));
2643 if (AVRasm::constant_cost (code, arg0.regno, arg1.val8) > 1)
2644 src = aib.to_rtx ();
2646 else if (REG_P (xarg1_old)
2647 && dead_or_set_p (curr.insn, xarg1_old))
2649 src = aib.to_rtx ();
2651 // The 2nd operand is a reg with a known content that dies
2652 // at the current insn. Chances are high that the register
2653 // holds a reload value only used by the current insn.
2654 if (prev.ii.m_size == 1
2655 && rtx_equal_p (xarg1_old, SET_DEST (prev.ii.m_set))
2656 && CONSTANT_P (prev.ii.m_src))
2658 avr_dump (";; Found dying reload insn %d\n", INSN_UID (prev.insn));
2660 delete_prev_p = true;
2661 ignore_mask = regmask (arg1_old.regno, 1);
2665 if (src)
2667 rtx dest = SET_DEST (curr.ii.m_set);
2669 avr_dump (";; Found bin_arg1 for insn %d: ", INSN_UID (curr.insn));
2670 avr_dump ("%C:%m %r", curr.ii.m_code, GET_MODE (dest), xarg1_old);
2671 aib.dump (" = %s\n");
2673 emit_valid_move_clobbercc (dest, src);
2674 n_new_insns = 1;
2677 return src != NULL_RTX;
2681 // Try to replace a REG = CONST insn by a cheaper sequence.
2682 // Returns true on success, and sets .n_new_insns, .ignore_mask etc.
2683 bool
2684 optimize_data_t::try_split_ldi (bbinfo_t *bbi)
2686 if (! curr.ii.m_size
2687 || curr.unused
2688 || curr.ii.m_code != CONST_INT
2689 || (! bbinfo_t::try_split_any_p
2690 // Finding plys will only ever succeed when there are
2691 // regs with a known value.
2692 && ! (curr.regs.known
2693 || (AVR_HAVE_MOVW
2694 && curr.ii.m_regno < REG_16 && curr.ii.m_size == 4))))
2695 return false;
2697 avr_dump (";; Working on split_ldi insn %d\n", INSN_UID (curr.insn));
2699 bool found = bbi->run_find_plies (curr.ii, curr.regs);
2700 if (found)
2702 avr_dump (";; Found split for ldi insn %d\n", INSN_UID (curr.insn));
2704 n_new_insns = bbinfo_t::fpd->solution.emit_insns (curr.ii, curr.regs);
2706 if (curr.ii.m_scratch)
2707 ignore_mask = regmask (curr.ii.m_scratch, 1);
2710 return found;
2714 // Helper for try_split_any().
2715 bool
2716 optimize_data_t::fail (const char *reason)
2718 n_new_insns = -1;
2720 if (dump_file)
2721 fprintf (dump_file, ";; Giving up split_any: %s\n", reason);
2723 return false;
2727 // Helper for try_split_any().
2728 rtx_insn *
2729 optimize_data_t::emit_and_apply_move (memento_t &memo, rtx dest, rtx src)
2731 rtx_insn *insn = emit_valid_move_clobbercc (dest, src);
2732 n_new_insns += 1;
2733 memo.apply_insn (insn, false);
2735 return insn;
2739 // Set X0 and X1 so that they are valid operands for an andqi3, iorqi3,
2740 // xorqi3 or addqi3 insn with destination R_DEST.  The method loads X1
2741 // into a scratch reg as needed and records the GPR effect in IOD.regs.
2742 // EXTRA_COST are extra costs in units of words of insns that cost more
2743 // than one instruction. This is a helper for try_split_any().
2744 bool
2745 optimize_data_t
2746 ::get_2ary_operands (rtx_code &code, const absint_byte_t &aib,
2747 insn_optimize_data_t &iod, int r_dest,
2748 absint_val_t &x0, absint_val_t &x1, int &extra_cost)
2750 if (code != IOR && code != AND && code != XOR && code != PLUS)
2751 return fail ("2ary: unknown code");
2753 x0 = aib.arg (0);
2754 x1 = aib.arg (1);
2756 if (! x0.knows_regno ()
2757 || x1.clueless ())
2758 return fail ("2ary: clueless");
2760 int val8 = x1.val8;
2761 int val8_cost = val8 < 0 ? 100 : AVRasm::constant_cost (code, r_dest, val8);
2763 if (x0.regno == r_dest
2764 && (x1.knows_regno ()
2765 || val8_cost <= 1))
2767 if (code == XOR
2768 && val8 == 0x80
2769 && x0.regno >= REG_16)
2771 // xorxi3 only supports the "r,0,r" alternative.
2772 // x0 ^ 0x80 <=> x0 - 0x80.
2773 x1.regno = 0;
2774 code = MINUS;
2776 return true;
2779 const bool and_1_bit = code == AND && popcount_hwi (val8) == 1;
2780 // andqi3 has a "r,r,Cb1" alternative where Cb1 has exactly 1 bit set.
2781 // This can accommodate bytes of higher AND Cb<N> alternatives.
2782 if (x0.regno != r_dest)
2784 if (and_1_bit)
2786 extra_cost += 1 + (r_dest < REG_16);
2787 return true;
2789 else if (x1.regno == r_dest)
2791 std::swap (x0, x1);
2792 return true;
2794 return fail ("2ary is a 3-operand insn");
2797 // Now we have:
2798 // 1) r_dest = x0.regno, and
2799 // 2) x1 is val8, and
2800 // 3) x1 costs 2.
2802 const bool needs_scratch_p = select<bool>()
2803 : code == XOR ? true
2804 : code == AND ? popcount_hwi (val8) != 7
2805 : code == IOR ? popcount_hwi (val8) != 1
2806 : code == PLUS ? IN_RANGE (val8, 3, 0xff - 3)
2807 : bad_case<bool> ();
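// For instance, IOR with exactly one set bit or AND with exactly one
// zero bit can be done with SET resp. CLT followed by BLD, and PLUS
// with val8 in { 1, 2, 0xfd, 0xfe, 0xff } as a short INC / DEC chain;
// hence these cases need no scratch reg.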
2809 const int r_val8 = iod.regs.regno_with_value (val8, 0 /* excludes: none */);
2810 if (r_val8)
2812 // Found a reg that already holds the constant.
2813 x1.val8 = -1;
2814 x1.regno = r_val8;
2815 return true;
2817 else if (iod.ii.m_scratch)
2819 // Using the insn's scratch reg.
2820 rtx xdst = gen_rtx_REG (QImode, iod.ii.m_scratch);
2821 rtx xsrc = gen_int_mode (x1.val8, QImode);
2822 emit_and_apply_move (iod.regs, xdst, xsrc);
2824 x1.regno = iod.ii.m_scratch;
2825 x1.val8 = -1;
2827 return true;
2829 else if (! needs_scratch_p)
2831 // Some constants (1 and -1) can be loaded without a scratch.
2832 extra_cost += 1;
2833 return true;
2835 else if (and_1_bit)
2837 // This can always fall back to BST + CLR + BLD, but may be cheaper.
2838 extra_cost += 1 + (r_dest < REG_16);
2839 return true;
2842 return fail ("2ary: expensive constant");
2846 static inline bool
2847 any_shift_p (rtx_code code)
2849 return code == LSHIFTRT || code == ASHIFTRT || code == ASHIFT;
2852 // Try to split .curr into a sequence of 1-byte insns.
2853 // Returns true on success. Sets .n_new_insns and .ignore_mask.
2854 bool
2855 optimize_data_t::try_split_any (bbinfo_t *)
2857 if (curr.ii.m_size < 2
2858 // Constants are split by split_ldi.
2859 || CONSTANT_P (curr.ii.m_src)
2860 // Splitting requires knowledge about what to do with each byte.
2861 || curr.ii.m_ai.end_knows (VALUE) < curr.ii.m_size)
2862 return false;
2864 avr_dump (";; Working on split_any %C:%m insn %d\n", curr.ii.m_code,
2865 GET_MODE (SET_DEST (curr.ii.m_set)), INSN_UID (curr.insn));
2867 const insninfo_t &ii = curr.ii;
2868 const int n_bytes = ii.m_size;
2869 int extra_cost = 0;
2870 int binop_cost = -1;
2872 // For plain AND, IOR, XOR get the current cost in units of words.
2873 if (BINARY_P (curr.ii.m_src))
2875 const rtx_code code = curr.ii.m_code;
2876 if ((code == IOR || code == AND || code == XOR)
2877 && REG_P (XEXP (curr.ii.m_src, 0))
2878 && CONSTANT_P (XEXP (curr.ii.m_src, 1)))
2880 binop_cost = get_attr_length (curr.insn);
2881 avr_dump (";; Competing against %C:%m cost = %d\n", code,
2882 GET_MODE (curr.ii.m_src), binop_cost);
2886 // Step 1: Work out conflicts and which sign extends to perform.
2888 const gprmask_t regs_dest = regmask (ii.m_regno, n_bytes);
2889 int r_sign = 0;
2890 gprmask_t regs_signs = 0;
2891 bool has_lsl = false;
2892 bool has_lsr = false;
2894 for (int i = 0; i < n_bytes; ++i)
2896 const absint_byte_t &aib = ii.m_ai[i];
2897 const int r_dest = ii.m_regno + i;
2898 const gprmask_t regs_src = aib.reg_mask ();
2900 // When only regs to the right are used, or only regs to the left
2901 // are used, then there's no conflict like the one arising for rotates.
2902 // For now, only implement conflict-free splits.
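// E.g. a 16-bit shift R25:R24 <<= 8 assigns R25 = R24, i.e. byte 1
// reads a byte to its right: has_lsl will be set, and the bytes are
// emitted high-to-low (step = -1 below) so that no input byte is
// clobbered before it is read.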
2903 has_lsl |= has_bits_in (regs_src & regs_dest, 0, r_dest - 1);
2904 has_lsr |= has_bits_in (regs_src & regs_dest, r_dest + 1, 31);
2905 if (has_lsl && has_lsr)
2906 return fail ("has both << and >>");
2908 if (aib.get_code () == SIGN_EXTEND)
2910 const absint_val_t x0 = aib.arg (0);
2911 if (! r_sign)
2912 r_sign = x0.regno;
2913 else if (r_sign != x0.regno)
2914 return fail ("too many signs");
2916 // Signs are handled below after all the other bytes.
2917 regs_signs |= regmask (r_dest, 1);
2921 // Step 2: Work on the individual bytes and emit according insns.
2923 n_new_insns = 0;
2924 memento_t memo = curr.regs;
2926 const int step = has_lsl ? -1 : 1;
2927 const int istart = step == 1 ? 0 : n_bytes - 1;
2928 const int iend = step == 1 ? n_bytes : -1;
2930 for (int i = istart; i != iend; i += step)
2932 const absint_byte_t &aib = ii.m_ai[i];
2933 const int r_dest = ii.m_regno + i;
2934 rtx_code code = aib.get_code ();
2935 rtx xsrc = NULL_RTX;
2936 rtx xdest = gen_rtx_REG (QImode, r_dest);
2938 if (code == SET)
2940 const int r_src = aib.regno (false);
2941 const int val8 = aib.val8 (false);
2942 int r16;
2944 // A no-op...
2945 if (r_dest == r_src)
2946 continue;
2947 // ...or an existing 16-bit constant...
2948 else if (AVR_HAVE_MOVW
2949 && i + step != iend
2950 // Next is not a no-op.
2951 && ii.m_ai[i + step].regno (false) != r_dest + step
2952 // Eligible for MOVW.
2953 && r_dest + step == (r_dest ^ 1)
2954 && r_dest % 2 == i % 2
2955 && (r16 = ii.m_ai.reg16_with_value (i, i + step, memo)))
2957 xdest = gen_rtx_REG (HImode, r_dest & ~1);
2958 xsrc = gen_rtx_REG (HImode, r16);
2959 i += step;
2961 // ...or a reg-reg move from a multi-byte move...
2962 else if (r_src
2963 // Prefer a reg-reg move over a (potential) load
2964 // of a constant, because the subsequent RTL
2965 // peephole pass may combine it to a MOVW again.
2966 && AVR_HAVE_MOVW
2967 && REG_P (curr.ii.m_src))
2968 xsrc = gen_rtx_REG (QImode, r_src);
2969 // ...or a cheap constant...
2970 else if (val8 >= 0
2971 && AVRasm::constant_cost (SET, r_dest, val8) <= 1)
2972 xsrc = gen_int_mode (val8, QImode);
2973 // ...or a reg-reg move...
2974 else if (r_src)
2975 xsrc = gen_rtx_REG (QImode, r_src);
2976 // ...or a costly constant that already exists in some reg...
2977 else if (memo.regno_with_value (val8, 0 /* excludes: none */))
2978 xsrc = gen_rtx_REG (QImode, memo.regno_with_value (val8, 0));
2979 // ...or a costly constant loaded into curr.insn's scratch reg...
2980 else if (ii.m_scratch)
2982 rtx xscratch = gen_rtx_REG (QImode, ii.m_scratch);
2983 rtx xval8 = gen_int_mode (val8, QImode);
2984 emit_and_apply_move (memo, xscratch, xval8);
2985 xsrc = xscratch;
2987 // ...or a costly constant (1 or -1) that doesn't need a scratch.
2988 else if (! AVRasm::ldi_needs_scratch (r_dest, val8))
2990 extra_cost += 1;
2991 xsrc = gen_int_mode (val8, QImode);
2993 else
2994 return fail ("expensive val8");
2995 } // SET
2996 else if (aib.arity () == 1)
2998 if (aib.get_code () == SIGN_EXTEND)
2999 // Signs are handled after all the others.
3000 continue;
3001 else
3003 const absint_val_t x0 = aib.arg (0);
3004 rtx xop0 = gen_rtx_REG (QImode, x0.regno);
3005 xsrc = gen_rtx_fmt_e (code, QImode, xop0);
3007 } // unary
3008 else if (aib.arity () == 2)
3010 absint_val_t x0;
3011 absint_val_t x1;
3012 insn_optimize_data_t iod (memo);
3013 iod.ii = curr.ii;
3015 if (! get_2ary_operands (code, aib, iod, r_dest, x0, x1, extra_cost))
3016 return false;
3017 rtx xop0 = gen_rtx_REG (QImode, x0.regno);
3018 rtx xop1 = x1.knows_val8 ()
3019 ? gen_int_mode (x1.val8, QImode)
3020 : gen_rtx_REG (QImode, x1.regno);
3022 xsrc = gen_rtx_fmt_ee (code, QImode, xop0, xop1);
3023 } // binary
3025 if (! xsrc)
3026 return fail ("no source found");
3028 if (r_sign
3029 && (regmask (xdest) & regmask (r_sign, 1)))
3030 return fail ("clobbered r_sign");
3032 emit_and_apply_move (memo, xdest, xsrc);
3035 // Step 3: Emit insns for sign extend.
3036 // No more need to track memo beyond this point.
3038 if (! emit_signs (r_sign, regs_signs))
3039 return false;
3041 if (binop_cost >= 0)
3043 avr_dump (";; Expected cost: %d + %d\n", n_new_insns, extra_cost);
3044 if (n_new_insns + extra_cost > binop_cost)
3045 return fail ("too expensive");
3048 if (ii.m_scratch)
3049 ignore_mask = regmask (ii.m_scratch, 1);
3051 return true;
3055 // A helper for try_split_any() above.
3056 // Emit sign extends from R_MSB.7 to all regs in REGS_SIGNS.
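// For instance (illustrative): extending the sign of R24 into R25..R27
// first emits R25 = R24 >> 7 using the r,r,C07 alternative of ashrqi3,
// and then propagates R25 to R26 and R27 by byte moves (or by one MOVW
// when a matching 16-bit pair is available).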
3057 bool
3058 optimize_data_t::emit_signs (const int r_msb, gprmask_t regs_signs)
3060 if (! regs_signs)
3061 return true;
3062 else if (! r_msb)
3063 return fail ("fatal: no r_msb given");
3065 // Pick an arbitrary reg from the sign destinations when the source
3066 // isn't one of the signs.
3067 const int r_signs = regs_signs & regmask (r_msb, 1)
3068 ? r_msb
3069 : ctz_hwi (regs_signs);
3071 // Set all bits in r_signs according to the sign of r_msb using the
3072 // r,r,C07 alternative of ashrqi3.
3073 rtx xsrc = gen_rtx_fmt_ee (ASHIFTRT, QImode,
3074 gen_rtx_REG (QImode, r_msb), GEN_INT (7));
3075 emit_valid_move_clobbercc (gen_rtx_REG (QImode, r_signs), xsrc);
3076 regs_signs &= ~regmask (r_signs, 1);
3078 // Set up a 16-bit sign register if possible.
3079 int r16_signs = 0;
3080 if (regs_signs & regmask (r_signs ^ 1, 1))
3082 emit_move_mask (r_signs ^ 1, r_signs, 1, regs_signs);
3083 r16_signs = r_signs & ~1;
3086 // Handle all 16-bit sign regs, provided we have MOVW.
3087 if (AVR_HAVE_MOVW)
3088 for (int r = FIRST_GPR; r < REG_32; r += 2)
3090 const gprmask_t m = regmask (r, 2);
3091 if ((m & regs_signs) == m)
3093 if (r16_signs)
3094 emit_move_mask (r, r16_signs, 2, regs_signs);
3095 else
3097 emit_move_mask (r + 0, r_signs, 1, regs_signs);
3098 emit_move_mask (r + 1, r_signs, 1, regs_signs);
3099 r16_signs = r;
3104 // Handle all remaining signs.
3105 while (regs_signs)
3106 emit_move_mask (ctz_hwi (regs_signs), r_signs, 1, regs_signs);
3108 return true;
3111 // Helper for the method above. Move N_BYTES registers from R_SRC to R_DST,
3112 // keeping track of which regs are still to be done in MASK.
3113 void
3114 optimize_data_t::emit_move_mask (int r_dst, int r_src, int n_bytes,
3115 gprmask_t &mask)
3117 const gprmask_t mask_dst = regmask (r_dst, n_bytes);
3118 const gprmask_t mask_src = regmask (r_src, n_bytes);
3119 gcc_assert ((mask_dst & mask) == mask_dst);
3120 gcc_assert ((mask_src & mask) == 0);
3121 rtx xdst = gen_rtx_REG (size_to_mode (n_bytes), r_dst);
3122 rtx xsrc = gen_rtx_REG (size_to_mode (n_bytes), r_src);
3123 emit_valid_move_clobbercc (xdst, xsrc);
3124 n_new_insns += 1;
3125 mask &= ~mask_dst;
3129 void
3130 bbinfo_t::optimize_one_block (bool &changed)
3132 memento_t prev_regs;
3134 rtx_insn *insn = next_nondebug_insn_bb (bb, BB_HEAD (bb));
3136 for (rtx_insn *next_insn; insn; insn = next_insn)
3138 next_insn = next_nondebug_insn_bb (bb, insn);
3140 avr_dump ("\n;; Working on insn %d\n%r\n\n", INSN_UID (insn), insn);
3142 optimize_data_t od (prev_regs, regs);
3144 od.prev.insn = prev_nondebug_insn_bb (bb, insn);
3145 od.curr.insn = insn;
3147 od.prev.ii.init1 (od.prev, 1, "IIprev ");
3148 od.curr.ii.init1 (od.curr, 8, "IIcurr ");
3150 start_sequence ();
3152 bool found = ((bbinfo_t::try_fuse_p && od.try_fuse (this))
3153 || (bbinfo_t::try_bin_arg1_p && od.try_bin_arg1 (this))
3154 || (bbinfo_t::try_simplify_p && od.try_simplify (this))
3155 || (bbinfo_t::try_split_ldi_p && od.try_split_ldi (this))
3156 || (bbinfo_t::try_split_any_p && od.try_split_any (this))
3157 || (bbinfo_t::try_mem0_p && od.try_mem0 (this)));
3159 rtx_insn *new_insns = get_insns ();
3160 end_sequence ();
3162 gcc_assert (found == (od.n_new_insns >= 0));
3164 ++tick;
3166 // This insn will become the previous one in the next loop iteration.
3167 // Just used in dumps.
3168 rtx_insn *new_curr_insn;
3170 if (! found)
3172 // Nothing changed.
3173 avr_dump (";; Keeping old route.\n");
3174 gcc_assert (! od.delete_prev_p);
3176 prev_regs = regs;
3177 regs.apply_insn (insn, false);
3179 new_curr_insn = insn;
3181 else
3183 // We have new_insns.
3184 changed = true;
3186 if (dump_file)
3188 avr_dump ("\n;; EMIT %d new insn%s replacing ",
3189 od.n_new_insns, "s" + (od.n_new_insns == 1));
3190 if (od.delete_prev_p)
3191 avr_dump ("insn %d and ", INSN_UID (od.prev.insn));
3192 avr_dump ("insn %d, delete_prev=%d:\n%L\n", INSN_UID (insn),
3193 od.delete_prev_p, new_insns);
3196 new_curr_insn = od.emit_sequence (bb, new_insns);
3197 } // found
3199 if (dump_file && new_curr_insn)
3201 avr_dump ("\n");
3203 const int d = regs.distance_to (prev_regs);
3204 if (d || new_curr_insn != insn)
3205 avr_dump (";; %d regs changed state:\n", d);
3207 if (new_curr_insn != insn)
3209 avr_dump (";; Befor insn %d", INSN_UID (new_curr_insn));
3210 prev_regs.dump ();
3213 avr_dump (";; After insn %d", INSN_UID (new_curr_insn));
3214 regs.dump ();
3216 } // for BB insns
3220 void
3221 bbinfo_t::optimize_one_function (function *func)
3223 bbinfo_t::fpd = XNEW (bbinfo_t::find_plies_data_t);
3224 bbinfo_t::bb_info = XCNEWVEC (bbinfo_t, last_basic_block_for_fn (func));
3225 int *post_order = XNEWVEC (int, n_basic_blocks_for_fn (func));
3227 plies_t::max_n_plies = 0;
3229 using elt0_getter_HRS = elt0_getter<HARD_REG_SET, HARD_REG_ELT_TYPE>;
3230 memento_t::fixed_regs_mask = (gprmask_t) elt0_getter_HRS::get (fixed_reg_set);
3232 // Option -mfuse-move=<0,23> provides a 3:2:2:2 mixed radix value:
3233 // -mfuse-move=  0  1  2  3  4  5  6  7  8  9 10  1  2  3  4  5  6  7  8  9 20  1  2  3   Digit
3234 // fuse             1     1     1     1     1     1     1     1     1     1     1     1     0
3235 // bin_arg1            1  1        1  1        1  1        1  1        1  1        1  1     1
3236 // split_any                 1  1  1  1              1  1  1  1              1  1  1  1     2
3237 // split_ldi                             1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1     3
3238 // use arith                                                     1  1  1  1  1  1  1  1     3
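// For example (illustrative decodings): -mfuse-move=11 has digits
// 1:0:1:1 and enables try_fuse, try_bin_arg1 and try_split_ldi but not
// try_split_any, whereas -mfuse-move=23 has digits 2:1:1:1 and enables
// all of the above plus use_arith_p.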
3240 // Which optimization(s) to perform.
3241 bbinfo_t::try_fuse_p = avropt_fuse_move & 0x1; // Digit 0 in [0, 1].
3242 bbinfo_t::try_mem0_p = avropt_fuse_move & 0x1; // Digit 0 in [0, 1].
3243 bbinfo_t::try_bin_arg1_p = avropt_fuse_move & 0x2; // Digit 1 in [0, 1].
3244 bbinfo_t::try_split_any_p = avropt_fuse_move & 0x4; // Digit 2 in [0, 1].
3245 bbinfo_t::try_split_ldi_p = avropt_fuse_move >> 3; // Digit 3 in [0, 2].
3246 bbinfo_t::use_arith_p = (avropt_fuse_move >> 3) >= 2; // Digit 3 in [0, 2].
3247 bbinfo_t::use_set_some_p = bbinfo_t::try_split_ldi_p; // Digit 3 in [0, 2].
3248 bbinfo_t::try_simplify_p = avropt_fuse_move != 0;
3250 // Topologically sort BBs from last to first.
3252 const int n_post_order = post_order_compute (post_order, false, false);
3253 bool changed = false;
3255 // Traverse the BBs from first to last in order to increase the chance
3256 // that register values from all incoming edges are known.
3258 for (int n = n_post_order - 1; n >= 0; --n)
3260 basic_block bb = BASIC_BLOCK_FOR_FN (func, post_order[n]);
3262 bbinfo_t::bb_info[bb->index].bb = bb;
3263 bbinfo_t::bb_info[bb->index].enter ();
3264 bbinfo_t::bb_info[bb->index].optimize_one_block (changed);
3265 bbinfo_t::bb_info[bb->index].leave ();
3268 if (plies_t::max_n_plies)
3269 avr_dump (";; max_n_plies=%d\n", (int) plies_t::max_n_plies);
3271 if (changed)
3273 df_note_add_problem ();
3274 df_analyze ();
3277 XDELETEVEC (post_order);
3278 XDELETEVEC (bbinfo_t::bb_info);
3279 XDELETE (bbinfo_t::fpd);
3282 } // anonymous namespace
3285 namespace
3289 //////////////////////////////////////////////////////////////////////////////
3290 // Try to replace 2 cbranch insns with 1 comparison and 2 branches.
3292 static const pass_data avr_pass_data_ifelse =
3294 RTL_PASS, // type
3295 "", // name (will be patched)
3296 OPTGROUP_NONE, // optinfo_flags
3297 TV_DF_SCAN, // tv_id
3298 0, // properties_required
3299 0, // properties_provided
3300 0, // properties_destroyed
3301 0, // todo_flags_start
3302 TODO_df_finish | TODO_df_verify // todo_flags_finish
3305 class avr_pass_ifelse : public rtl_opt_pass
3307 public:
3308 avr_pass_ifelse (gcc::context *ctxt, const char *name)
3309 : rtl_opt_pass (avr_pass_data_ifelse, ctxt)
3311 this->name = name;
3314 bool gate (function *) final override
3316 return optimize > 0;
3319 unsigned int execute (function *func) final override;
3320 }; // avr_pass_ifelse
3323 /* Return TRUE iff comparison code CODE is explicitly signed. */
3325 static bool
3326 avr_strict_signed_p (rtx_code code)
3328 return code == GT || code == GE || code == LT || code == LE;
3332 /* Return TRUE iff comparison code CODE is explicitly unsigned. */
3334 static bool
3335 avr_strict_unsigned_p (rtx_code code)
3337 return code == GTU || code == GEU || code == LTU || code == LEU;
3340 #include "config/avr/ranges.h"
3342 /* Suppose the inputs represent code like
3344 if (x <CMP1> XVAL1) goto way1;
3345 if (x <CMP2> XVAL2) goto way2;
3346 way3:;
3348 with two integer mode comparisons where XVAL1 and XVAL2 are CONST_INT.
3349 When this can be rewritten in the form
3351 if (x <cond1> xval) goto way1;
3352 if (x <cond2> xval) goto way2;
3353 way3:;
3355 then set CMP1 = cond1, CMP2 = cond2, and return xval. Else return NULL_RTX.
3356 When SWAPT is returned true, then way1 and way2 must be swapped.
3357 When the incoming SWAPT is false, the outgoing one will be false, too. */
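/* A worked example (illustrative): for

       if (x <LTU> 1) goto way1;   // i.e. x == 0
       if (x <LTU> 2) goto way2;   // i.e. x == 1

   the domain decomposes into [0, 0], [1, 1] and [2, max], so that
   xval = 1 serves both branches with cmp1 = LTU and cmp2 = EQ.  */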
3359 static rtx
3360 avr_2comparisons_rhs (rtx_code &cmp1, rtx xval1,
3361 rtx_code &cmp2, rtx xval2,
3362 machine_mode mode, bool &swapt)
3364 const bool may_swapt = swapt;
3365 swapt = false;
3367 //////////////////////////////////////////////////////////////////
3368 // Step 0: Decide about signedness, map xval1/2 to the range
3369 // of [un]signed machine mode.
3371 const bool signed1_p = avr_strict_signed_p (cmp1);
3372 const bool signed2_p = avr_strict_signed_p (cmp2);
3373 const bool unsigned1_p = avr_strict_unsigned_p (cmp1);
3374 const bool unsigned2_p = avr_strict_unsigned_p (cmp2);
3375 const bool signed_p = signed1_p || signed2_p;
3376 bool unsigned_p = unsigned1_p || unsigned2_p;
3378 using T = Ranges::scalar_type;
3379 T val1 = INTVAL (xval1);
3380 T val2 = INTVAL (xval2);
3382 if (signed_p + unsigned_p > 1)
3384 // Don't go down that rabbit hole. When the RHSs are the
3385 // same, we can still save one comparison.
3386 return val1 == val2 ? xval1 : NULL_RTX;
3389 // Decide about signedness. When no explicit signedness is present,
3390 // then cases that are close to the unsigned boundary like EQ 0, EQ 1
3391 // can also be optimized.
3392 if (unsigned_p
3393 || (! signed_p && IN_RANGE (val1, -2, 2)))
3395 unsigned_p = true;
3396 val1 = UINTVAL (xval1) & GET_MODE_MASK (mode);
3397 val2 = UINTVAL (xval2) & GET_MODE_MASK (mode);
3400 // No way we can decompose the domain in a usable manner when the
3401 // RHSes are too far apart.
3402 if (! IN_RANGE (val1 - val2, -2, 2))
3403 return NULL_RTX;
3405 //////////////////////////////////////////////////////////////////
3406 // Step 1: Represent the input conditions as truth Ranges. This
3407 // establishes a decomposition / coloring of the domain.
3409 Ranges dom = Ranges::NBitsRanges (GET_MODE_BITSIZE (mode), unsigned_p,
3410 Ranges::ALL);
3411 Ranges r[4] = { dom, dom.truth (cmp1, val1), dom.truth (cmp2, val2), dom };
3413 // r[1] shadows r[2] shadows r[3]. r[0] is just for nice indices.
3414 r[3].minus (r[2]);
3415 r[3].minus (r[1]);
3416 r[2].minus (r[1]);
3418 //////////////////////////////////////////////////////////////////
3419 // Step 2: Filter for cases where the domain decomposes into three
3420 // intervals: One to the left, one to the right, and one
3421 // in the middle where the latter holds exactly one value.
3423 for (int i = 1; i <= 3; ++i)
3425 // Keep track of which Ranges is which.
3426 r[i].tag = i;
3428 gcc_assert (r[i].check ());
3430 // Filter for proper intervals. Also return for the empty set,
3431 // since cases where [m_min, m_max] decomposes into two intervals
3432 // or less have been sorted out by the generic optimizers already,
3433 // and hence should not be seen here. And more than two intervals
3434 // at a time cannot be optimized of course.
3435 if (r[i].size () != 1)
3436 return NULL_RTX;
3439 // Bubble-sort the three intervals such that:
3440 // [1] is the left interval, i.e. the one taken by LT[U].
3441 // [2] is the middle interval, i.e. the one taken by EQ.
3442 // [3] is the right interval, i.e. the one taken by GT[U].
3443 Ranges::sort2 (r[1], r[3]);
3444 Ranges::sort2 (r[2], r[3]);
3445 Ranges::sort2 (r[1], r[2]);
3447 if (dump_file)
3448 fprintf (dump_file,
3449 ";; Decomposed: .%d=[%ld, %ld] .%d=[%ld, %ld] .%d=[%ld, %ld]\n",
3450 r[1].tag, (long) r[1].ranges[0].lo, (long) r[1].ranges[0].hi,
3451 r[2].tag, (long) r[2].ranges[0].lo, (long) r[2].ranges[0].hi,
3452 r[3].tag, (long) r[3].ranges[0].lo, (long) r[3].ranges[0].hi);
3454 // EQ / NE can handle only one value.
3455 if (r[2].cardinality (0) != 1)
3456 return NULL_RTX;
3458 // Success! This is the sought for xval.
3459 const T val = r[2].ranges[0].lo;
3461 //////////////////////////////////////////////////////////////////
3462 // Step 3: Work out which label gets which condition, trying to
3463 // avoid the expensive codes GT[U] and LE[U] if possible.
3464 // Avoiding expensive codes is always possible when labels
3465 // way1 and way2 may be swapped.
3467 // The xx1 ways have an expensive GT for cmp1 which can be avoided
3468 // by swapping way1 with way2.
3469 swapt = may_swapt && r[3].tag == 1;
3470 if (swapt)
3471 std::swap (r[3], r[2].tag == 2 ? r[2] : r[1]);
3473 // 6 = 3! ways to assign LT, EQ, GT to the three labels.
3474 const int way = 100 * r[1].tag + 10 * r[2].tag + r[3].tag;
3476 if (dump_file)
3477 fprintf (dump_file, ";; Success: unsigned=%d, swapt=%d, way=%d, rhs=%ld\n",
3478 unsigned_p, swapt, way, (long) val);
3480 #define WAY(w, c1, c2) \
3481 case w: \
3482 cmp1 = unsigned_p ? unsigned_condition (c1) : c1; \
3483 cmp2 = unsigned_p ? unsigned_condition (c2) : c2; \
3484 break;
3486 switch (way)
3488 default:
3489 gcc_unreachable ();
3491 // cmp1 gets the LT, avoid difficult branches for cmp2.
3492 WAY (123, LT, EQ);
3493 WAY (132, LT, NE);
3495 // cmp1 gets the EQ, avoid difficult branches for cmp2.
3496 WAY (213, EQ, LT);
3497 WAY (312, EQ, GE);
3499 // cmp1 gets the difficult GT, unavoidable as we may not swap way1/2.
3500 WAY (231, GT, NE);
3501 WAY (321, GT, EQ);
3504 #undef WAY
3506 return gen_int_mode (val, mode);
3510 /* A helper for the next method. Suppose we have two conditional branches
3511 with REG and CONST_INT operands
3513 if (reg <cond1> xval1) goto label1;
3514 if (reg <cond2> xval2) goto label2;
3516 If the second comparison is redundant and there are codes <cmp1>
3517 and <cmp2> such that the sequence can be performed as
3519 REG_CC = compare (reg, xval);
3520 if (REG_CC <cmp1> 0) goto label1;
3521 if (REG_CC <cmp2> 0) goto label2;
3523 then set COND1 to cmp1, COND2 to cmp2, SWAPT to true when the branch
3524 targets have to be swapped, and return XVAL. Otherwise, return NULL_RTX.
3525 This function may clobber COND1 and COND2 even when it returns NULL_RTX.
3527 REVERSE_COND1 can be set to reverse condition COND1. This is useful
3528 when the second comparison does not follow the first one, but is
3529 located after label1 like in:
3531 if (reg <cond1> xval1) goto label1;
3533 label1:
3534 if (reg <cond2> xval2) goto label2;
3536 In such a case we cannot swap the labels, and we may end up with a
3537 difficult branch -- though one comparison can still be optimized out.
3538 Getting rid of such difficult branches would require reordering blocks. */
3540 static rtx
3541 avr_redundant_compare (rtx xreg1, rtx_code &cond1, rtx xval1,
3542 rtx xreg2, rtx_code &cond2, rtx xval2,
3543 bool reverse_cond1, bool &swapt)
3545 // Make sure we have two REG <cond> CONST_INT comparisons with the same reg.
3546 if (! rtx_equal_p (xreg1, xreg2)
3547 || ! CONST_INT_P (xval1)
3548 || ! CONST_INT_P (xval2))
3549 return NULL_RTX;
3551 if (reverse_cond1)
3552 cond1 = reverse_condition (cond1);
3554 // Allow swapping label1 <-> label2 only when ! reverse_cond1.
3555 swapt = ! reverse_cond1;
3556 rtx_code c1 = cond1;
3557 rtx_code c2 = cond2;
3558 rtx xval = avr_2comparisons_rhs (c1, xval1,
3559 c2, xval2, GET_MODE (xreg1), swapt);
3560 if (! xval)
3561 return NULL_RTX;
3563 if (dump_file)
3565 rtx_code a1 = reverse_cond1 ? reverse_condition (cond1) : cond1;
3566 rtx_code b1 = reverse_cond1 ? reverse_condition (c1) : c1;
3567 const char *s_rev1 = reverse_cond1 ? " reverse_cond1" : "";
3568 avr_dump (";; cond1: %C %r%s\n", a1, xval1, s_rev1);
3569 avr_dump (";; cond2: %C %r\n", cond2, xval2);
3570 avr_dump (";; => %C %d\n", b1, (int) INTVAL (xval));
3571 avr_dump (";; => %C %d\n", c2, (int) INTVAL (xval));
3574 cond1 = c1;
3575 cond2 = c2;
3577 return xval;
3581 /* Similar to the function above, but assume that
3583 if (xreg1 <cond1> xval1) goto label1;
3584 if (xreg2 <cond2> xval2) goto label2;
3586 are two subsequent REG-REG comparisons. When this can be represented as
3588 REG_CC = compare (reg, xval);
3589 if (REG_CC <cmp1> 0) goto label1;
3590 if (REG_CC <cmp2> 0) goto label2;
3592 then set XREG1 to reg, COND1 and COND2 accordingly, and return xval.
3593 Otherwise, return NULL_RTX. This optimization can be performed
3594 when { xreg1, xval1 } and { xreg2, xval2 } are equal as sets.
3595 It can be done in such a way that no difficult branches occur. */
3597 static rtx
3598 avr_redundant_compare_regs (rtx &xreg1, rtx_code &cond1, rtx &xval1,
3599 rtx &xreg2, rtx_code &cond2, rtx &xval2,
3600 bool reverse_cond1)
3602 bool swapped;
3604 if (! REG_P (xval1))
3605 return NULL_RTX;
3606 else if (rtx_equal_p (xreg1, xreg2)
3607 && rtx_equal_p (xval1, xval2))
3608 swapped = false;
3609 else if (rtx_equal_p (xreg1, xval2)
3610 && rtx_equal_p (xreg2, xval1))
3611 swapped = true;
3612 else
3613 return NULL_RTX;
3615 // Found a redundant REG-REG comparison. Assume that the incoming
3616 // representation has been canonicalized by CANONICALIZE_COMPARISON.
3617 // We can always represent this using only one comparison and in such
3618 // a way that no difficult branches are required.
3620 if (dump_file)
3622 const char *s_rev1 = reverse_cond1 ? " reverse_cond1" : "";
3623 avr_dump (";; %r %C %r%s\n", xreg1, cond1, xval1, s_rev1);
3624 avr_dump (";; %r %C %r\n", xreg2, cond2, xval2);
3627 if (reverse_cond1)
3628 cond1 = reverse_condition (cond1);
3630 if (swapped)
3632 if (cond1 == EQ || cond1 == NE)
3634 avr_dump (";; case #21\n");
3635 std::swap (xreg1, xval1);
3637 else
3639 std::swap (xreg2, xval2);
3640 cond2 = swap_condition (cond2);
3642 // The swap may have introduced a difficult comparison.
3643 // In order to get rid of it, only a few cases need extra care.
3644 if ((cond1 == LT && cond2 == GT)
3645 || (cond1 == LTU && cond2 == GTU))
3647 avr_dump (";; case #22\n");
3648 cond2 = NE;
3650 else
3651 avr_dump (";; case #23\n");
3654 else
3655 avr_dump (";; case #20\n");
3657 return xval1;
3661 /* INSN1 and INSN2 are two cbranch insns for the same integer mode.
3662 When FOLLOW_LABEL1 is false, then INSN2 is located in the fallthrough
3663 path of INSN1. When FOLLOW_LABEL1 is true, then INSN2 is located at
3664 the true edge of INSN1, INSN2 is preceded by a barrier, and no other
3665 edge leads to the basic block of INSN2.
3667 Try to replace INSN1 and INSN2 by a compare insn and two branch insns.
3668 When such a replacement has been performed, then return the insn where the
3669 caller should continue scanning the insn stream. Else, return nullptr. */
3671 static rtx_insn *
3672 avr_optimize_2ifelse (rtx_jump_insn *insn1,
3673 rtx_jump_insn *insn2, bool follow_label1)
3675 avr_dump (";; Investigating jump_insn %d and jump_insn %d.\n",
3676 INSN_UID (insn1), INSN_UID (insn2));
3678 // Extract the operands of the insns:
3679 // $0 = comparison operator ($1, $2)
3680 // $1 = reg
3681 // $2 = reg or const_int
3682 // $3 = code_label
3683 // $4 = optional SCRATCH for HI, PSI, SI cases.
3685 const auto &op = recog_data.operand;
3687 extract_insn (insn1);
3688 rtx xop1[5] = { op[0], op[1], op[2], op[3], op[4] };
3689 int n_operands = recog_data.n_operands;
3691 extract_insn (insn2);
3692 rtx xop2[5] = { op[0], op[1], op[2], op[3], op[4] };
3694 rtx_code code1 = GET_CODE (xop1[0]);
3695 rtx_code code2 = GET_CODE (xop2[0]);
3696 bool swap_targets = false;
3698 // Search redundant REG-REG comparison.
3699 rtx xval = avr_redundant_compare_regs (xop1[1], code1, xop1[2],
3700 xop2[1], code2, xop2[2],
3701 follow_label1);
3703 // Search redundant REG-CONST_INT comparison.
3704 if (! xval)
3705 xval = avr_redundant_compare (xop1[1], code1, xop1[2],
3706 xop2[1], code2, xop2[2],
3707 follow_label1, swap_targets);
3708 if (! xval)
3710 avr_dump (";; Nothing found for jump_insn %d and jump_insn %d.\n",
3711 INSN_UID (insn1), INSN_UID (insn2));
3712 return nullptr;
3715 if (follow_label1)
3716 code1 = reverse_condition (code1);
3718 //////////////////////////////////////////////////////
3719 // Found a replacement.
3721 if (dump_file)
3723 avr_dump (";; => %C %r\n", code1, xval);
3724 avr_dump (";; => %C %r\n", code2, xval);
3726 fprintf (dump_file, "\n;; Found chain of jump_insn %d and"
3727 " jump_insn %d, follow_label1=%d:\n",
3728 INSN_UID (insn1), INSN_UID (insn2), follow_label1);
3729 print_rtl_single (dump_file, PATTERN (insn1));
3730 print_rtl_single (dump_file, PATTERN (insn2));
3733 rtx_insn *next_insn
3734 = next_nonnote_nondebug_insn (follow_label1 ? insn1 : insn2);
3736 // Pop the new branch conditions and the new comparison.
3737 // Prematurely split into compare + branch so that we can drop
3738 // the 2nd comparison. The following pass, split2, splits all
3739 // insns for REG_CC, and it should still work as usual even when
3740 // there are already some REG_CC insns around.
3742 rtx xcond1 = gen_rtx_fmt_ee (code1, VOIDmode, cc_reg_rtx, const0_rtx);
3743 rtx xcond2 = gen_rtx_fmt_ee (code2, VOIDmode, cc_reg_rtx, const0_rtx);
3744 rtx xpat1 = gen_branch (xop1[3], xcond1);
3745 rtx xpat2 = gen_branch (xop2[3], xcond2);
3746 rtx xcompare = NULL_RTX;
3747 machine_mode mode = GET_MODE (xop1[1]);
3749 if (mode == QImode)
3751 gcc_assert (n_operands == 4);
3752 xcompare = gen_cmpqi3 (xop1[1], xval);
3754 else
3756 gcc_assert (n_operands == 5);
3757 rtx scratch = GET_CODE (xop1[4]) == SCRATCH ? xop2[4] : xop1[4];
3758 rtx (*gen_cmp)(rtx,rtx,rtx)
3759 = mode == HImode ? gen_gen_comparehi
3760 : mode == PSImode ? gen_gen_comparepsi
3761 : gen_gen_comparesi; // SImode
3762 xcompare = gen_cmp (xop1[1], xval, scratch);
3765 // Emit that stuff.
3767 rtx_insn *cmp = emit_insn_before (xcompare, insn1);
3768 rtx_jump_insn *branch1 = emit_jump_insn_after (xpat1, insn1);
3769 rtx_jump_insn *branch2 = emit_jump_insn_after (xpat2, insn2);
3771 JUMP_LABEL (branch1) = xop1[3];
3772 JUMP_LABEL (branch2) = xop2[3];
3773 // delete_insn() decrements LABEL_NUSES when deleting a JUMP_INSN,
3774 // but when we pop a new JUMP_INSN, do it by hand.
3775 ++LABEL_NUSES (xop1[3]);
3776 ++LABEL_NUSES (xop2[3]);
3778 delete_insn (insn1);
3779 delete_insn (insn2);
3781 if (swap_targets)
3783 gcc_assert (! follow_label1);
3785 basic_block to1 = BLOCK_FOR_INSN (xop1[3]);
3786 basic_block to2 = BLOCK_FOR_INSN (xop2[3]);
3787 edge e1 = find_edge (BLOCK_FOR_INSN (branch1), to1);
3788 edge e2 = find_edge (BLOCK_FOR_INSN (branch2), to2);
3789 gcc_assert (e1);
3790 gcc_assert (e2);
3791 redirect_edge_and_branch (e1, to2);
3792 redirect_edge_and_branch (e2, to1);
3795 // As a side effect, also recog the new insns.
3796 gcc_assert (valid_insn_p (cmp));
3797 gcc_assert (valid_insn_p (branch1));
3798 gcc_assert (valid_insn_p (branch2));
3800 return next_insn;
3804 /* Sequences like
3806 SREG = compare (reg, 1 + val);
3807 if (SREG >= 0) goto label1;
3808 SREG = compare (reg, val);
3809 if (SREG == 0) goto label2;
3811 can be optimized to
3813 SREG = compare (reg, val);
3814 if (SREG == 0) goto label2;
3815 if (SREG >= 0) goto label1;
3817 Almost all cases where one of the comparisons is redundant can
3818 be transformed in such a way that only one comparison is required
3819 and no difficult branches are needed. */
3821 unsigned int
3822 avr_pass_ifelse::execute (function *)
3824 rtx_insn *next_insn;
3826 for (rtx_insn *insn = get_insns (); insn; insn = next_insn)
3828 next_insn = next_nonnote_nondebug_insn (insn);
3830 if (! next_insn)
3831 break;
3833 // Search for two cbranch insns.  Filter the first one for
3834 // "cbranch<mode>4_insn" with mode in QI, HI, PSI, SI.
3836 if (! JUMP_P (insn))
3837 continue;
3839 int icode1 = recog_memoized (insn);
3841 if (icode1 != CODE_FOR_cbranchqi4_insn
3842 && icode1 != CODE_FOR_cbranchhi4_insn
3843 && icode1 != CODE_FOR_cbranchpsi4_insn
3844 && icode1 != CODE_FOR_cbranchsi4_insn)
3845 continue;
3847 rtx_jump_insn *insn1 = as_a<rtx_jump_insn *> (insn);
3849 // jmp[0]: We can optimize cbranches that follow cbranch insn1.
3850 rtx_insn *jmp[2] = { next_insn, nullptr };
3852 // jmp[1]: A cbranch following the label of cbranch insn1.
3853 if (LABEL_NUSES (JUMP_LABEL (insn1)) == 1)
3855 rtx_insn *code_label1 = JUMP_LABEL_AS_INSN (insn1);
3856 rtx_insn *barrier = prev_nonnote_nondebug_insn (code_label1);
3858 // When the target label of insn1 is used exactly once and is
3859 // not a fallthrough, i.e. is preceded by a barrier, then
3860 // consider the insn following that label.
3861 if (barrier && BARRIER_P (barrier))
3862 jmp[1] = next_nonnote_nondebug_insn (code_label1);
3865 // With almost certainty, only one of the two possible jumps can
3866 // be optimized with insn1, but it's hard to tell which one a priori.
3867 // Just try both. In the unlikely case where both could be optimized,
3868 // prefer jmp[0] because eliminating difficult branches is impeded
3869 // by following label1.
3871 for (int j = 0; j < 2; ++j)
3872 if (jmp[j] && JUMP_P (jmp[j])
3873 && recog_memoized (jmp[j]) == icode1)
3875 rtx_insn *next
3876 = avr_optimize_2ifelse (insn1, as_a<rtx_jump_insn *> (jmp[j]),
3877 j == 1 /* follow_label1 */);
3878 if (next)
3880 next_insn = next;
3881 break;
3885 } // loop insns
3887 return 0;
3892 //////////////////////////////////////////////////////////////////////////////
3893 // Optimize results of the casesi expander for modes < SImode.
3895 static const pass_data avr_pass_data_casesi =
3897 RTL_PASS, // type
3898 "", // name (will be patched)
3899 OPTGROUP_NONE, // optinfo_flags
3900 TV_DF_SCAN, // tv_id
3901 0, // properties_required
3902 0, // properties_provided
3903 0, // properties_destroyed
3904 0, // todo_flags_start
3905 0 // todo_flags_finish
3908 class avr_pass_casesi : public rtl_opt_pass
3910 public:
3911 avr_pass_casesi (gcc::context *ctxt, const char *name)
3912 : rtl_opt_pass (avr_pass_data_casesi, ctxt)
3914 this->name = name;
3917 bool gate (function *) final override
3919 return optimize > 0;
3922 unsigned int execute (function *) final override;
3923 }; // avr_pass_casesi
3926 /* Make one parallel insn with all the patterns from insns i[0]..i[4]. */
3928 static rtx_insn *
3929 avr_parallel_insn_from_insns (rtx_insn *i[5])
3931 rtvec vec = gen_rtvec (5, PATTERN (i[0]), PATTERN (i[1]), PATTERN (i[2]),
3932 PATTERN (i[3]), PATTERN (i[4]));
3933 start_sequence ();
3934 emit (gen_rtx_PARALLEL (VOIDmode, vec));
3935 rtx_insn *insn = get_insns ();
3936 end_sequence ();
3938 return insn;
3942 /* Return true if we see an insn stream generated by casesi expander together
3943 with an extension to SImode of the switch value.
3945 If this is the case, fill in the insns from casesi to INSNS[1..4] and
3946 the SImode extension to INSNS[0]. Moreover, extract the operands of
3947 pattern casesi_<mode>_sequence forged from the sequence to recog_data. */
3949 static bool
3950 avr_is_casesi_sequence (basic_block bb, rtx_insn *insn, rtx_insn *insns[5])
3952 rtx set_4, set_0;
3954 /* A first and quick test for a casesi sequence.  As a side effect of
3955 the test, harvest respective insns to INSNS[0..4]. */
3957 if (!(JUMP_P (insns[4] = insn)
3958 // casesi is the only insn that comes up with UNSPEC_INDEX_JMP,
3959 // hence the following test ensures that we are actually dealing
3960 // with code from casesi.
3961 && (set_4 = single_set (insns[4]))
3962 && UNSPEC == GET_CODE (SET_SRC (set_4))
3963 && UNSPEC_INDEX_JMP == XINT (SET_SRC (set_4), 1)
3965 && (insns[3] = prev_real_insn (insns[4]))
3966 && (insns[2] = prev_real_insn (insns[3]))
3967 && (insns[1] = prev_real_insn (insns[2]))
3969 // Insn prior to casesi.
3970 && (insns[0] = prev_real_insn (insns[1]))
3971 && (set_0 = single_set (insns[0]))
3972 && extend_operator (SET_SRC (set_0), SImode)))
3974 return false;
3977 if (dump_file)
3979 fprintf (dump_file, ";; Sequence from casesi in "
3980 "[bb %d]:\n\n", bb->index);
3981 for (int i = 0; i < 5; i++)
3982 print_rtl_single (dump_file, insns[i]);
3985 /* We have to deal with quite a few operands.  Extracting them by hand
3986 would be tedious, therefore wrap the insn patterns into a parallel,
3987 run recog against it and then use insn extract to get the operands. */
3989 rtx_insn *xinsn = avr_parallel_insn_from_insns (insns);
3991 INSN_CODE (xinsn) = recog (PATTERN (xinsn), xinsn, NULL /* num_clobbers */);
3993 /* Failing to recognize means that someone changed the casesi expander or
3994 that some passes prior to this one performed some unexpected changes.
3995 Gracefully drop such situations instead of aborting. */
3997 if (INSN_CODE (xinsn) < 0)
3999 if (dump_file)
4000 fprintf (dump_file, ";; Sequence not recognized, giving up.\n\n");
4002 return false;
4005 gcc_assert (CODE_FOR_casesi_qi_sequence == INSN_CODE (xinsn)
4006 || CODE_FOR_casesi_hi_sequence == INSN_CODE (xinsn));
4008 extract_insn (xinsn);
4010 // Assert on the anatomy of xinsn's operands we are going to work with.
4012 gcc_assert (recog_data.n_operands == 12);
4013 gcc_assert (recog_data.n_dups == 3);
4015 if (dump_file)
4017 fprintf (dump_file, ";; Operands extracted:\n");
4018 for (int i = 0; i < recog_data.n_operands; i++)
4019 avr_fdump (dump_file, ";; $%d = %r\n", i, recog_data.operand[i]);
4020 fprintf (dump_file, "\n");
4023 return true;
4027 /* INSNS[1..4] is a sequence as generated by casesi and INSNS[0] is an
4028 extension of an 8-bit or 16-bit integer to SImode. XOP contains the
4029 operands of INSNS as extracted by insn_extract from pattern
4030 casesi_<mode>_sequence:
4032 $0: SImode reg switch value as result of $10.
4033 $1: Negative of smallest index in switch.
4034 $2: Number of entries in switch.
4035 $3: Label to table.
4036 $4: Label if out-of-bounds.
4037 $5: $0 + $1.
4038 $6: 3-byte PC: subreg:HI ($5)
4039 2-byte PC: subreg:HI ($5) + label_ref ($3)
4040 $7: HI reg index into table (Z or pseudo)
4041 $8: Z or scratch:HI (to be clobbered)
4042 $9: R24 or const0_rtx (to be clobbered)
4043 $10: Extension to SImode of an 8-bit or 16-bit integer register $11.
4044 $11: QImode or HImode register input of $10.
4046 Try to optimize this sequence, i.e. use the original HImode / QImode
4047 switch value instead of SImode. */
4049 static void
4050 avr_optimize_casesi (rtx_insn *insns[5], rtx *xop)
4052 // Original mode of the switch value; this is QImode or HImode.
4053 machine_mode mode = GET_MODE (xop[11]);
4055 // How the original switch value was extended to SImode; this is
4056 // SIGN_EXTEND or ZERO_EXTEND.
4057 rtx_code code = GET_CODE (xop[10]);
4059 // Lower index, upper index (plus one) and range of case values.
4060 HOST_WIDE_INT low_idx = -INTVAL (xop[1]);
4061 HOST_WIDE_INT num_idx = INTVAL (xop[2]);
4062 HOST_WIDE_INT hig_idx = low_idx + num_idx;
4064 // Maximum ranges of (un)signed QImode resp. HImode.
4065 unsigned umax = QImode == mode ? 0xff : 0xffff;
4066 int imax = QImode == mode ? 0x7f : 0x7fff;
4067 int imin = -imax - 1;
4069 // Testing the case range and whether it fits into the range of the
4070 // (un)signed mode. This test should actually always pass because it
4071 // makes no sense to have case values outside the mode range. Notice
4072 // that case labels which are unreachable because they are outside the
4073 // mode of the switch value (e.g. "case -1" for uint8_t) have already
4074 // been thrown away by the middle-end.
4076 if (SIGN_EXTEND == code
4077 && low_idx >= imin
4078 && hig_idx <= imax)
4080 // ok
4082 else if (ZERO_EXTEND == code
4083 && low_idx >= 0
4084 && (unsigned) hig_idx <= umax)
4086 // ok
4088 else
4090 if (dump_file)
4091 fprintf (dump_file, ";; Case ranges too big, giving up.\n\n");
4092 return;
4095 // Do normalization of switch value $10 and out-of-bound check in its
4096 // original mode instead of in SImode. Use a newly created pseudo.
4097 // This will replace insns[1..2].
4099 start_sequence ();
4101 rtx reg = copy_to_mode_reg (mode, xop[11]);
4103 rtx (*gen_add)(rtx,rtx,rtx) = QImode == mode ? gen_addqi3 : gen_addhi3;
4104 rtx (*gen_cbranch)(rtx,rtx,rtx,rtx)
4105 = QImode == mode ? gen_cbranchqi4 : gen_cbranchhi4;
4107 emit_insn (gen_add (reg, reg, gen_int_mode (-low_idx, mode)));
4108 rtx op0 = reg; rtx op1 = gen_int_mode (num_idx, mode);
4109 rtx labelref = copy_rtx (xop[4]);
4110 rtx xbranch = gen_cbranch (gen_rtx_fmt_ee (GTU, VOIDmode, op0, op1),
4111 op0, op1, labelref);
4112 rtx_insn *cbranch = emit_jump_insn (xbranch);
4113 JUMP_LABEL (cbranch) = xop[4];
4114 ++LABEL_NUSES (xop[4]);
4116 rtx_insn *seq1 = get_insns ();
4117 rtx_insn *last1 = get_last_insn ();
4118 end_sequence ();
4120 emit_insn_after (seq1, insns[2]);
4122 // After the out-of-bounds test and corresponding branch, use a
4123 // 16-bit index. If QImode is used, extend it to HImode first.
4124 // This will replace insns[4].
4126 start_sequence ();
4128 if (QImode == mode)
4129 reg = force_reg (HImode, gen_rtx_fmt_e (code, HImode, reg));
4131 rtx pat_4 = AVR_3_BYTE_PC
4132 ? gen_movhi (xop[7], reg)
4133 : gen_addhi3 (xop[7], reg, gen_rtx_LABEL_REF (VOIDmode, xop[3]));
4135 emit_insn (pat_4);
4137 rtx_insn *seq2 = get_insns ();
4138 rtx_insn *last2 = get_last_insn ();
4139 end_sequence ();
4141 emit_insn_after (seq2, insns[3]);
4143 if (dump_file)
4145 fprintf (dump_file, ";; New insns: ");
4147 for (rtx_insn *insn = seq1; ; insn = NEXT_INSN (insn))
4149 fprintf (dump_file, "%d, ", INSN_UID (insn));
4150 if (insn == last1)
4151 break;
4153 for (rtx_insn *insn = seq2; ; insn = NEXT_INSN (insn))
4155 fprintf (dump_file, "%d%s", INSN_UID (insn),
4156 insn == last2 ? ".\n\n" : ", ");
4157 if (insn == last2)
4158 break;
4161 fprintf (dump_file, ";; Deleting insns: %d, %d, %d.\n\n",
4162 INSN_UID (insns[1]), INSN_UID (insns[2]), INSN_UID (insns[3]));
4165 // Pseudo-delete the SImode and subreg of SImode insns. We don't care
4166 // about the extension insns[0]: Its result is now unused and other
4167 // passes will clean it up.
4169 SET_INSN_DELETED (insns[1]);
4170 SET_INSN_DELETED (insns[2]);
4171 SET_INSN_DELETED (insns[3]);
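// Worked illustration (hedged), using the operand numbers from the
// comment before avr_optimize_casesi: for "switch (u8)" the new code
// computes, on a fresh pseudo REG in the original QImode,
//     reg = $11 ;  reg += $1 ;  if (reg >u $2) goto $4;
// instead of performing the offset addition and the bounds check on
// the SImode extension; only the final table index is HImode.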
4175 unsigned int
4176 avr_pass_casesi::execute (function *func)
4178 basic_block bb;
4180 FOR_EACH_BB_FN (bb, func)
4182 rtx_insn *insn, *insns[5];
4184 FOR_BB_INSNS (bb, insn)
4186 if (avr_is_casesi_sequence (bb, insn, insns))
4188 avr_optimize_casesi (insns, recog_data.operand);
4193 return 0;
4196 } // anonymous namespace
4198 /* Perform some extra checks on operands of casesi_<mode>_sequence.
4199 Not all operand dependencies can be described by means of predicates.
4200 This function performs left over checks and should always return true.
4201 Returning false means that someone changed the casesi expander but did
4202 not adjust casesi_<mode>_sequence. */
4204 bool
4205 avr_casei_sequence_check_operands (rtx *xop)
4207 rtx sub_5 = NULL_RTX;
4209 if (AVR_HAVE_EIJMP_EICALL
4210 // The last clobber op of the tablejump.
4211 && xop[9] == all_regs_rtx[REG_24])
4213 // $6 is: (subreg:SI ($5) 0)
4214 sub_5 = xop[6];
4217 if (!AVR_HAVE_EIJMP_EICALL
4218 // $6 is: (plus:HI (subreg:SI ($5) 0)
4219 // (label_ref ($3)))
4220 && PLUS == GET_CODE (xop[6])
4221 && LABEL_REF == GET_CODE (XEXP (xop[6], 1))
4222 && rtx_equal_p (xop[3], XEXP (XEXP (xop[6], 1), 0))
4223 // The last clobber op of the tablejump.
4224 && xop[9] == const0_rtx)
4226 sub_5 = XEXP (xop[6], 0);
4229 if (sub_5
4230 && SUBREG_P (sub_5)
4231 && SUBREG_BYTE (sub_5) == 0
4232 && rtx_equal_p (xop[5], SUBREG_REG (sub_5)))
4233 return true;
4235 if (dump_file)
4236 fprintf (dump_file, "\n;; Failed condition for casesi_<mode>_sequence\n\n");
4238 return false;
4241 namespace
4245 //////////////////////////////////////////////////////////////////////////////
4246 // Find more POST_INC and PRE_DEC cases.
4248 static const pass_data avr_pass_data_fuse_add =
4250 RTL_PASS, // type
4251 "", // name (will be patched)
4252 OPTGROUP_NONE, // optinfo_flags
4253 TV_MACH_DEP, // tv_id
4254 0, // properties_required
4255 0, // properties_provided
4256 0, // properties_destroyed
4257 0, // todo_flags_start
4258 TODO_df_finish // todo_flags_finish
4261 class avr_pass_fuse_add : public rtl_opt_pass
4263 public:
4264 avr_pass_fuse_add (gcc::context *ctxt, const char *name)
4265 : rtl_opt_pass (avr_pass_data_fuse_add, ctxt)
4267 this->name = name;
4270 // Cloning is required because we are running one instance of the pass
4271 // before peephole2, and a second one after cprop_hardreg.
4272 opt_pass * clone () final override
4274 return make_avr_pass_fuse_add (m_ctxt);
4277 unsigned int execute (function *func) final override
4279 func->machine->n_avr_fuse_add_executed += 1;
4280 n_avr_fuse_add_executed = func->machine->n_avr_fuse_add_executed;
4282 if (optimize && avropt_fuse_add > 0)
4283 return execute1 (func);
4284 return 0;
4287 unsigned int execute1 (function *);
4289 struct Some_Insn
4291 rtx_insn *insn = nullptr;
4292 rtx dest, src;
4293 bool valid () const { return insn != nullptr; }
4294 void set_deleted ()
4296 gcc_assert (insn);
4297 SET_INSN_DELETED (insn);
4298 insn = nullptr;
4302 // If .insn is not NULL, then this is a reg:HI += const_int
4303 // of an address register.
4304 struct Add_Insn : Some_Insn
4306 rtx addend;
4307 int regno;
4308 Add_Insn () {}
4309 Add_Insn (rtx_insn *insn);
4312 // If .insn is not NULL, then this sets an address register
4313 // to a constant value.
4314 struct Ldi_Insn : Some_Insn
4316 int regno;
4317 Ldi_Insn () {}
4318 Ldi_Insn (rtx_insn *insn);
4321 // If .insn is not NULL, then this is a load or store insn where the
4322 // address is REG or POST_INC with an address register.
4323 struct Mem_Insn : Some_Insn
4325 rtx reg_or_0, mem, addr, addr_reg;
4326 int addr_regno;
4327 rtx_code addr_code;
4328 machine_mode mode;
4329 addr_space_t addr_space;
4330 bool store_p, volatile_p;
4331 Mem_Insn () {}
4332 Mem_Insn (rtx_insn *insn);
4335 rtx_insn *fuse_ldi_add (Ldi_Insn &prev_ldi, Add_Insn &add);
4336 rtx_insn *fuse_add_add (Add_Insn &prev_add, Add_Insn &add);
4337 rtx_insn *fuse_add_mem (Add_Insn &prev_add, Mem_Insn &mem);
4338 rtx_insn *fuse_mem_add (Mem_Insn &prev_mem, Add_Insn &add);
4339 }; // avr_pass_fuse_add
4342 /* Describe properties of AVR's indirect load and store instructions
4343 LD, LDD, ST, STD, LPM, ELPM depending on register number, volatility etc.
4344 Rules for "volatile" accesses are:
4346 | Xmega | non-Xmega
4347 ------+-----------------+----------------
4348 load | read LSB first | read LSB first
4349 store | write LSB first | write MSB first
4352 struct AVR_LdSt_Props
4354 bool has_postinc, has_predec, has_ldd;
4355 // The insn printers will use POST_INC or PRE_DEC addressing, no matter
4356 // what addressing modes we are feeding into them.
4357 bool want_postinc, want_predec;
4359 AVR_LdSt_Props (int regno, bool store_p, bool volatile_p, addr_space_t as)
4361 bool generic_p = ADDR_SPACE_GENERIC_P (as);
4362 bool flashx_p = (! generic_p
4363 && as != ADDR_SPACE_MEMX && as != ADDR_SPACE_FLASHX);
4364 has_postinc = generic_p || (flashx_p && regno == REG_Z);
4365 has_predec = generic_p;
4366 has_ldd = ! AVR_TINY && generic_p && (regno == REG_Y || regno == REG_Z);
4367 want_predec = volatile_p && generic_p && ! AVR_XMEGA && store_p;
4368 want_postinc = volatile_p && generic_p && (AVR_XMEGA || ! store_p);
4369 want_postinc |= flashx_p && regno == REG_Z;
4372 AVR_LdSt_Props (const avr_pass_fuse_add::Mem_Insn &m)
4373 : AVR_LdSt_Props (m.addr_regno, m.store_p, m.volatile_p, m.addr_space)
4375 gcc_assert (m.valid ());
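// Example (hedged): a volatile 2-byte store through Y on a non-Xmega
// core gets want_predec = 1 because the MSB must be written first
// (two "st -Y"), whereas the same store on Xmega gets want_postinc = 1
// and writes the LSB first (two "st Y+").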
4380 /* Emit a single_set that clobbers REG_CC. */
4382 static rtx_insn *
4383 emit_move_ccc (rtx dest, rtx src)
4385 return emit_insn (gen_gen_move_clobbercc (dest, src));
4389 /* Emit a single_set that clobbers REG_CC after insn AFTER. */
4391 static rtx_insn *
4392 emit_move_ccc_after (rtx dest, rtx src, rtx_insn *after)
4394 return emit_insn_after (gen_gen_move_clobbercc (dest, src), after);
4397 static bool
4398 reg_seen_between_p (const_rtx reg, const rtx_insn *from, const rtx_insn *to)
4400 return (reg_used_between_p (reg, from, to)
4401 || reg_set_between_p (reg, from, to));
4405 static void
4406 avr_maybe_adjust_cfa (rtx_insn *insn, rtx reg, int addend)
4408 if (addend
4409 && frame_pointer_needed
4410 && REGNO (reg) == FRAME_POINTER_REGNUM
4411 && avropt_fuse_add == 3)
4413 rtx plus = plus_constant (Pmode, reg, addend);
4414 RTX_FRAME_RELATED_P (insn) = 1;
4415 add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (reg, plus));
4420 // If successful, this represents a SET of a pointer register to a constant.
4421 avr_pass_fuse_add::Ldi_Insn::Ldi_Insn (rtx_insn *insn)
4423 rtx set = single_set (insn);
4424 if (!set)
4425 return;
4427 src = SET_SRC (set);
4428 dest = SET_DEST (set);
4430 if (REG_P (dest)
4431 && GET_MODE (dest) == Pmode
4432 && IN_RANGE (regno = REGNO (dest), REG_X, REG_Z)
4433 && CONSTANT_P (src))
4435 this->insn = insn;
4439 // If successful, this represents a PLUS with CONST_INT of a pointer
4440 // register X, Y or Z. Otherwise, the object is not valid().
4441 avr_pass_fuse_add::Add_Insn::Add_Insn (rtx_insn *insn)
4443 rtx set = single_set (insn);
4444 if (!set)
4445 return;
4447 src = SET_SRC (set);
4448 dest = SET_DEST (set);
4449 if (REG_P (dest)
4450 // We are only interested in PLUSes that change address regs.
4451 && GET_MODE (dest) == Pmode
4452 && IN_RANGE (regno = REGNO (dest), REG_X, REG_Z)
4453 && PLUS == GET_CODE (src)
4454 && rtx_equal_p (XEXP (src, 0), dest)
4455 && CONST_INT_P (XEXP (src, 1)))
4457 // This is reg:HI += const_int.
4458 addend = XEXP (src, 1);
4459 this->insn = insn;
4463 // If successful, this represents a load or store insn where the addressing
4464 // mode uses pointer register X, Y or Z. Otherwise, the object is not valid().
4465 avr_pass_fuse_add::Mem_Insn::Mem_Insn (rtx_insn *insn)
4467 rtx set = single_set (insn);
4468 if (!set)
4469 return;
4471 src = SET_SRC (set);
4472 dest = SET_DEST (set);
4473 mode = GET_MODE (dest);
4475 if (MEM_P (dest)
4476 && (REG_P (src) || src == CONST0_RTX (mode)))
4478 reg_or_0 = src;
4479 mem = dest;
4481 else if (REG_P (dest) && MEM_P (src))
4483 reg_or_0 = dest;
4484 mem = src;
4486 else
4487 return;
4489 if (avr_mem_memx_p (mem)
4490 || avr_load_libgcc_p (mem))
4491 return;
4493 addr = XEXP (mem, 0);
4494 addr_code = GET_CODE (addr);
4496 if (addr_code == REG)
4497 addr_reg = addr;
4498 else if (addr_code == POST_INC || addr_code == PRE_DEC)
4499 addr_reg = XEXP (addr, 0);
4500 else
4501 return;
4503 addr_regno = REGNO (addr_reg);
4505 if (avropt_fuse_add == 2
4506 && frame_pointer_needed
4507 && addr_regno == FRAME_POINTER_REGNUM)
4508 MEM_VOLATILE_P (mem) = 0;
4510 if (reg_overlap_mentioned_p (reg_or_0, addr) // Can handle CONSTANT_P.
4511 || addr_regno > REG_Z
4512 || avr_mem_memx_p (mem)
4513 // The following optimizations only handle REG and POST_INC,
4514 // so that's all we allow here.
4515 || (addr_code != REG && addr_code != POST_INC))
4516 return;
4518 addr_space = MEM_ADDR_SPACE (mem);
4519 volatile_p = MEM_VOLATILE_P (mem);
4520 store_p = MEM_P (dest);
4522 // Mark this object as valid ().
4523 this->insn = insn;
4526 /* Try to combine a Ldi insn with a PLUS CONST_INT addend to one Ldi insn.
4527 If LDI is valid, then it precedes ADD in the same block.
4528 When a replacement is found, a new insn is emitted and the old insns
4529 are pseudo-deleted. The returned insn is the point where the calling
4530 scanner should continue. When no replacement is found, nullptr is
4531 returned and nothing changed. */
4533 rtx_insn *
4534 avr_pass_fuse_add::fuse_ldi_add (Ldi_Insn &ldi, Add_Insn &add)
4536 if (! ldi.valid ()
4537 || reg_seen_between_p (ldi.dest, ldi.insn, add.insn))
4539 // If something is between the Ldi and the current insn, we can
4540 // set the Ldi invalid to speed future scans.
4541 return ldi.insn = nullptr;
4544 // Found a Ldi with const and a PLUS insns in the same BB,
4545 // and with no interfering insns between them.
4547 // Emit new Ldi with the sum of the original offsets after the old Ldi.
4548 rtx xval = plus_constant (Pmode, ldi.src, INTVAL (add.addend));
4550 rtx_insn *insn = emit_move_ccc_after (ldi.dest, xval, ldi.insn);
4551 avr_dump (";; new Ldi[%d] insn %d after %d: R%d = %r\n\n", ldi.regno,
4552 INSN_UID (insn), INSN_UID (ldi.insn), ldi.regno, xval);
4554 rtx_insn *next = NEXT_INSN (add.insn);
4555 ldi.set_deleted ();
4556 add.set_deleted ();
4558 return next;
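// Example (sketch):
//     Ldi:  X = symbol        ; an LDI pair
//     Add:  X += -4           ; e.g. SBIW R26, 4
// fuses to a single "X = symbol - 4" loaded with one LDI pair.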
4561 /* Try to combine two PLUS insns with CONST_INT addend to one such insn.
4562 If PREV_ADD is valid, then it precedes ADD in the same basic block.
4563 When a replacement is found, a new insn is emitted and the old insns
4564 are pseudo-deleted. The returned insn is the point where the calling
4565 scanner should continue. When no replacement is found, nullptr is
4566 returned and nothing changed. */
4568 rtx_insn *
4569 avr_pass_fuse_add::fuse_add_add (Add_Insn &prev_add, Add_Insn &add)
4571 if (! prev_add.valid ()
4572 || reg_seen_between_p (add.dest, prev_add.insn, add.insn))
4574 // If something is between the previous Add and the current insn,
4575 // we can set the previous Add invalid to speed future scans.
4576 return prev_add.insn = nullptr;
4579 // Found two PLUS insns in the same BB, and with no interfering
4580 // insns between them.
4581 rtx plus = plus_constant (Pmode, add.src, INTVAL (prev_add.addend));
4583 rtx_insn *next;
4584 if (REG_P (plus))
4586 avr_dump (";; Add[%d] from %d annihilates %d\n\n", add.regno,
4587 INSN_UID (prev_add.insn), INSN_UID (add.insn));
4588 next = NEXT_INSN (add.insn);
4590 else
4592 // Emit after the current insn, so that it will be picked
4593 // up as next valid Add insn.
4594 next = emit_move_ccc_after (add.dest, plus, add.insn);
4595 avr_dump (";; #1 new Add[%d] insn %d after %d: R%d += %d\n\n",
4596 add.regno, INSN_UID (next), INSN_UID (add.insn),
4597 add.regno, (int) INTVAL (XEXP (plus, 1)));
4598 gcc_assert (GET_CODE (plus) == PLUS);
4601 add.set_deleted ();
4602 prev_add.set_deleted ();
4604 return next;
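// Example (sketch): "Y += 3" followed by "Y += 4" becomes a single
// "Y += 7" emitted after the second add, whereas "Y += 4" followed
// by "Y += -4" annihilates: both insns are just deleted.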
4607 /* Try to combine a PLUS of the address register with a load or store insn.
4608 If ADD is valid, then it precedes MEM in the same basic block.
4609 When a replacement is found, a new insn is emitted and the old insns
4610 are pseudo-deleted. The returned insn is the point where the calling
4611 scanner should continue. When no replacement is found, nullptr is
4612 returned and nothing changed. */
4614 rtx_insn *
4615 avr_pass_fuse_add::fuse_add_mem (Add_Insn &add, Mem_Insn &mem)
4617 if (! add.valid ()
4618 || reg_seen_between_p (add.dest, add.insn, mem.insn))
4620 // If something is between the Add and the current insn, we can
4621 // set the Add invalid to speed future scans.
4622 return add.insn = nullptr;
4625 AVR_LdSt_Props ap { mem };
4627 int msize = GET_MODE_SIZE (mem.mode);
4629 // The mem insn really wants PRE_DEC.
4630 bool case1 = ((mem.addr_code == REG || mem.addr_code == POST_INC)
4631 && msize > 1 && ap.want_predec && ! ap.has_ldd);
4633 // The offset can be consumed by a PRE_DEC.
4634 bool case2 = (- INTVAL (add.addend) == msize
4635 && (mem.addr_code == REG || mem.addr_code == POST_INC)
4636 && ap.has_predec && ! ap.want_postinc);
4638 if (! case1 && ! case2)
4639 return nullptr;
4641 // Change from REG or POST_INC to PRE_DEC.
4642 rtx xmem = change_address (mem.mem, mem.mode,
4643 gen_rtx_PRE_DEC (Pmode, mem.addr_reg));
4644 rtx dest = mem.store_p ? xmem : mem.reg_or_0;
4645 rtx src = mem.store_p ? mem.reg_or_0 : xmem;
4647 rtx_insn *next = emit_move_ccc_after (dest, src, mem.insn);
4648 add_reg_note (next, REG_INC, mem.addr_reg);
4649 avr_dump (";; new Mem[%d] insn %d after %d: %r = %r\n\n", mem.addr_regno,
4650 INSN_UID (next), INSN_UID (mem.insn), dest, src);
4652 // Changing REG or POST_INC -> PRE_DEC means that the addend before
4653 // the memory access must be increased by the size of the access.
4654 rtx plus = plus_constant (Pmode, add.src, msize);
4655 if (! REG_P (plus))
4657 rtx_insn *insn = emit_move_ccc_after (add.dest, plus, add.insn);
4658 avr_dump (";; #2 new Add[%d] insn %d after %d: R%d += %d\n\n",
4659 add.regno, INSN_UID (insn), INSN_UID (add.insn),
4660 add.regno, (int) INTVAL (XEXP (plus, 1)));
4661 gcc_assert (GET_CODE (plus) == PLUS);
4663 else
4664 avr_dump (";; Add[%d] insn %d consumed into %d\n\n",
4665 add.regno, INSN_UID (add.insn), INSN_UID (next));
4667 // Changing POST_INC -> PRE_DEC means that the addend after the mem has to be
4668 // the size of the access. The hope is that this new add insn may be unused.
4669 if (mem.addr_code == POST_INC)
4671 plus = plus_constant (Pmode, add.dest, msize);
4672 rtx_insn *next2 = emit_move_ccc_after (add.dest, plus, next);
4673 avr_dump (";; #3 new Add[%d] insn %d after %d: R%d += %d\n\n", add.regno,
4674 INSN_UID (next2), INSN_UID (next), add.regno, msize);
4675 next = next2;
4678 add.set_deleted ();
4679 mem.set_deleted ();
4681 return next;
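// Example (sketch) for case2:
//     X += -2 ;  R25:R24 = mem:HI (X)
// becomes the single PRE_DEC access "R25:R24 = mem:HI (--X)", and as
// the adjusted addend is zero, the add insn is consumed completely.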
4684 /* Try to combine a load or store insn with a PLUS of the address register.
4685 If MEM is valid, then it precedes ADD in the same basic block.
4686 When a replacement is found, a new insn is emitted and the old insns
4687 are pseudo-deleted. The returned insn is the point where the calling
4688 scanner should continue. When no replacement is found, nullptr is
4689 returned and nothing changed. */
4691 rtx_insn *
4692 avr_pass_fuse_add::fuse_mem_add (Mem_Insn &mem, Add_Insn &add)
4694 if (! mem.valid ()
4695 || reg_seen_between_p (add.dest, mem.insn, add.insn))
4697 // If something is between the Mem and the current insn, we can
4698 // set the Mem invalid to speed future scans.
4699 return mem.insn = nullptr;
4702 AVR_LdSt_Props ap { mem };
4704 int msize = GET_MODE_SIZE (mem.mode);
4706 // The add insn can be consumed by a POST_INC.
4707 bool case1 = (mem.addr_code == REG
4708 && INTVAL (add.addend) == msize
4709 && ap.has_postinc && ! ap.want_predec);
4711 // There are cases where even a partial consumption of the offset is better.
4712 // These are the cases where no LD+offset addressing is available, because
4713 // the address register is obviously used after the mem insn, and a mem insn
4714 // with REG addressing mode will have to restore the address.
4715 bool case2 = (mem.addr_code == REG
4716 && msize > 1 && ap.want_postinc && ! ap.has_ldd);
4718 if (! case1 && ! case2)
4719 return nullptr;
4721 // Change addressing mode from REG to POST_INC.
4722 rtx xmem = change_address (mem.mem, mem.mode,
4723 gen_rtx_POST_INC (Pmode, mem.addr_reg));
4724 rtx dest = mem.store_p ? xmem : mem.reg_or_0;
4725 rtx src = mem.store_p ? mem.reg_or_0 : xmem;
4727 rtx_insn *insn = emit_move_ccc_after (dest, src, mem.insn);
4728 add_reg_note (insn, REG_INC, mem.addr_reg);
4729 avr_dump (";; new Mem[%d] insn %d after %d: %r = %r\n\n", add.regno,
4730 INSN_UID (insn), INSN_UID (mem.insn), dest, src);
4732 rtx_insn *next = NEXT_INSN (add.insn);
4734 // Changing REG -> POST_INC means that the post addend must be
4735 // decreased by the size of the access.
4736 rtx plus = plus_constant (Pmode, add.src, -msize);
4737 if (! REG_P (plus))
4739 next = emit_move_ccc_after (mem.addr_reg, plus, add.insn);
4740 avr_dump (";; #4 new Add[%d] insn %d after %d: R%d += %d\n\n",
4741 add.regno, INSN_UID (next), INSN_UID (add.insn),
4742 add.regno, (int) INTVAL (XEXP (plus, 1)));
4743 gcc_assert (GET_CODE (plus) == PLUS);
4745 else
4746 avr_dump (";; Add[%d] insn %d consumed into %d\n\n",
4747 add.regno, INSN_UID (add.insn), INSN_UID (insn));
4749 add.set_deleted ();
4750 mem.set_deleted ();
4752 return next;
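// Example (sketch) for case1: a byte load followed by an increment
//     R24 = mem:QI (X) ;  X += 1
// becomes the POST_INC access "R24 = mem:QI (X+)"; the remaining
// addend 1 - msize is zero, so no residual add insn is needed.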
4755 /* Try to post-reload combine PLUS with CONST_INT of pointer registers with:
4756 - Sets to a constant address.
4757 - PLUS insn of that kind.
4758 - Indirect loads and stores.
4759 In almost all cases, combine opportunities arise from the preparation
4760 done by `avr_split_fake_addressing_move', but in some rare cases combinations
4761 are found for the ordinary cores, too.
4762 As we consider at most one Mem insn per try, there may still be missed
4763 optimizations; e.g. POST_INC + PLUS + POST_INC might be performed
4764 as PRE_DEC + PRE_DEC for two adjacent locations. */
4766 unsigned int
4767 avr_pass_fuse_add::execute1 (function *func)
4769 df_note_add_problem ();
4770 df_analyze ();
4772 int n_add = 0, n_mem = 0, n_ldi = 0;
4773 basic_block bb;
4775 FOR_EACH_BB_FN (bb, func)
4777 Ldi_Insn prev_ldi_insns[REG_32];
4778 Add_Insn prev_add_insns[REG_32];
4779 Mem_Insn prev_mem_insns[REG_32];
4780 rtx_insn *insn, *curr;
4782 avr_dump ("\n;; basic block %d\n\n", bb->index);
4784 FOR_BB_INSNS_SAFE (bb, insn, curr)
4786 rtx_insn *next = nullptr;
4787 Ldi_Insn ldi_insn { insn };
4788 Add_Insn add_insn { insn };
4789 Mem_Insn mem_insn { insn };
4791 if (add_insn.valid ())
4793 // Found reg:HI += const_int
4794 avr_dump (";; insn %d: Add[%d]: R%d += %d\n\n",
4795 INSN_UID (add_insn.insn), add_insn.regno,
4796 add_insn.regno, (int) INTVAL (add_insn.addend));
4797 Ldi_Insn &prev_ldi_insn = prev_ldi_insns[add_insn.regno];
4798 Add_Insn &prev_add_insn = prev_add_insns[add_insn.regno];
4799 Mem_Insn &prev_mem_insn = prev_mem_insns[add_insn.regno];
4800 if ((next = fuse_ldi_add (prev_ldi_insn, add_insn)))
4801 curr = next, n_ldi += 1;
4802 else if ((next = fuse_add_add (prev_add_insn, add_insn)))
4803 curr = next, n_add += 1;
4804 else if ((next = fuse_mem_add (prev_mem_insn, add_insn)))
4805 curr = next, n_mem += 1;
4806 else
4807 prev_add_insn = add_insn;
4809 else if (mem_insn.valid ())
4811 int addr_regno = REGNO (mem_insn.addr_reg);
4812 avr_dump (";; insn %d: Mem[%d]: %r = %r\n\n",
4813 INSN_UID (mem_insn.insn), addr_regno,
4814 mem_insn.dest, mem_insn.src);
4815 Add_Insn &prev_add_insn = prev_add_insns[addr_regno];
4816 if ((next = fuse_add_mem (prev_add_insn, mem_insn)))
4817 curr = next, n_mem += 1;
4818 else
4819 prev_mem_insns[addr_regno] = mem_insn;
4821 else if (ldi_insn.valid ())
4823 if (! CONST_INT_P (ldi_insn.src))
4824 avr_dump (";; insn %d: Ldi[%d]: R%d = %r\n\n",
4825 INSN_UID (ldi_insn.insn), ldi_insn.regno,
4826 ldi_insn.regno, ldi_insn.src);
4827 prev_ldi_insns[ldi_insn.regno] = ldi_insn;
4829 } // for insns
4830 } // for BBs
4832 avr_dump (";; Function %f: Found %d changes: %d ldi, %d add, %d mem.\n",
4833 n_ldi + n_add + n_mem, n_ldi, n_add, n_mem);
4835 return 0;
4840 //////////////////////////////////////////////////////////////////////////////
4841 // Split shift insns after peephole2 / before avr-fuse-move.
4843 static const pass_data avr_pass_data_split_after_peephole2 =
4845 RTL_PASS, // type
4846 "", // name (will be patched)
4847 OPTGROUP_NONE, // optinfo_flags
4848 TV_DF_SCAN, // tv_id
4849 0, // properties_required
4850 0, // properties_provided
4851 0, // properties_destroyed
4852 0, // todo_flags_start
4853 0 // todo_flags_finish
4856 class avr_pass_split_after_peephole2 : public rtl_opt_pass
4858 public:
4859 avr_pass_split_after_peephole2 (gcc::context *ctxt, const char *name)
4860 : rtl_opt_pass (avr_pass_data_split_after_peephole2, ctxt)
4862 this->name = name;
4865 unsigned int execute (function *) final override
4867 if (avr_shift_is_3op ())
4868 split_all_insns ();
4869 return 0;
4872 }; // avr_pass_split_after_peephole2
4874 } // anonymous namespace
4877 /* Whether some shift insn alternatives are `3op' 3-operand insns.
4878 These 3op alternatives allow the source and the destination register
4879 of the shift to be different right from the start, because the splitter
4880 will split the 3op shift into a 3-operand byte shift and a 2-operand
4881 residual bit shift. (When the residual shift has an offset of one
4882 less than the bitsize, then the residual shift is also a 3op insn.) */
4884 bool
4885 avr_shift_is_3op ()
4887 // Don't split for OPTIMIZE_SIZE_MAX (-Oz).
4888 // For OPTIMIZE_SIZE_BALANCED (-Os), we still split because
4889 // the size overhead (if at all) is marginal.
4891 return (avropt_split_bit_shift
4892 && optimize > 0
4893 && avr_optimize_size_level () < OPTIMIZE_SIZE_MAX);
4897 /* Implement constraints `C2a', `C2l', `C2r' ... `C4a', `C4l', `C4r'.
4898 Whether we split an N_BYTES shift of code CODE in { ASHIFTRT,
4899 LSHIFTRT, ASHIFT } into a byte shift and a residual bit shift. */
4901 bool
4902 avr_split_shift_p (int n_bytes, int offset, rtx_code code)
4904 gcc_assert (n_bytes == 4 || n_bytes == 3 || n_bytes == 2);
4906 if (! avr_shift_is_3op ()
4907 || offset % 8 == 0)
4908 return false;
4910 if (n_bytes == 4)
4911 return select<bool>()
4912 : code == ASHIFT ? IN_RANGE (offset, 9, 31) && offset != 15
4913 : code == ASHIFTRT ? IN_RANGE (offset, 9, 29) && offset != 15
4914 : code == LSHIFTRT ? IN_RANGE (offset, 9, 31) && offset != 15
4915 : bad_case<bool> ();
4917 if (n_bytes == 3)
4918 return select<bool>()
4919 : code == ASHIFT ? IN_RANGE (offset, 9, 23) && offset != 15
4920 : code == ASHIFTRT ? IN_RANGE (offset, 9, 21) && offset != 15
4921 : code == LSHIFTRT ? IN_RANGE (offset, 9, 23) && offset != 15
4922 : bad_case<bool> ();
4924 if (n_bytes == 2)
4925 return select<bool>()
4926 : code == ASHIFT ? IN_RANGE (offset, 9, 15)
4927 : code == ASHIFTRT ? IN_RANGE (offset, 9, 13)
4928 : code == LSHIFTRT ? IN_RANGE (offset, 9, 15)
4929 : bad_case<bool> ();
4931 return false;
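// Example (hedged): for n_bytes = 4 and code = ASHIFT, offset 17 is
// split (9 <= 17 <= 31, not a multiple of 8, not 15), whereas offsets
// 16 (a pure byte shift) and 15 (excluded explicitly) are not split
// here.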
4935 /* Emit a DEST = SRC <code> OFF shift of QImode, HImode, PSImode or SImode.
4936 SCRATCH is a QImode d-register, scratch:QI, or NULL_RTX.
4937 This function is used to emit shifts that have been split into
4938 a byte shift and a residual bit shift that operates on a mode
4939 strictly smaller than the original shift. */
4941 static void
4942 avr_emit_shift (rtx_code code, rtx dest, rtx src, int off, rtx scratch)
4944 const machine_mode mode = GET_MODE (dest);
4945 const int n_bits = GET_MODE_BITSIZE (mode);
4946 rtx xoff = GEN_INT (off);
4948 // Work out which alternatives can handle 3 operands independent
4949 // of options.
4951 const bool b8_is_3op = off == 6;
4953 const bool b16_is_3op = select<bool>()
4954 : code == ASHIFT ? (satisfies_constraint_C7c (xoff) // 7...12
4955 // The "C05 C06" alternative of *ashlhi3_const.
4956 || (AVR_HAVE_MUL && scratch && (off == 5 || off == 6)))
4957 : code == LSHIFTRT ? satisfies_constraint_C7c (xoff)
4958 : code == ASHIFTRT ? off == 7
4959 : bad_case<bool> ();
4961 const bool b24_is_3op = select<bool>()
4962 : code == ASHIFT ? off == 15
4963 : code == LSHIFTRT ? off == 15
4964 : code == ASHIFTRT ? false
4965 : bad_case<bool> ();
4967 const bool is_3op = (off % 8 == 0
4968 || off == n_bits - 1
4969 || (code == ASHIFTRT && off == n_bits - 2)
4970 || (n_bits == 8 && b8_is_3op)
4971 || (n_bits == 16 && b16_is_3op)
4972 || (n_bits == 24 && b24_is_3op));
4973 rtx shift;
4975 if (is_3op)
4977 shift = gen_rtx_fmt_ee (code, mode, src, xoff);
4979 else
4981 if (REGNO (dest) != REGNO (src))
4982 emit_valid_move_clobbercc (dest, src);
4983 shift = gen_rtx_fmt_ee (code, mode, dest, xoff);
4986 if (n_bits == 8)
4987 // 8-bit shifts don't have a scratch operand.
4988 scratch = NULL_RTX;
4989 else if (! scratch && n_bits == 24)
4990 // 24-bit shifts always have a scratch operand.
4991 scratch = gen_rtx_SCRATCH (QImode);
4993 emit_valid_move_clobbercc (dest, shift, scratch);
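// Example (hedged): a PSImode ASHIFT by 15 has b24_is_3op = true, so
// DEST = SRC << 15 is emitted as one 3-operand insn.  An ASHIFT by 14
// is not 3op: SRC is first moved to DEST, then a 2-operand
// DEST <<= 14 is emitted.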
4997 /* Handle the 4-byte case of avr_split_shift below:
4998 Split 4-byte shift DEST = SRC <code> IOFF into a 3-operand
4999 byte shift and a residual shift in a smaller mode if possible.
5000 SCRATCH is a QImode upper scratch register or NULL_RTX. */
5002 static bool
5003 avr_split_shift4 (rtx dest, rtx src, int ioff, rtx scratch, rtx_code code)
5005 gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 4);
5007 if (code == ASHIFT)
5009 if (IN_RANGE (ioff, 25, 31))
5011 rtx dst8 = avr_byte (dest, 3);
5012 rtx src8 = avr_byte (src, 0);
5013 avr_emit_shift (code, dst8, src8, ioff - 24, NULL_RTX);
5014 emit_valid_move_clobbercc (avr_byte (dest, 2), const0_rtx);
5015 emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx);
5016 return true;
5018 else if (IN_RANGE (ioff, 17, 23))
5020 rtx dst16 = avr_word (dest, 2);
5021 rtx src16 = avr_word (src, 0);
5022 avr_emit_shift (code, dst16, src16, ioff - 16, scratch);
5023 emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx);
5024 return true;
5026 // ...the 9...14 cases are only handled by define_split because
5027 // for now, we don't exploit that the low byte is zero.
5029 else if (code == ASHIFTRT
5030 || code == LSHIFTRT)
5032 if (IN_RANGE (ioff, 25, 30 + (code == LSHIFTRT)))
5034 rtx dst8 = avr_byte (dest, 0);
5035 rtx src8 = avr_byte (src, 3);
5036 avr_emit_shift (code, dst8, src8, ioff - 24, NULL_RTX);
5037 if (code == ASHIFTRT)
5039 rtx signs = avr_byte (dest, 1);
5040 avr_emit_shift (code, signs, src8, 7, NULL_RTX);
5041 emit_valid_move_clobbercc (avr_byte (dest, 2), signs);
5042 emit_valid_move_clobbercc (avr_byte (dest, 3), signs);
5044 else
5046 emit_valid_move_clobbercc (avr_byte (dest, 1), const0_rtx);
5047 emit_valid_move_clobbercc (avr_word (dest, 2), const0_rtx);
5049 return true;
5051 else if (IN_RANGE (ioff, 17, 23))
5053 rtx dst16 = avr_word (dest, 0);
5054 rtx src16 = avr_word (src, 2);
5055 avr_emit_shift (code, dst16, src16, ioff - 16, scratch);
5056 if (code == ASHIFTRT)
5058 rtx msb = avr_byte (src, 3);
5059 rtx signs = avr_byte (dest, 2);
5060 avr_emit_shift (code, signs, msb, 7, NULL_RTX);
5061 emit_valid_move_clobbercc (avr_byte (dest, 3), signs);
5063 else
5064 emit_valid_move_clobbercc (avr_word (dest, 2), const0_rtx);
5066 return true;
5068 else if (IN_RANGE (ioff, 9, 15))
5070 avr_emit_shift (code, dest, src, 8, scratch);
5071 rtx dst24 = avr_chunk (PSImode, dest, 0);
5072 rtx src24 = avr_chunk (PSImode, dest, 0);
5073 avr_emit_shift (code, dst24, src24, ioff - 8, scratch);
5074 return true;
5077 else
5078 gcc_unreachable ();
5080 return false;
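// Worked example (sketch): a 4-byte ASHIFT by 17 takes the 17...23
// branch above: the high word of DEST becomes "low word of SRC << 1",
// a cheap residual HImode shift, and the low word of DEST is cleared;
// no insn has to run all 4 bytes through a 17-step shift loop.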
5084 /* Handle the 3-byte case of avr_split_shift below:
5085 Split 3-byte shift DEST = SRC <code> IOFF into a 3-operand
5086 byte shift and a residual shift in a smaller mode if possible.
5087 SCRATCH is a QImode upper scratch register or NULL_RTX. */
5089 static bool
5090 avr_split_shift3 (rtx dest, rtx src, int ioff, rtx scratch, rtx_code code)
5092 gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 3);
5094 if (code == ASHIFT)
5096 if (IN_RANGE (ioff, 17, 23))
5098 rtx dst8 = avr_byte (dest, 2);
5099 rtx src8 = avr_byte (src, 0);
5100 avr_emit_shift (code, dst8, src8, ioff - 16, NULL_RTX);
5101 emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx);
5102 return true;
5104 // ...the 9...14 cases are only handled by define_split because
5105 // for now, we don't exploit that the low byte is zero.
5107 else if (code == ASHIFTRT
5108 || code == LSHIFTRT)
5110 if (IN_RANGE (ioff, 17, 22 + (code == LSHIFTRT)))
5112 rtx dst8 = avr_byte (dest, 0);
5113 rtx src8 = avr_byte (src, 2);
5114 avr_emit_shift (code, dst8, src8, ioff - 16, NULL_RTX);
5115 if (code == ASHIFTRT)
5117 rtx signs = avr_byte (dest, 1);
5118 avr_emit_shift (code, signs, src8, 7, NULL_RTX);
5119 emit_valid_move_clobbercc (avr_byte (dest, 2), signs);
5121 else
5123 emit_valid_move_clobbercc (avr_byte (dest, 1), const0_rtx);
5124 emit_valid_move_clobbercc (avr_byte (dest, 2), const0_rtx);
5126 return true;
5128 else if (IN_RANGE (ioff, 9, 15))
5130 avr_emit_shift (code, dest, src, 8, scratch);
5131 rtx dst16 = avr_chunk (HImode, dest, 0);
5132 rtx src16 = avr_chunk (HImode, dest, 0);
5133 avr_emit_shift (code, dst16, src16, ioff - 8, scratch);
5134 return true;
5137 else
5138 gcc_unreachable ();
5140 return false;
5144 /* Handle the 2-byte case of avr_split_shift below:
5145 Split 2-byte shift DEST = SRC <code> IOFF into a 3-operand
5146 byte shift and a residual shift in a smaller mode if possible.
5147 SCRATCH is a QImode upper scratch register or NULL_RTX. */
5149 static bool
5150 avr_split_shift2 (rtx dest, rtx src, int ioff, rtx /*scratch*/, rtx_code code)
5152 gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 2);
5154 if (code == ASHIFT)
5156 if (IN_RANGE (ioff, 9, 15))
5158 rtx dst8 = avr_byte (dest, 1);
5159 rtx src8 = avr_byte (src, 0);
5160 avr_emit_shift (code, dst8, src8, ioff - 8, NULL_RTX);
5161 emit_valid_move_clobbercc (avr_byte (dest, 0), const0_rtx);
5162 return true;
5165 else if (code == ASHIFTRT
5166 || code == LSHIFTRT)
5168 if (IN_RANGE (ioff, 9, 14 + (code == LSHIFTRT)))
5170 rtx dst8 = avr_byte (dest, 0);
5171 rtx src8 = avr_byte (src, 1);
5172 rtx signs = const0_rtx;
5173 avr_emit_shift (code, dst8, src8, ioff - 8, NULL_RTX);
5174 if (code == ASHIFTRT)
5176 signs = avr_byte (dest, 1);
5177 avr_emit_shift (code, signs, src8, 7, NULL_RTX);
5179 emit_valid_move_clobbercc (avr_byte (dest, 1), signs);
5180 return true;
5183 else
5184 gcc_unreachable ();
5186 return false;
5190 /* Worker for a define_split that runs when -msplit-bit-shift is on.
5191 Split a shift of code CODE into a 3op byte shift and a residual bit shift.
5192 Return 'true' when a split has been performed and insns have been emitted.
5193 Otherwise, return 'false'. */
5195 bool
5196 avr_split_shift (rtx xop[], rtx scratch, rtx_code code)
5198 scratch = scratch && REG_P (scratch) ? scratch : NULL_RTX;
5199 rtx dest = xop[0];
5200 rtx src = xop[1];
5201 int ioff = INTVAL (xop[2]);
5202 int n_bytes = GET_MODE_SIZE (GET_MODE (dest));
5204 return select<bool>()
5205 : n_bytes == 2 ? avr_split_shift2 (dest, src, ioff, scratch, code)
5206 : n_bytes == 3 ? avr_split_shift3 (dest, src, ioff, scratch, code)
5207 : n_bytes == 4 ? avr_split_shift4 (dest, src, ioff, scratch, code)
5208 : bad_case<bool> ();
5212 namespace
5216 //////////////////////////////////////////////////////////////////////////////
5217 // Determine whether an ISR may use the __gcc_isr pseudo-instruction.
5219 static const pass_data avr_pass_data_pre_proep =
5221 RTL_PASS, // type
5222 "", // name (will be patched)
5223 OPTGROUP_NONE, // optinfo_flags
5224 TV_DF_SCAN, // tv_id
5225 0, // properties_required
5226 0, // properties_provided
5227 0, // properties_destroyed
5228 0, // todo_flags_start
5229 0 // todo_flags_finish
5232 class avr_pass_pre_proep : public rtl_opt_pass
5234 public:
5235 avr_pass_pre_proep (gcc::context *ctxt, const char *name)
5236 : rtl_opt_pass (avr_pass_data_pre_proep, ctxt)
5238 this->name = name;
5241 void compute_maybe_gasisr (function *);
5243 unsigned int execute (function *fun) final override
5245 if (avropt_gasisr_prologues
5246 // Whether this function is an ISR worth scanning at all.
5247 && !fun->machine->is_no_gccisr
5248 && (fun->machine->is_interrupt
5249 || fun->machine->is_signal)
5250 && !cfun->machine->is_naked
5251 // Paranoia: Non-local gotos and labels that might escape.
5252 && !cfun->calls_setjmp
5253 && !cfun->has_nonlocal_label
5254 && !cfun->has_forced_label_in_static)
5256 compute_maybe_gasisr (fun);
5259 return 0;
5262 }; // avr_pass_pre_proep
5265 /* Set fun->machine->gasisr.maybe provided we don't find anything that
5266 prohibits GAS from generating parts of ISR prologues / epilogues for us. */
5268 void
5269 avr_pass_pre_proep::compute_maybe_gasisr (function *fun)
5271 // Don't use BB iterators so that we see JUMP_TABLE_DATA.
5273 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
5275 // Transparent calls always use [R]CALL and are filtered out by GAS.
5276 // ISRs don't use -mcall-prologues, hence what remains to be filtered
5277 // out are open coded (tail) calls.
5279 if (CALL_P (insn))
5280 return;
5282 // __tablejump2__ clobbers something and is targeted by JMP so
5283 // that GAS won't see its usage.
5285 if (AVR_HAVE_JMP_CALL
5286 && JUMP_TABLE_DATA_P (insn))
5287 return;
5289 // Non-local gotos not seen in *FUN.
5291 if (JUMP_P (insn)
5292 && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
5293 return;
5296 fun->machine->gasisr.maybe = 1;
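// Illustration (hedged): with gasisr.maybe = 1, prologue and epilogue
// can emit the GAS pseudo-instruction, roughly
//     __gcc_isr 1    ; prologue part
//     ...
//     __gcc_isr 2    ; epilogue part
// leaving it to the assembler to save / restore only those of SREG,
// the tmp and the zero register that the ISR body actually needs.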
5301 //////////////////////////////////////////////////////////////////////////////
5302 // Late recomputation of notes so we can use `reg_unused_after()' and friends.
5304 static const pass_data avr_pass_data_recompute_notes =
5306 RTL_PASS, // type
5307 "", // name (will be patched)
5308 OPTGROUP_NONE, // optinfo_flags
5309 TV_DF_SCAN, // tv_id
5310 0, // properties_required
5311 0, // properties_provided
5312 0, // properties_destroyed
5313 0, // todo_flags_start
5314 TODO_df_finish | TODO_df_verify // todo_flags_finish
5317 class avr_pass_recompute_notes : public rtl_opt_pass
5319 public:
5320 avr_pass_recompute_notes (gcc::context *ctxt, const char *name)
5321 : rtl_opt_pass (avr_pass_data_recompute_notes, ctxt)
5323 this->name = name;
5326 unsigned int execute (function *) final override
5328 df_note_add_problem ();
5329 df_analyze ();
5331 return 0;
5333 }; // avr_pass_recompute_notes
5335 } // anonymous namespace
5339 //////////////////////////////////////////////////////////////////////////////
5340 // Function visible and used outside this module.
5342 /* During reload, we allow many more addresses than Reduced Tiny actually
5343 supports. Split them after reload in order to get closer to the
5344 core's capabilities. This sets the stage for pass .avr-fuse-add. */
5346 bool
5347 avr_split_fake_addressing_move (rtx_insn * /*insn*/, rtx *xop)
5349 bool store_p = false;
5350 rtx mem, reg_or_0;
5352 if (REG_P (xop[0]) && MEM_P (xop[1]))
5354 reg_or_0 = xop[0];
5355 mem = xop[1];
5357 else if (MEM_P (xop[0])
5358 && (REG_P (xop[1])
5359 || xop[1] == CONST0_RTX (GET_MODE (xop[0]))))
5361 mem = xop[0];
5362 reg_or_0 = xop[1];
5363 store_p = true;
5365 else
5366 return false;
5368 machine_mode mode = GET_MODE (mem);
5369 rtx base, addr = XEXP (mem, 0);
5370 rtx_code addr_code = GET_CODE (addr);
5372 if (REG_P (reg_or_0)
5373 && reg_overlap_mentioned_p (reg_or_0, addr))
5374 return false;
5375 else if (addr_code == PLUS || addr_code == PRE_DEC || addr_code == POST_INC)
5376 base = XEXP (addr, 0);
5377 else if (addr_code == REG)
5378 base = addr;
5379 else
5380 return false;
5382 if (REGNO (base) > REG_Z)
5383 return false;
5385 if (! AVR_TINY
5386 // Only keep base registers that can't do PLUS addressing.
5387 && ((REGNO (base) != REG_X
5388 && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem)))
5389 || avr_load_libgcc_p (mem)
5390 || avr_mem_memx_p (mem)))
5391 return false;
5393 bool volatile_p = MEM_VOLATILE_P (mem);
5394 bool mem_volatile_p = false;
5395 if (frame_pointer_needed
5396 && REGNO (base) == FRAME_POINTER_REGNUM)
5398 if (avropt_fuse_add < 2
5399 // Be a projection (we always split PLUS).
5400 || (avropt_fuse_add == 2 && volatile_p && addr_code != PLUS))
5401 return false;
5403 // Changing the frame pointer locally may confuse later passes
5404 // like .dse2 which don't track changes of FP, not even when
5405 // respective CFA notes are present. An example is pr22141-1.c.
5406 if (avropt_fuse_add == 2)
5407 mem_volatile_p = true;
5410 rtx_code new_code = UNKNOWN;
5411 HOST_WIDE_INT add = 0, sub = 0;
5412 int msize = GET_MODE_SIZE (mode);
5414 AVR_LdSt_Props ap { (int) REGNO (base), store_p, volatile_p,
5415 ADDR_SPACE_GENERIC };
5417 switch (addr_code)
5419 default:
5420 return false;
5422 case PLUS:
5423 add = INTVAL (XEXP (addr, 1));
5424 if (msize == 1)
5426 new_code = REG;
5427 sub = -add;
5429 else if (ap.want_predec)
5431 // volatile stores prefer PRE_DEC (MSB first)
5432 sub = -add;
5433 add += msize;
5434 new_code = PRE_DEC;
5436 else
5438 new_code = POST_INC;
5439 sub = -add - msize;
5441 break;
5443 case POST_INC:
5444 // volatile stores prefer PRE_DEC (MSB first)
5445 if (msize > 1 && ap.want_predec)
5447 add = msize;
5448 new_code = PRE_DEC;
5449 sub = msize;
5450 break;
5452 return false;
5454 case PRE_DEC:
5455 // volatile loads prefer POST_INC (LSB first)
5456 if (msize > 1 && ap.want_postinc)
5458 add = -msize;
5459 new_code = POST_INC;
5460 sub = -msize;
5461 break;
5463 return false;
5465 case REG:
5466 if (msize == 1)
5467 return false;
5469 if (ap.want_predec)
5471 add = msize;
5472 new_code = PRE_DEC;
5473 sub = 0;
5475 else
5477 add = 0;
5478 new_code = POST_INC;
5479 sub = -msize;
5481 break;
5482 } // switch addr_code
5484 rtx_insn *insn;
5486 if (add)
5488 insn = emit_move_ccc (base, plus_constant (Pmode, base, add));
5489 avr_maybe_adjust_cfa (insn, base, add);
5492 rtx new_addr = new_code == REG
5493 ? base
5494 : gen_rtx_fmt_e (new_code, Pmode, base);
5496 rtx new_mem = change_address (mem, mode, new_addr);
5497 if (mem_volatile_p)
5498 MEM_VOLATILE_P (new_mem) = 1;
5500 insn = emit_move_ccc (store_p ? new_mem : reg_or_0,
5501 store_p ? reg_or_0 : new_mem);
5502 if (auto_inc_p (new_addr))
5504 add_reg_note (insn, REG_INC, base);
5505 int off = new_code == POST_INC ? msize : -msize;
5506 avr_maybe_adjust_cfa (insn, base, off);
5509 if (sub)
5511 insn = emit_move_ccc (base, plus_constant (Pmode, base, sub));
5512 avr_maybe_adjust_cfa (insn, base, sub);
5515 return true;
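// Example (sketch) for Reduced Tiny, where "X + 3" addressing is fake:
//     R24 = mem:QI (X + 3)
// is split into "X += 3 ;  R24 = mem:QI (X) ;  X += -3", which sets
// the stage for .avr-fuse-add to absorb the adjustments.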
5519 /* Given memory reference mem(ADDR), return true when it can be split into
5520 single-byte moves, and all resulting addresses are natively supported.
5521 ADDR is in addr-space generic. */
5523 static bool
5524 splittable_address_p (rtx addr, int n_bytes)
5526 if (CONSTANT_ADDRESS_P (addr)
5527 || GET_CODE (addr) == PRE_DEC
5528 || GET_CODE (addr) == POST_INC)
5529 return true;
5531 if (! AVR_TINY)
5533 rtx base = select<rtx>()
5534 : REG_P (addr) ? addr
5535 : GET_CODE (addr) == PLUS ? XEXP (addr, 0)
5536 : NULL_RTX;
5538 int off = select<int>()
5539 : REG_P (addr) ? 0
5540 : GET_CODE (addr) == PLUS ? (int) INTVAL (XEXP (addr, 1))
5541 : -1;
5543 return (base && REG_P (base)
5544 && (REGNO (base) == REG_Y || REGNO (base) == REG_Z)
5545 && IN_RANGE (off, 0, 64 - n_bytes));
5548 return false;
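// Example (hedged): with n_bytes = 2, address Y+62 is splittable
// (62 <= 64 - 2 keeps the last byte at Y+63 in LDD range), Y+63 is
// not, and X-based addresses never are (there is no LDD through X).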
5552 /* Like avr_byte(), but also knows how to split POST_INC and PRE_DEC
5553 memory references. */
5555 static rtx
5556 avr_byte_maybe_mem (rtx x, int n)
5558 rtx addr, b;
5559 if (MEM_P (x)
5560 && (GET_CODE (addr = XEXP (x, 0)) == POST_INC
5561 || GET_CODE (addr) == PRE_DEC))
5562 b = gen_rtx_MEM (QImode, copy_rtx (addr));
5563 else
5564 b = avr_byte (x, n);
5566 if (MEM_P (x))
5567 gcc_assert (MEM_P (b));
5569 return b;
5573 /* Split a multi-byte load / store into single-byte insns, provided
5574 the access is non-volatile, its address space is generic, the moved
5575 register does not overlap the address, and all resulting addresses
5576 are natively supported. Returns true when the XOP[0] = XOP[1] insn
5577 has been split, and false otherwise. */
5579 bool
5580 avr_split_ldst (rtx *xop)
5582 rtx dest = xop[0];
5583 rtx src = xop[1];
5584 machine_mode mode = GET_MODE (dest);
5585 int n_bytes = GET_MODE_SIZE (mode);
5586 rtx mem, reg_or_0;
5588 if (MEM_P (dest) && reg_or_0_operand (src, mode))
5590 mem = dest;
5591 reg_or_0 = src;
5593 else if (register_operand (dest, mode) && MEM_P (src))
5595 reg_or_0 = dest;
5596 mem = src;
5598 else
5599 return false;
5601 rtx addr = XEXP (mem, 0);
5603 if (MEM_VOLATILE_P (mem)
5604 || ! ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem))
5605 || ! IN_RANGE (n_bytes, 2, 4)
5606 || ! splittable_address_p (addr, n_bytes)
5607 || reg_overlap_mentioned_p (reg_or_0, addr))
5608 return false;
5610 const int step = GET_CODE (addr) == PRE_DEC ? -1 : 1;
5611 const int istart = step > 0 ? 0 : n_bytes - 1;
5612 const int iend = istart + step * n_bytes;
5614 for (int i = istart; i != iend; i += step)
5616 rtx di = avr_byte_maybe_mem (dest, i);
5617 rtx si = avr_byte_maybe_mem (src, i);
5618 emit_move_ccc (di, si);
5621 return true;
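// Example (sketch): a non-volatile 2-byte load
//     R25:R24 = mem:HI (Y + 2)
// splits into the two natively supported byte loads
//     R24 = mem:QI (Y + 2) ;  R25 = mem:QI (Y + 3)
// so that later passes can optimize the two bytes independently.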
5626 // Functions make_<pass-name> (gcc::context*) where <pass-name> is
5627 // according to the pass declaration in avr-passes.def. GCC's pass
5628 // manager uses these functions to create the respective pass objects.
5630 // Optimize results of the casesi expander for modes < SImode.
5632 rtl_opt_pass *
5633 make_avr_pass_casesi (gcc::context *ctxt)
5635 return new avr_pass_casesi (ctxt, "avr-casesi");
5638 // Try to replace 2 cbranch insns with 1 comparison and 2 branches.
5640 rtl_opt_pass *
5641 make_avr_pass_ifelse (gcc::context *ctxt)
5643 return new avr_pass_ifelse (ctxt, "avr-ifelse");
5646 // Determine whether an ISR may use the __gcc_isr pseudo-instruction.
5648 rtl_opt_pass *
5649 make_avr_pass_pre_proep (gcc::context *ctxt)
5651 return new avr_pass_pre_proep (ctxt, "avr-pre-proep");
5654 // Find more POST_INC and PRE_DEC cases.
5656 rtl_opt_pass *
5657 make_avr_pass_fuse_add (gcc::context *ctxt)
5659 return new avr_pass_fuse_add (ctxt, "avr-fuse-add");
5662 // Late recomputation of notes so we can use `reg_unused_after()' and friends.
5664 rtl_opt_pass *
5665 make_avr_pass_recompute_notes (gcc::context *ctxt)
5667 return new avr_pass_recompute_notes (ctxt, "avr-notes-free-cfg");
5670 // Optimize moves after reload.
5672 rtl_opt_pass *
5673 make_avr_pass_fuse_move (gcc::context *ctxt)
5675 return new avr_pass_fuse_move (ctxt, "avr-fuse-move");
5678 // Split insns after peephole2 / before avr-fuse-move.
5680 rtl_opt_pass *
5681 make_avr_pass_split_after_peephole2 (gcc::context *ctxt)
5683 return new avr_pass_split_after_peephole2 (ctxt, "avr-split-after-peephole2");