gcc/config/avr/avr-passes.cc
/* Support for avr-passes.def for AVR 8-bit microcontrollers.
   Copyright (C) 2024-2025 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#define INCLUDE_ARRAY
#define INCLUDE_VECTOR
#include "config.h"
#include "system.h"
#include "intl.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "cfghooks.h"
#include "cfganal.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "explow.h"
#include "cfgrtl.h"
#include "context.h"
#include "tree-pass.h"
#include "insn-attr.h"
#include "tm-constrs.h"
#define CONST_INT_OR_FIXED_P(X) (CONST_INT_P (X) || CONST_FIXED_P (X))

#define FIRST_GPR (AVR_TINY ? REG_18 : REG_2)
// Emit pattern PAT, and ICE when the insn is not valid / not recognized.

static rtx_insn *
emit_valid_insn (rtx pat)
{
  rtx_insn *insn = emit_insn (pat);

  if (! valid_insn_p (insn)) // Also runs recog().
    fatal_insn ("emit unrecognizable insn", insn);

  return insn;
}
// Emit a single_set with an optional scratch operand.  This function
// asserts that the new insn is valid and recognized.

static rtx_insn *
emit_valid_move_clobbercc (rtx dest, rtx src, rtx scratch = NULL_RTX)
{
  rtx pat = scratch
    ? gen_gen_move_clobbercc_scratch (dest, src, scratch)
    : gen_gen_move_clobbercc (dest, src);

  return emit_valid_insn (pat);
}
namespace
{

/////////////////////////////////////////////////////////////////////////////
// Before we start with the actual code, introduce some helpers that are
// quite generic, though up to now only avr-fuse-add makes use of them.
/* Get the next / previous NONDEBUG_INSN_P after INSN in basic block BB.
   This assumes we are in CFG layout mode so that BLOCK_FOR_INSN()
   can be used.  */

static rtx_insn *
next_nondebug_insn_bb (basic_block bb, rtx_insn *insn, bool forward = true)
{
  while (insn)
    {
      insn = forward ? NEXT_INSN (insn) : PREV_INSN (insn);

      if (insn && NONDEBUG_INSN_P (insn))
	return BLOCK_FOR_INSN (insn) == bb ? insn : nullptr;
    }

  return insn;
}

static rtx_insn *
prev_nondebug_insn_bb (basic_block bb, rtx_insn *insn)
{
  return next_nondebug_insn_bb (bb, insn, false);
}
/* Like `single_set' with the addition that it sets REGNO_SCRATCH when the
   insn is a single_set with a QImode scratch register.  When the insn has
   no QImode scratch or just a scratch:QI, then set REGNO_SCRATCH = 0.
   The assumption is that the function is only used after the splits for
   REG_CC, so that the pattern is a parallel with 2 elements (INSN has no
   scratch operand) or 3 elements (INSN does have a scratch operand).  */

static rtx
single_set_with_scratch (rtx_insn *insn, int &regno_scratch)
{
  regno_scratch = 0;

  if (! INSN_P (insn))
    return NULL_RTX;

  rtx set, clo, reg, pat = PATTERN (insn);

  // Search for SET + CLOBBER(QI) + CLOBBER(CC).
  if (GET_CODE (pat) == PARALLEL
      && XVECLEN (pat, 0) == 3
      && GET_CODE (set = XVECEXP (pat, 0, 0)) == SET
      // At this pass, all insns are endowed with clobber(CC).
      && GET_CODE (clo = XVECEXP (pat, 0, 2)) == CLOBBER
      && GET_MODE (XEXP (clo, 0)) == CCmode
      && GET_CODE (clo = XVECEXP (pat, 0, 1)) == CLOBBER
      && REG_P (reg = XEXP (clo, 0))
      && GET_MODE (reg) == QImode)
    {
      regno_scratch = REGNO (reg);
      return set;
    }

  return single_set (insn);
}
// One bit for each GPR in REG_0 ... REG_31.
using gprmask_t = uint32_t;
// True when this is a valid GPR number for ordinary code, e.g.
// registers wider than 2 bytes have to start at an even regno.
// TMP_REG and ZERO_REG are not considered valid, even though
// the C source can use register vars with them.
static inline bool
gpr_regno_p (int regno, int n_bytes = 1)
{
  return (IN_RANGE (regno, FIRST_GPR, REG_32 - n_bytes)
	  // Size in { 1, 2, 3, 4, 8 } bytes.
	  && ((1u << n_bytes) & 0x11e)
	  // Registers >= 2 bytes start at an even regno.
	  && (n_bytes == 1 || regno % 2 == 0));
}
// There are cases where the C source defines local reg vars
// for R1 etc.  The assumption is that this is handled before
// calling this function, e.g. by skipping code when a register
// overlaps with a fixed register.
static inline gprmask_t
regmask (int regno, int size)
{
  gcc_checking_assert (gpr_regno_p (regno, size));
  gprmask_t bits = (1u << size) - 1;

  return bits << regno;
}
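
// Example (for illustration): regmask (REG_24, 2) has exactly bits
// 24 and 25 set, one mask bit for each of R24 and R25.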
// Mask for hard register X that's some GPR, including fixed regs like R0.
static gprmask_t
regmask (rtx x)
{
  gcc_assert (REG_P (x));
  gprmask_t bits = (1u << GET_MODE_SIZE (GET_MODE (x))) - 1;

  return bits << REGNO (x);
}
// Whether X has bits in the range [B0 ... B1].
static inline bool
has_bits_in (gprmask_t x, int b0, int b1)
{
  if (b0 > b1 || b0 > 31 || b1 < 0)
    return false;

  const gprmask_t m = (2u << (b1 - b0)) - 1;
  return x & (m << b0);
}
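
// Example (for illustration): has_bits_in (regmask (REG_24, 2), 16, 24)
// is true since bit 24 lies in the closed range [16, 24]; out-of-range
// values of B0 / B1 simply yield false.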
template<typename T>
T bad_case ()
{
  gcc_unreachable ();
}

#define select false ? bad_case
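
// For illustration: with the macro above, a chain like
//
//   int x = select<int>()
//     : cond1 ? val1
//     : cond2 ? val2
//     : bad_case<int> ();
//
// expands to "false ? bad_case<int> () : cond1 ? val1 : ...", i.e. a
// plain chain of conditional operators.  The leading arm is never
// evaluated and only fixes the result type; a final bad_case arm ICEs
// when no condition matched.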
namespace AVRasm
{
// Returns true when a scratch reg is needed in order to get the
// (signed or unsigned) 8-bit value VAL in some GPR.
// When it's about costs rather than the sheer requirement for a
// scratch, see also AVRasm::constant_cost.
static inline bool ldi_needs_scratch (int regno, int val)
{
  return regno < REG_16 && IN_RANGE (val & 0xff, 2, 254);
}
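
// Note (assumption, not from the original sources): VAL in { 0, 1, 255 }
// is exempt presumably because any GPR, including R2 ... R15, can reach
// these values without LDI, via CLR, CLR + INC and CLR + DEC.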
// Return a byte value x >= 0 such that x <code> y == y for all y, or -1.
static inline int neutral_val (rtx_code code)
{
  return select<int>()
    : code == AND ? 0xff
    : code == IOR ? 0x00
    : code == XOR ? 0x00
    : code == PLUS ? 0
    : -1;
}
// When there exists a value x such that the image of the function
// y -> y <code> x has order 1, then return that x.  Else return -1.
static inline int image1_val (rtx_code code)
{
  return select<int>()
    : code == AND ? 0x00
    : code == IOR ? 0xff
    : -1;
}
// Cost of 8-bit binary operation x o= VAL provided a scratch is
// available as needed.
static int constant_cost (rtx_code code, int regno, uint8_t val)
{
  bool needs_scratch_p = select<bool>()
    : code == PLUS ? regno < REG_16 && val != 1 && val != 0xff
    : code == XOR ? val != 0xff && (regno < REG_16 || val != 0x80)
    : code == IOR ? regno < REG_16
    : code == AND ? regno < REG_16 && val != 0
    : code == SET ? regno < REG_16 && val != 0
    : bad_case<bool> ();

  return val == AVRasm::neutral_val (code)
    ? 0
    : 1 + needs_scratch_p;
}
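
// Example (for illustration): constant_cost (AND, REG_18, 0xff) is 0
// since 0xff is AND's neutral value, whereas constant_cost (SET, REG_2,
// 0x42) is 2 because a lower register needs an LDI into a scratch plus
// a MOV.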
}; // AVRasm
// Returns the mode mask for a mode size of SIZE bytes.
static uint64_t size_to_mask (int size)
{
  return ((uint64_t) 2 << (8 * size - 1)) - 1;
}
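
// Example (for illustration): size_to_mask (2) == 0xffff.  Writing
// 2 << (8 * size - 1) instead of 1 << (8 * size) keeps the shift count
// below 64 when SIZE is 8, where the computation wraps to the desired
// all-ones mask.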
// Return the scalar int mode for a modesize of 1, 2, 3, 4 or 8 bytes.
static machine_mode size_to_mode (int size)
{
  return select<machine_mode>()
    : size == 1 ? QImode
    : size == 2 ? HImode
    : size == 3 ? PSImode
    : size == 4 ? SImode
    : size == 8 ? DImode
    : bad_case<machine_mode> ();
}
//////////////////////////////////////////////////////////////////////////////
// Optimize moves after reload: -mfuse-move=<0,23>

/* The purpose of this pass is to perform optimizations after reload
   like the following ones:

   Without optimization            |  With optimization
   ====================            |  =================

   long long fn_zero (void)    (1)
   {
     return 0;
   }

   ldi r18, 0  ; movqi_insn        |  ldi r18, 0    ; movqi_insn
   ldi r19, 0  ; movqi_insn        |  ldi r19, 0    ; movqi_insn
   ldi r20, 0  ; movqi_insn        |  movw r20, r18 ; *movhi
   ldi r21, 0  ; movqi_insn        |
   ldi r22, 0  ; movqi_insn        |  movw r22, r18 ; *movhi
   ldi r23, 0  ; movqi_insn        |
   ldi r24, 0  ; movqi_insn        |  movw r24, r18 ; *movhi
   ldi r25, 0  ; movqi_insn        |
   ret                             |  ret

   int fn_eq0 (char c)         (2)
   {
     return c == 0;
   }

   mov r18, r24   ; movqi_insn     |  mov r18, r24   ; movqi_insn
   ldi r24, 1     ; *movhi         |  ldi r24, 1     ; *movhi
   ldi r25, 0                      |  ldi r25, 0
   cp r18, ZERO   ; cmpqi3         |  cpse r18, ZERO ; peephole
   breq .+4       ; branch         |
   ldi r24, 0     ; *movhi         |  ldi r24, 0     ; movqi_insn
   ldi r25, 0                      |
   ret                             |  ret

   int a, b;                   (3)

   void fn_store_ab (void)
   {
     a = 1;
     b = -1;
   }

   ldi r24, 1     ; *movhi         |  ldi r24, 1     ; *movhi
   ldi r25, 0                      |  ldi r25, 0
   sts a+1, r25   ; *movhi         |  sts a+1, r25   ; *movhi
   sts a, r24                      |  sts a, r24
   ldi r24, -1    ; *movhi         |  sbiw r24, 2    ; *addhi3
   ldi r25, -1                     |
   sts b+1, r25   ; *movhi         |  sts b+1, r25   ; *movhi
   sts b, r24                      |  sts b, r24
   ret                             |  ret

   unsigned fn_crc (unsigned x, unsigned y)   (4)
   {
     for (char i = 8; i--; x <<= 1)
       y ^= (x ^ y) & 0x80 ? 79U : 0U;
     return y;
   }

   movw r18, r24  ; *movhi         |  movw r18, r24  ; *movhi
   movw r24, r22  ; *movhi         |  movw r24, r22  ; *movhi
   ldi r22, 8     ; movqi_insn     |  ldi r22, 8     ; movqi_insn
  .L13:                            | .L13:
   movw r30, r18  ; *movhi         |  movw r30, r18  ; *movhi
   eor r30, r24   ; *xorqi3        |  eor r30, r24   ; *xorqi3
   eor r31, r25   ; *xorqi3        |  eor r31, r25   ; *xorqi3
   mov r20, r30   ; *andhi3        |  mov r20, r30   ; *andqi3
   andi r20, 1<<7                  |  andi r20, 1<<7
   clr r21                         |
   sbrs r30, 7    ; *sbrx_branchhi |  sbrc r30, 7    ; *sbrx_branchhi
   rjmp .+4                        |
   ldi r20, 79    ; movqi_insn     |  ldi r20, 79    ; movqi_insn
   ldi r21, 0     ; movqi_insn     |
   eor r24, r20   ; *xorqi3        |  eor r24, r20   ; *xorqi3
   eor r25, r21   ; *xorqi3        |
   lsl r18        ; *ashlhi3_const |  lsl r18        ; *ashlhi3_const
   rol r19                         |  rol r19
   subi r22, 1  ; *op8.for.cczn.plus  |  subi r22, 1  ; *op8.for.cczn.plus
   brne .L13      ; branch_ZN      |  brne .L13      ; branch_ZN
   ret                             |  ret

   #define SPDR (*(uint8_t volatile*) 0x2c)   (5)

   void fn_PR49807 (long big)
   {
     SPDR = big >> 24;
     SPDR = big >> 16;
     SPDR = big >> 8;
     SPDR = big;
   }

   movw r20, r22  ; *movhi         |  movw r20, r22  ; *movhi
   movw r22, r24  ; *movhi         |  movw r22, r24  ; *movhi
   mov r24, r23   ; *ashrsi3_const |
   clr r27                         |
   sbrc r24, 7                     |
   com r27                         |
   mov r25, r27                    |
   mov r26, r27                    |
   out 0xc, r24   ; movqi_insn     |  out 0xc, r23   ; movqi_insn
   movw r24, r22  ; *ashrsi3_const |
   clr r27                         |
   sbrc r25, 7                     |
   com r27                         |
   mov r26, r27                    |
   out 0xc, r24   ; movqi_insn     |  out 0xc, r24   ; movqi_insn
   clr r27        ; *ashrsi3_const |
   sbrc r23, 7                     |
   dec r27                         |
   mov r26, r23                    |
   mov r25, r22                    |
   mov r24, r21                    |
   out 0xc, r24   ; movqi_insn     |  out 0xc, r21   ; movqi_insn
   out 0xc, r20   ; movqi_insn     |  out 0xc, r20   ; movqi_insn
   ret                             |  ret

   The insns of each basic block are traversed from first to last.
   Each insn is optimized on its own, or may be fused with the
   previous insn like in example (1).
   As the insns are traversed, memento_t keeps track of known values
   held in the GPRs (general purpose registers) R2 ... R31 by simulating
   the effect of the current insn in memento_t.apply_insn().
   The basic blocks are traversed in reverse post order so as to
   maximize the chance that GPRs from all preceding blocks are known,
   which is the case in example (2).  The traversal of the basic block
   is performed by bbinfo_t.optimize_one_function().
   bbinfo_t.optimize_one_block() traverses the insns of a BB and tries
   the following optimizations:

   bbinfo_t::try_fuse_p
      Try to fuse two 8-bit insns to one MOVW like in (1).

   bbinfo_t::try_simplify_p
      Only perform the simplest optimizations that don't impede the
      traceability of the generated code, which are:
      - Transform operations like Rn = Rn=0 ^ Rm to Rn = Rm.
      - Remove insns that are no-ops like Rn = Rn ^ Rm=0.

   bbinfo_t::try_bin_arg1_p
      In insns like EOR Rn, arg1 where arg1 is known or is a reg that
      dies in the insn, *and* there is a different register Rm that's
      known to contain the same value, then arg1 is replaced with Rm.

   bbinfo_t::try_split_ldi_p
      Tries to simplify loads of constants like in examples (1), (2)
      and (3).  It may use arithmetic instructions like AND with
      registers that are holding known values when this is profitable.

   bbinfo_t::try_split_any_p
      Split all insns where the operation can be performed on individual
      bytes, like andsi3.  In example (4) the andhi3 can be optimized
      to an andqi3.

   bbinfo_t::try_mem0_p
      Try to fuse a mem = reg insn to mem = __zero_reg__.
      This should only occur when -msplit-ldst is on, but may
      also occur with pushes since push<mode>1 splits them.  */
// A basic block with additional information like the GPR state.
// The main entry point for the pass.  Runs various strategies
// like try_fuse, try_simplify, try_bin_arg1, try_split_ldi, try_split_any
// depending on -mfuse-move=<0,23>.
struct bbinfo_t;
// Additional insn information on a REG = non-memory single_set insn
// for quick access.  Only valid when the m_size member is non-zero.
struct insninfo_t;

// Helper classes with data needed by the try_xxx optimizers.
struct optimize_data_t;
struct insn_optimize_data_t;

// Records which GPRs R0 ... R31 are holding a known value,
// and which values these are.
struct memento_t;

// Abstract Interpretation of expressions.
// absint_val_t represents an 8-bit value that equals the content of
// some GPR, or equals some known value (or both, or none of them).
// absint_byte_t represents an 8-bit entity that is equivalent to
// an absint_val_t, or is equivalent to some (unary or binary) operation
// on absint_val_t's like NOT, AND, IOR, XOR that operate bit-wise (and
// hence also byte-wise).
// absint_t represents an array of absint_byte_t's.  When some insn is
// applied to a GPR state, then memento_t.apply_insn() represents the RHS
// of a single_set as an absint_t, and then applies that result to the
// GPRs.  For example, in int y = x << 8 the representation is
// x = [r25; r24] and RHS = [r24; 00].
struct absint_val_t;
class absint_byte_t;
struct absint_t;
// A ply_t is a potential step towards an optimal sequence to load a constant
// value into a multi-byte register.  A ply_t loosely relates to one AVR
// instruction, but it may also represent a sequence of instructions.
// For example, loading a constant into a lower register when no scratch reg
// is available may take up to 4 instructions.  There is no 1:1 correspondence
// to insns, either.
// try_split_ldi determines the best sequence of ply_t's by means of a
// brute-force search with tree pruning: It's much too complicated to
// construct a good sequence directly, but there are many conditions that
// a good sequence will satisfy, implemented in bbinfo_t::find_plies.
struct ply_t;
struct plies_t;
// The maximal number of ply_t's in any conceivable optimal solution
// that is better than what a vanilla mov<mode> generates.
// This is 6 for modes <= 4 and 8 for modes == 8.
static constexpr int N_BEST_PLYS = 8;

#define FUSE_MOVE_MAX_MODESIZE 8

#include "avr-passes-fuse-move.h"

// Static members.

gprmask_t memento_t::fixed_regs_mask;

// Statistics.
int ply_t::n_ply_ts;
int ply_t::max_n_ply_ts;
int plies_t::max_n_plies;

bbinfo_t *bbinfo_t::bb_info;
int bbinfo_t::tick;
bbinfo_t::find_plies_data_t *bbinfo_t::fpd;

// Which optimizations should be performed.
bool bbinfo_t::try_fuse_p;
bool bbinfo_t::try_bin_arg1_p;
bool bbinfo_t::try_split_ldi_p;
bool bbinfo_t::try_split_any_p;
bool bbinfo_t::try_simplify_p;
bool bbinfo_t::use_arith_p;
bool bbinfo_t::use_set_some_p;
bool bbinfo_t::try_mem0_p;
// Abstract Interpretation of expressions.
// A bunch of absint_byte_t's.

struct absint_t
{
  static constexpr int eq_size = FUSE_MOVE_MAX_MODESIZE;
  std::array<absint_byte_t, eq_size> eq;

  rtx xexp = NULL_RTX;
  rtx xexp_new = NULL_RTX;

  absint_byte_t &operator[] (int i)
  {
    gcc_assert (IN_RANGE (i, 0, absint_t::eq_size - 1));
    return eq[i];
  }

  const absint_byte_t &operator[] (int i) const
  {
    gcc_assert (IN_RANGE (i, 0, absint_t::eq_size - 1));
    return eq[i];
  }

  absint_t () {}

  absint_t (rtx xold)
    : xexp(xold)
  {}

  absint_t (rtx xold, rtx xnew, int n_bytes)
    : xexp(xold), xexp_new(xnew)
  {
    gcc_assert (n_bytes <= eq_size);
    if (xnew)
      for (int i = 0; i < n_bytes; ++i)
	eq[i].learn_val8 (avr_uint8 (xnew, i));
  }
  // CODE != UNKNOWN: Maximal index of a byte with code CODE, or -1.
  // CODE == UNKNOWN: Maximal index of a byte with known CODE, or -1.
  int max_knows (rtx_code code = UNKNOWN) const
  {
    for (int i = eq_size - 1; i >= 0; --i)
      if ((code == UNKNOWN && ! eq[i].can (UNKNOWN))
	  || (code != UNKNOWN && eq[i].can (code)))
	return i;
    return -1;
  }

  // CODE != UNKNOWN: Maximal i such that all bytes < i have code CODE.
  // CODE == UNKNOWN: Maximal i such that all bytes < i have code != UNKNOWN.
  int end_knows (rtx_code code = UNKNOWN) const
  {
    for (int i = 0; i < eq_size; ++i)
      if ((code == UNKNOWN && eq[i].can (UNKNOWN))
	  || (code != UNKNOWN && ! eq[i].can (code)))
	return i;
    return eq_size;
  }

  // Number of bytes for which there is usable information.
  int popcount () const
  {
    int pop = 0;
    for (int i = 0; i < eq_size; ++i)
      pop += ! eq[i].can (UNKNOWN);
    return pop;
  }
  // Get the value under the assumption that all eq[].val8 are known.
  uint64_t get_value (int n_bytes, bool strict = true) const
  {
    gcc_assert (IN_RANGE (n_bytes, 1, eq_size));
    gcc_assert (! strict || end_knows (CONST_INT) >= n_bytes);

    uint64_t val = 0;
    for (int i = n_bytes - 1; i >= 0; --i)
      val = 256 * val + eq[i].val8 (strict);
    return val;
  }
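
  // Example (for illustration): the eq[] bytes are little-endian, so
  // with eq[0].val8() == 0x34 and eq[1].val8() == 0x12,
  // get_value (2) returns 0x1234.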
  // Get n-byte value as a const_int, or NULL_RTX when (partially) unknown.
  rtx get_value_as_const_int (int n_bytes) const
  {
    gcc_checking_assert (gpr_regno_p (REG_24, n_bytes));

    if (end_knows (CONST_INT) < n_bytes)
      return NULL_RTX;

    const uint64_t val = get_value (n_bytes);
    const machine_mode mode = size_to_mode (n_bytes);

    return gen_int_mode (val, mode);
  }
  // Find a 16-bit register that contains the same value as held
  // in positions I1 and I2 (if any).  Return 0 when nothing appropriate
  // for a MOVW is found.
  int reg16_with_value (int i1, int i2, const memento_t &memo) const
  {
    if (i1 == (i2 ^ 1))
      {
	const int lo8 = eq[i1 & ~1].val8 (false);
	const int hi8 = eq[i1 | 1].val8 (false);
	if (lo8 >= 0 && hi8 >= 0)
	  return memo.reg16_with_value (lo8, hi8, 0);
      }

    return 0;
  }
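
  // Note (for illustration): i1 == (i2 ^ 1) requires I1 and I2 to be the
  // two halves of an aligned 16-bit pair in either order, e.g. (0, 1)
  // or (3, 2).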
  // When X is a REG rtx with a known content as of MEMO, then return
  // the respective value as a constant for mode MODE.
  // If X is NULL_RTX, or not a REG, or not known, then return NULL_RTX.
  static rtx maybe_fold (rtx x, const memento_t &memo)
  {
    int n_bytes;

    if (x != NULL_RTX
	&& REG_P (x)
	&& (n_bytes = GET_MODE_SIZE (GET_MODE (x))) <= FUSE_MOVE_MAX_MODESIZE
	&& gpr_regno_p (REGNO (x), n_bytes))
      {
	rtx xval = memo.get_value_as_const_int (REGNO (x), n_bytes);
	if (xval)
	  return avr_chunk (GET_MODE (x), xval, 0);
      }

    return NULL_RTX;
  }
  // Try to conclude about the bytes that comprise X.  DEST_MODE is the
  // context mode that is used when X is CONST_INT and has VOIDmode.
  static absint_t explore (rtx x, const memento_t &memo,
			   machine_mode dest_mode = VOIDmode)
  {
    const rtx_code code = GET_CODE (x);
    bool worth_dumping = dump_file && (dump_flags & TDF_FOLDING);

    const machine_mode mode = GET_MODE (x) == VOIDmode
      ? dest_mode
      : GET_MODE (x);

    const int n_bytes = mode == VOIDmode && CONST_INT_P (x)
      ? absint_t::eq_size
      : GET_MODE_SIZE (mode);

    if (! IN_RANGE (n_bytes, 1, absint_t::eq_size))
      return absint_t (x);

    // Eat our own dog food as produced by try_split_ldi.

    rtx xop0 = BINARY_P (x) || UNARY_P (x) ? XEXP (x, 0) : NULL_RTX;
    rtx xval0 = xop0 && CONST_INT_OR_FIXED_P (xop0)
      ? xop0
      : absint_t::maybe_fold (xop0, memo);

    if (UNARY_P (x)
	&& REG_P (xop0)
	&& GET_MODE (xop0) == mode
	&& xval0)
      {
	rtx y = simplify_unary_operation (code, mode, xval0, mode);
	if (y && CONST_INT_OR_FIXED_P (y))
	  return absint_t (x, y, n_bytes);
      }

    rtx xop1 = BINARY_P (x) ? XEXP (x, 1) : NULL_RTX;
    rtx xval1 = xop1 && CONST_INT_OR_FIXED_P (xop1)
      ? xop1
      : absint_t::maybe_fold (xop1, memo);

    if (BINARY_P (x)
	&& xval0 && xval1)
      {
	rtx y = simplify_binary_operation (code, mode, xval0, xval1);
	if (y && CONST_INT_OR_FIXED_P (y))
	  return absint_t (x, y, n_bytes);
      }

    // No fold to a constant value was found:
    // Look at the individual bytes more closely.

    absint_t ai (x);

    switch (code)
      {
      default:
	worth_dumping = false;
	break;

      case REG:
	if (END_REGNO (x) <= REG_32
	    && ! (regmask (x) & memento_t::fixed_regs_mask))
	  for (unsigned r = REGNO (x); r < END_REGNO (x); ++r)
	    {
	      ai[r - REGNO (x)].learn_regno (r);
	      if (memo.knows (r))
		ai[r - REGNO (x)].learn_val8 (memo.value (r));
	    }
	break;

      CASE_CONST_UNIQUE:
	ai = absint_t (x, x, n_bytes);
	break;

      case ASHIFT:
      case ASHIFTRT:
      case LSHIFTRT:
      case ROTATE:
      case ROTATERT:
	if ((CONST_INT_P (xop1) && INTVAL (xop1) >= 8)
	    // DImode shift offsets for transparent calls are shipped in R16.
	    || n_bytes == 8)
	  ai = explore_shift (x, memo);
	break;

      case AND:
      case IOR:
      case XOR:
	{
	  const absint_t ai0 = absint_t::explore (xop0, memo, mode);
	  const absint_t ai1 = absint_t::explore (xop1, memo, mode);
	  for (int i = 0; i < n_bytes; ++i)
	    ai[i] = absint_byte_t (code, ai0[i], ai1[i]);
	}
	break;

      case NOT:
	{
	  const absint_t ai0 = absint_t::explore (xop0, memo);
	  for (int i = 0; i < n_bytes; ++i)
	    ai[i] = absint_byte_t (NOT, ai0[i]);
	}
	break;

      case ZERO_EXTEND:
      case SIGN_EXTEND:
	{
	  const absint_t ai0 = absint_t::explore (xop0, memo);
	  const int ai0_size = GET_MODE_SIZE (GET_MODE (xop0));
	  const absint_byte_t b_signs = ai0[ai0_size - 1].get_signs (code);
	  for (int i = 0; i < n_bytes; ++i)
	    ai[i] = i < ai0_size ? ai0[i] : b_signs;
	}
	break;

      case PLUS:
      case MINUS:
	if (SCALAR_INT_MODE_P (mode)
	    || ALL_SCALAR_FIXED_POINT_MODE_P (mode))
	  {
	    const absint_t ai0 = absint_t::explore (xop0, memo, mode);
	    const absint_t ai1 = absint_t::explore (xop1, memo, mode);
	    if (code == MINUS)
	      for (int i = 0; i < n_bytes && ai1[i].val8 (false) == 0; ++i)
		ai[i] = ai0[i];

	    if (code == PLUS)
	      for (int i = 0; i < n_bytes; ++i)
		{
		  if (ai0[i].val8 (false) == 0)
		    ai[i] = ai1[i];
		  else if (ai1[i].val8 (false) == 0)
		    ai[i] = ai0[i];
		  else
		    {
		      ai[i] = absint_byte_t (code, ai0[i], ai1[i]);
		      break;
		    }
		}

	    if (code == PLUS
		&& GET_CODE (xop0) == ZERO_EXTEND
		&& CONST_INT_P (xop1))
	      {
		rtx exop = XEXP (xop0, 0);
		int exsize = GET_MODE_SIZE (GET_MODE (exop));
		rtx lo_xop1 = avr_chunk (GET_MODE (exop), xop1, 0);
		if (lo_xop1 == const0_rtx)
		  for (int i = exsize; i < n_bytes; ++i)
		    ai[i] = ai1[i];
	      }
	  }
	break; // PLUS, MINUS

      case MULT:
	if (GET_MODE (xop0) == mode
	    && SCALAR_INT_MODE_P (mode))
	  {
	    // The constant may be located in xop0's zero_extend...
	    const absint_t ai0 = absint_t::explore (xop0, memo, mode);
	    const absint_t ai1 = absint_t::explore (xop1, memo, mode);
	    const int end0 = ai0.end_knows (CONST_INT);
	    const int end1 = ai1.end_knows (CONST_INT);
	    const uint64_t mul0 = end0 > 0 ? ai0.get_value (end0) : 1;
	    const uint64_t mul1 = end1 > 0 ? ai1.get_value (end1) : 1;
	    // Shifting in off/8 zero bytes from the right.
	    const int off = mul0 * mul1 != 0 ? ctz_hwi (mul0 * mul1) : 0;
	    for (int i = 0; i < off / 8; ++i)
	      ai[i].learn_val8 (0);
	  }
	break; // MULT

      case BSWAP:
	if (GET_MODE (xop0) == mode)
	  {
	    const absint_t ai0 = absint_t::explore (xop0, memo);
	    for (int i = 0; i < n_bytes; ++i)
	      ai[i] = ai0[n_bytes - 1 - i];
	  }
	break;
      } // switch code

    if (worth_dumping)
      {
	avr_dump (";; AI.explore %C:%m ", code, mode);
	ai.dump ();
      }

    for (int i = 0; i < n_bytes; ++i)
      gcc_assert (ai[i].check ());

    return ai;
  }
  // Helper for the method above.
  static absint_t explore_shift (rtx x, const memento_t &memo)
  {
    absint_t ai (x);

    const rtx_code code = GET_CODE (x);
    const machine_mode mode = GET_MODE (x);
    const int n_bytes = GET_MODE_SIZE (mode);

    if (! BINARY_P (x))
      return ai;

    rtx xop0 = XEXP (x, 0);
    rtx xop1 = XEXP (x, 1);

    // Look at shift offsets of DImode more closely;
    // they are in R16 for __lshrdi3 etc.  Patch xop1 on success.
    if (n_bytes == 8
	&& ! CONST_INT_P (xop1)
	&& GET_MODE (xop0) == mode)
      {
	const int n_off = GET_MODE_SIZE (GET_MODE (xop1));
	const absint_t aoff = absint_t::explore (xop1, memo);
	xop1 = aoff.get_value_as_const_int (n_off);
      }

    if (! xop1
	|| GET_MODE (xop0) != mode
	|| ! IN_RANGE (n_bytes, 1, FUSE_MOVE_MAX_MODESIZE)
	|| ! CONST_INT_P (xop1)
	|| ! IN_RANGE (INTVAL (xop1), 8, 8 * n_bytes - 1))
      return ai;

    const int off = INTVAL (xop1);
    const absint_t ai0 = absint_t::explore (xop0, memo);

    switch (GET_CODE (x))
      {
      default:
	break;

      case ASHIFT:
	// Shifting in 0x00's from the right.
	for (int i = 0; i < off / 8; ++i)
	  ai[i].learn_val8 (0);
	break;

      case LSHIFTRT:
      case ASHIFTRT:
	{
	  // Shifting in 0x00's or signs from the left.
	  absint_byte_t b_signs = ai0[n_bytes - 1].get_signs (GET_CODE (x));
	  for (int i = n_bytes - off / 8; i < n_bytes; ++i)
	    ai[i] = b_signs;
	  if (off == 8 * n_bytes - 1)
	    if (code == ASHIFTRT)
	      ai[0] = b_signs;
	}
	break;
      }

    if (off % 8 != 0
	|| ai0.popcount () == 0)
      return ai;

    // For shift offsets that are a multiple of 8, record the
    // action on the constituent bytes.

    // Bytes are moving left by this offset (or zero for "none").
    const int boffL = select<int>()
      : code == ROTATE || code == ASHIFT ? off / 8
      : code == ROTATERT ? n_bytes - off / 8
      : 0;

    // Bytes are moving right by this offset (or zero for "none").
    const int boffR = select<int>()
      : code == ROTATERT || code == ASHIFTRT || code == LSHIFTRT ? off / 8
      : code == ROTATE ? n_bytes - off / 8
      : 0;

    if (dump_flags & TDF_FOLDING)
      {
	avr_dump (";; AI.explore_shift %C:%m ", code, mode);
	if (boffL)
	  avr_dump ("<< %d%s", 8 * boffL, boffL && boffR ? ", " : "");
	if (boffR)
	  avr_dump (">> %d", 8 * boffR);
	avr_dump ("\n");
      }

    if (boffL)
      for (int i = 0; i < n_bytes - boffL; ++i)
	ai[i + boffL] = ai0[i];

    if (boffR)
      for (int i = boffR; i < n_bytes; ++i)
	ai[i - boffR] = ai0[i];

    return ai;
  }
  void dump (const char *msg = nullptr, FILE *f = dump_file) const
  {
    if (f)
      dump (NULL_RTX, msg, f);
  }

  void dump (rtx dest, const char *msg = nullptr, FILE *f = dump_file) const
  {
    if (f)
      {
	int regno = dest && REG_P (dest) ? REGNO (dest) : 0;

	msg = msg && msg[0] ? msg : "AI=[%s]\n";
	const char *const xs = strstr (msg, "%s");
	gcc_assert (xs);

	fprintf (f, "%.*s", (int) (xs - msg), msg);
	for (int i = max_knows (); i >= 0; --i)
	  {
	    const int sub_regno = eq[i].regno (false /*nonstrict*/);
	    const bool nop = regno && sub_regno == regno + i;
	    eq[i].dump (nop ? "%s=nop" : "%s", f);
	    fprintf (f, "%s", i ? "; " : "");
	  }
	fprintf (f, "%s", xs + strlen ("%s"));
      }
  }
}; // absint_t
// Information for a REG = non-memory single_set.

struct insninfo_t
{
  // This is an insn that sets the m_size bytes of m_regno to either
  // - A compile time constant m_isrc (m_code = CONST_INT), or
  // - The contents of register number m_rsrc (m_code = REG).
  int m_size = 0;
  int m_regno;
  int m_rsrc;
  rtx_code m_code;
  uint64_t m_isrc;
  rtx_insn *m_insn = nullptr;
  rtx m_set = NULL_RTX;
  rtx m_src = NULL_RTX;
  int m_scratch = 0; // 0 or the register number of a QImode scratch.
  rtx_code m_old_code = UNKNOWN;

  // Knowledge about the bytes of the SET_SRC: A byte may have a known
  // value, may be known to equal some register (e.g. with BSWAP),
  // or both, or may be unknown.
  absint_t m_ai;

  // May be set for binary operations.
  absint_byte_t m_new_src;

  bool init1 (insn_optimize_data_t &, int max_size, const char *purpose);

  // Upper bound for the cost (in words) of a move<mode> insn that
  // performs a REG = CONST_XXX = .m_isrc move of modesize .m_size.
  int cost () const;
  bool combine (const insninfo_t &prev, const insninfo_t &curr);
  int emit_insn () const;

  bool needs_scratch () const
  {
    gcc_assert (m_code == CONST_INT);

    for (int i = 0; i < m_size; ++i)
      if (AVRasm::ldi_needs_scratch (m_regno, m_isrc >> (8 * i)))
	return true;

    return false;
  }

  int hamming (const memento_t &memo) const
  {
    gcc_assert (m_code == CONST_INT);

    int h = 0;
    for (int i = 0; i < m_size; ++i)
      h += ! memo.have_value (m_regno + i, 1, 0xff & (m_isrc >> (8 * i)));

    return h;
  }

  // Upper bound for the number of ply_t's of a solution, given a Hamming
  // distance of HAMM (-1 for unknown).
  int n_best_plys (int hamm = -1) const
  {
    gcc_assert (m_code == CONST_INT);

    if (m_size == 8)
      return (hamm >= 0 ? hamm : m_size);
    else if (hamm <= 4)
      return (hamm >= 0 ? hamm : m_size)
	// The following terms give the max number of MOVWs with a
	// Hamming difference of less than 2.
	+ (AVR_HAVE_MOVW && m_regno < REG_14) * m_size / 2
	+ (AVR_HAVE_MOVW && m_regno == REG_14) * std::max (0, m_size - 2)
	- (AVR_HAVE_MOVW && hamm == 4 && (uint32_t) m_isrc % 0x10001 == 0);
    else
      gcc_unreachable ();
  }
}; // insninfo_t
struct insn_optimize_data_t
{
  // Known values held in GPRs prior to the action of .insn / .ii.
  memento_t &regs;
  rtx_insn *insn;
  insninfo_t ii;
  bool unused;

  insn_optimize_data_t () = delete;

  insn_optimize_data_t (memento_t &memo)
    : regs(memo)
  {}
}; // insn_optimize_data_t

struct optimize_data_t
{
  insn_optimize_data_t prev;
  insn_optimize_data_t curr;

  // Number >= 0 of new insns that replace the curr insn and maybe also the
  // prev insn.  -1 when no replacement has been found.
  int n_new_insns = -1;

  // .prev will be removed provided we have (potentially zero) new insns.
  bool delete_prev_p = false;

  // Ignore these GPRs when comparing the simulation results of
  // old and new insn sequences.  Usually some scratch reg(s).
  gprmask_t ignore_mask = 0;

  optimize_data_t () = delete;

  optimize_data_t (memento_t &prev_regs, memento_t &curr_regs)
    : prev(prev_regs), curr(curr_regs)
  {}

  bool try_fuse (bbinfo_t *);
  bool try_mem0 (bbinfo_t *);
  bool try_bin_arg1 (bbinfo_t *);
  bool try_simplify (bbinfo_t *);
  bool try_split_ldi (bbinfo_t *);
  bool try_split_any (bbinfo_t *);
  bool fail (const char *reason);
  bool emit_signs (int r_sign, gprmask_t);
  void emit_move_mask (int dest, int src, int n_bytes, gprmask_t &);
  rtx_insn *emit_sequence (basic_block, rtx_insn *);
  bool get_2ary_operands (rtx_code &, const absint_byte_t &,
			  insn_optimize_data_t &, int r_dest,
			  absint_val_t &, absint_val_t &, int &ex_cost);
  rtx_insn *emit_and_apply_move (memento_t &, rtx dest, rtx src);

  // M2 is the state of GPRs as the sequence starts; M1 is the state
  // one before.
  static void apply_sequence (const std::vector<rtx_insn *> &insns,
			      memento_t &m1, memento_t &m2)
  {
    gcc_assert (insns.size () >= 1);

    for (auto &i : insns)
      {
	m1 = m2;
	m2.apply_insn (i, false);
      }
  }
}; // optimize_data_t
// Emit INSNS before .curr.insn, replacing .curr.insn and also .prev.insn
// when .delete_prev_p is on.  Adjusts .curr.regs and .prev.regs accordingly.
rtx_insn *
optimize_data_t::emit_sequence (basic_block bb, rtx_insn *insns)
{
  gcc_assert (n_new_insns >= 0);

  // The old insns will be replaced by and simulated...
  const std::vector<rtx_insn *> old_insns = delete_prev_p
    ? std::vector<rtx_insn *> { prev.insn, curr.insn }
    : std::vector<rtx_insn *> { curr.insn };

  // ...against the new insns.
  std::vector<rtx_insn *> new_insns;
  for (rtx_insn *i = insns; i; i = NEXT_INSN (i))
    new_insns.push_back (i);

  rtx_insn *new_curr_insn;

  memento_t &m1 = prev.regs;
  memento_t &m2 = curr.regs;

  if (new_insns.empty ())
    {
      if (delete_prev_p)
	{
	  m2 = m1;
	  m1.known = 0;
	  new_curr_insn = prev_nondebug_insn_bb (bb, prev.insn);
	}
      else
	new_curr_insn = prev.insn;
    }
  else
    {
      // We are going to emit at least one new insn.  Simulate the effect of
      // the new sequence and compare it against the effect of the old one.
      // Both effects must be the same (modulo scratch regs).

      memento_t n1 = m1;
      memento_t n2 = m2;

      if (delete_prev_p)
	{
	  m2 = m1, m1.known = 0;
	  n2 = n1, n1.known = 0;
	}

      avr_dump (";; Applying new route...\n");
      optimize_data_t::apply_sequence (new_insns, n1, n2);

      avr_dump (";; Applying old route...\n");
      optimize_data_t::apply_sequence (old_insns, m1, m2);
      avr_dump ("\n");

      if (! m2.equals (n2, ignore_mask))
	{
	  // When we come here, then
	  // - We have a genuine bug, and/or
	  // - We did produce insns that are opaque to absint_t's explore().
	  avr_dump ("INCOMPLETE APPLICATION:\n");
	  m2.dump ("regs old route=%s\n\n");
	  n2.dump ("regs new route=%s\n\n");
	  avr_dump ("The new insns are:\n%L", insns);

	  fatal_insn ("incomplete application of insn", insns);
	}

      // Use N1 and N2 as the new GPR states.  Even though they are equal
      // modulo ignore_mask, N2 may know more about GPRs when it doesn't
      // clobber the scratch reg.
      m1 = n1;
      m2 = n2;

      emit_insn_before (insns, curr.insn);

      new_curr_insn = new_insns.back ();
    }

  if (delete_prev_p)
    SET_INSN_DELETED (prev.insn);

  SET_INSN_DELETED (curr.insn);

  return new_curr_insn;
}
const pass_data avr_pass_data_fuse_move =
{
  RTL_PASS,	  // type
  "",		  // name (will be patched)
  OPTGROUP_NONE,  // optinfo_flags
  TV_MACH_DEP,	  // tv_id
  0,		  // properties_required
  0,		  // properties_provided
  0,		  // properties_destroyed
  0,		  // todo_flags_start
  TODO_df_finish | TODO_df_verify // todo_flags_finish
};

class avr_pass_fuse_move : public rtl_opt_pass
{
public:
  avr_pass_fuse_move (gcc::context *ctxt, const char *name)
    : rtl_opt_pass (avr_pass_data_fuse_move, ctxt)
  {
    this->name = name;
  }

  unsigned int execute (function *func) final override
  {
    if (optimize > 0 && avropt_fuse_move > 0)
      {
	df_note_add_problem ();
	df_analyze ();

	bbinfo_t::optimize_one_function (func);
      }

    return 0;
  }
}; // avr_pass_fuse_move
// Append PLY to .plies[].  A SET or BLD ply may start a new sequence of
// SETs or BLDs and gets assigned the overhead of the sequence, like for an
// initial SET or CLT instruction.  A SET ply may be added in two flavours:
// One that starts a sequence of single_sets, and one that represents the
// payload of a set_some insn.  MEMO is the GPR state prior to PLY.
void
plies_t::add (ply_t ply, const ply_t *prev, const memento_t &memo,
	      bool maybe_set_some)
{
  if (ply.code == SET)
    {
      if (prev && prev->code == SET)
	{
	  // Proceed with the SET sequence flavour.
	  ply.in_set_some = prev->in_set_some;

	  if (ply.in_set_some)
	    ply.scratch = 0;
	  else if (! ply.scratch && ply.needs_scratch ())
	    ply.cost += 2;
	}
      else
	{
	  // The 1st SET in a sequence.  May use set_some to set
	  // all bytes in one insn, or a bunch of single_sets.

	  // Route 1: Bunch of single_sets.
	  const int ply_cost = ply.cost;
	  if (! ply.scratch && ply.needs_scratch ())
	    ply.cost += 2;
	  ply.in_set_some = false;

	  add (ply);

	  if (maybe_set_some)
	    {
	      // Route 2: One set_some: The 1st SET gets all the overhead.
	      ply.scratch = 0;
	      ply.cost = ply_cost + 1 + ! memo.known_dregno ();
	      ply.in_set_some = true;
	    }
	}
    } // SET
  else if (ply.is_bld ())
    {
      // The first BLD in a series of BLDs gets the extra costs
      // for the SET / CLT that precedes the BLDs.
      ply.cost += ! ply.is_same_bld (prev);
    }

  add (ply);
}
// Emit insns for .plies[] and return the number of emitted insns.
// The emitted insns represent the effect of II with MEMO, which
// is the GPR knowledge before II is executed.
int
plies_t::emit_insns (const insninfo_t &ii, const memento_t &memo) const
{
  int n_insns = 0;

  for (int i = 0; i < n_plies; ++i)
    {
      const ply_t &p = plies[i];

      // SETs and BLDs are dumped by their emit_xxxs().
      if (p.code != SET && ! p.is_bld ())
	p.dump ();

      rtx src1 = NULL_RTX;
      rtx src2 = NULL_RTX;
      rtx dest = NULL_RTX;
      rtx xscratch = NULL_RTX;
      rtx_code code = p.code;

      switch (p.code)
	{
	default:
	  avr_dump ("\n\n;; Bad ply_t:\n");
	  p.dump (i + 1);
	  gcc_unreachable ();
	  break;

	case REG: // *movhi = MOVW; movqi_insn = MOV
	  dest = gen_rtx_REG (p.size == 1 ? QImode : HImode, p.regno);
	  src1 = gen_rtx_REG (p.size == 1 ? QImode : HImode, p.arg);
	  break;

	case SET: // movqi_insn = LDI, CLR; set_some = (LDI + MOV) ** size.
	  i += emit_sets (ii, n_insns, memo, i) - 1;
	  continue;

	case MOD: // *ior<mode>3, *and<mode>3 = SET + BLD... / CLT + BLD...
	  i += emit_blds (ii, n_insns, i) - 1;
	  continue;

	case MINUS: // *subqi3 = SUB
	case PLUS:  // *addqi3 = ADD
	case AND:   // *andqi3 = AND
	case IOR:   // *iorqi3 = OR
	case XOR:   // *xorqi3 = EOR
	  dest = gen_rtx_REG (QImode, p.regno);
	  src2 = gen_rtx_REG (QImode, p.arg);
	  break;

	case PRE_INC: // *addqi3 = INC
	case PRE_DEC: // *addqi3 = DEC
	  code = PLUS;
	  dest = gen_rtx_REG (QImode, p.regno);
	  src2 = p.code == PRE_INC ? const1_rtx : constm1_rtx;
	  break;

	case NEG: // *negqi2 = NEG
	case NOT: // *one_cmplqi2 = COM
	  dest = gen_rtx_REG (QImode, p.regno);
	  src1 = dest;
	  break;

	case ROTATE:   // *rotlqi3 = SWAP
	case ASHIFT:   // *ashlqi3 = LSL
	case ASHIFTRT: // *ashrqi3 = ASR
	case LSHIFTRT: // *lshrqi3 = LSR
	  dest = gen_rtx_REG (QImode, p.regno);
	  src2 = GEN_INT (code == ROTATE ? 4 : 1);
	  break;

	case SS_PLUS: // *addhi3 = ADIW, SBIW
	  code = PLUS;
	  dest = gen_rtx_REG (HImode, p.regno);
	  src2 = gen_int_mode (p.arg, HImode);
	  break;
	} // switch p.code

      gcc_assert (dest && (! src1) + (! src2) == 1);

      rtx src = code == REG || code == SET
	? src1
	: (src2
	   ? gen_rtx_fmt_ee (code, GET_MODE (dest), dest, src2)
	   : gen_rtx_fmt_e (code, GET_MODE (dest), src1));

      emit_valid_move_clobbercc (dest, src, xscratch);
      n_insns += 1;
    }

  return n_insns;
}
// Helper for .emit_insns().  Emit an ior<mode>3 or and<mode>3 insn
// that's equivalent to a sequence of contiguous BLDs starting at
// .plies[ISTART].  Updates N_INSNS according to the number of insns
// emitted and returns the number of consumed plys in .plies[].
int
plies_t::emit_blds (const insninfo_t &ii, int &n_insns, int istart) const
{
  const ply_t &first = plies[istart];

  gcc_assert (ii.m_size <= 4);
  gcc_assert (first.is_bld ());

  const rtx_code code = first.is_setbld () ? IOR : AND;
  const machine_mode mode = size_to_mode (ii.m_size);

  // Determine mask and number of BLDs.

  uint32_t mask = 0;
  int n_blds = 0;

  for (int i = istart; i < n_plies; ++i, ++n_blds)
    {
      const ply_t &p = plies[i];
      if (! p.is_bld () || ! p.is_same_bld (& first))
	break;

      // For AND, work on the 1-complement of the mask,
      // i.e. 1's specify which bits to clear.
      uint8_t mask8 = code == IOR ? p.arg : ~p.arg;
      mask |= mask8 << (8 * (p.regno - ii.m_regno));
    }

  mask = GET_MODE_MASK (mode) & (code == IOR ? mask : ~mask);

  if (dump_file)
    {
      fprintf (dump_file, ";; emit_blds[%d...%d] R%d[%d]%s=%0*x\n",
	       istart, istart + n_blds - 1, ii.m_regno, ii.m_size,
	       code == IOR ? "|" : "&", 2 * ii.m_size, (int) mask);
    }

  for (int i = 0; i < n_blds; ++i)
    plies[i + istart].dump ();

  rtx dest = gen_rtx_REG (mode, ii.m_regno);
  rtx src = gen_rtx_fmt_ee (code, mode, dest, gen_int_mode (mask, mode));
  rtx xscratch = mode == QImode ? NULL_RTX : gen_rtx_SCRATCH (QImode);

  emit_valid_move_clobbercc (dest, src, xscratch);
  n_insns += 1;

  return n_blds;
}
// Emit insns for a contiguous sequence of SET ply_t's starting at
// .plies[ISTART].  Advances N_INSNS by the number of emitted insns.
// MEMO is the state of the GPRs before II is executed, where II
// represents the insn under optimization.
// The emitted insns are "movqi_insn" or "*reload_inqi"
// when .plies[ISTART].in_set_some is not set, and one "set_some" insn
// when .plies[ISTART].in_set_some is set.
int
plies_t::emit_sets (const insninfo_t &ii, int &n_insns, const memento_t &memo,
		    int istart) const
{
  gcc_assert (plies[istart].code == SET);

  const bool in_set_some = plies[istart].in_set_some;

  // Some d-regno that holds a compile-time constant, or 0.
  const int known_dregno = memo.known_dregno ();

  // Determine the number of contiguous SETs,
  // and sort them in ps[] such that smaller regnos come first.

  const ply_t *ps[FUSE_MOVE_MAX_MODESIZE];
  int n_sets = 0;

  for (int i = istart; i < n_plies && plies[i].code == SET; ++i)
    ps[n_sets++] = & plies[i];

  if (dump_file)
    {
      fprintf (dump_file, ";; emit_sets[%d...%d] R%d[%d]=%0*" PRIx64,
	       istart, istart + n_sets - 1, ii.m_regno, ii.m_size,
	       2 * ii.m_size, ii.m_isrc);
      fprintf (dump_file, ", scratch=%s%d", "R" + ! ii.m_scratch,
	       ii.m_scratch);
      fprintf (dump_file, ", known_dreg=%s%d, set_some=%d\n",
	       "R" + ! known_dregno, known_dregno, in_set_some);
    }

  for (int i = 0; i < n_sets; ++i)
    ps[i]->dump ();

  // Sort.  This is most useful on regs like (reg:SI REG_14).
  for (int i = 0; i < n_sets - 1; ++i)
    for (int j = i + 1; j < n_sets; ++j)
      if (ps[i]->regno > ps[j]->regno)
	std::swap (ps[i], ps[j]);

  // Prepare operands.
  rtx dst[FUSE_MOVE_MAX_MODESIZE];
  rtx src[FUSE_MOVE_MAX_MODESIZE];
  for (int i = 0; i < n_sets; ++i)
    {
      dst[i] = gen_rtx_REG (QImode, ps[i]->regno);
      src[i] = gen_int_mode (ps[i]->arg, QImode);
    }

  if (in_set_some)
    {
      // Emit a "set_some" insn that sets all of the collected 8-bit SETs.
      // This is a parallel with n_sets QImode SETs as payload.

      gcc_assert (! known_dregno || memo.knows (known_dregno));

      // A scratch reg...
      rtx op1 = known_dregno
	? gen_rtx_REG (QImode, known_dregno)
	: const0_rtx;
      // ...with a known content, so it can be restored without saving.
      rtx op2 = known_dregno
	? gen_int_mode (memo.values[known_dregno], QImode)
	: const0_rtx;
      // Target register envelope.
      rtx op3 = GEN_INT (ii.m_regno);
      rtx op4 = GEN_INT (ii.m_size);

      // Payload.
      for (int i = 0; i < n_sets; ++i)
	dst[i] = gen_rtx_SET (dst[i], src[i]);

      rtvec vec = gen_rtvec (5 + n_sets,
			     gen_rtx_USE (VOIDmode, op1),
			     gen_rtx_USE (VOIDmode, op2),
			     gen_rtx_USE (VOIDmode, op3),
			     gen_rtx_USE (VOIDmode, op4),
			     gen_rtx_CLOBBER (VOIDmode, cc_reg_rtx),
			     dst[0], dst[1], dst[2], dst[3]);
      rtx pattern = gen_rtx_PARALLEL (VOIDmode, vec);

      emit_valid_insn (pattern);
      n_insns += 1;
    }
  else
    {
      // Emit a bunch of movqi_insn / *reload_inqi insns.

      for (int i = 0; i < n_sets; ++i)
	if (ii.m_scratch
	    && AVRasm::constant_cost (SET, ps[i]->regno, ps[i]->arg) > 1)
	  {
	    rtx scratch = gen_rtx_REG (QImode, ii.m_scratch);
	    bool use_reload_inqi = true;
	    if (use_reload_inqi)
	      {
		emit_valid_move_clobbercc (dst[i], src[i], scratch);
		n_insns += 1;
	      }
	    else
	      {
		emit_valid_move_clobbercc (scratch, src[i]);
		emit_valid_move_clobbercc (dst[i], scratch);
		n_insns += 2;
	      }
	  }
	else
	  {
	    emit_valid_move_clobbercc (dst[i], src[i]);
	    n_insns += 1;
	  }
    }

  return n_sets;
}
// Try to find an operation such that Y = op (X).
// Shifts and rotates are regarded as unary operations with
// an implied 2nd operand of 1 or 4, respectively.
static rtx_code
find_arith (uint8_t y, uint8_t x)
{
#define RETIF(ex, code) y == (0xff & (ex)) ? code
  return select<rtx_code>()
    : RETIF (x + 1, PRE_INC)
    : RETIF (x - 1, PRE_DEC)
    : RETIF ((x << 4) | (x >> 4), ROTATE)
    : RETIF (-x, NEG)
    : RETIF (~x, NOT)
    : RETIF (x >> 1, LSHIFTRT)
    : RETIF (x << 1, ASHIFT)
    : RETIF ((x >> 1) | (x & 0x80), ASHIFTRT)
    : UNKNOWN;
#undef RETIF
}
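
// Example (for illustration): find_arith (0x10, 0x01) == ROTATE since a
// SWAP of 0x01 gives 0x10, and find_arith (0xfe, 0x01) == NOT.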
// Try to find an operation such that Z = X op Y.
static rtx_code
find_arith2 (uint8_t z, uint8_t x, uint8_t y)
{
#define RETIF(ex, code) z == (0xff & (ex)) ? code
  return select<rtx_code>()
    : RETIF (x + y, PLUS)
    : RETIF (x - y, MINUS)
    : RETIF (x & y, AND)
    : RETIF (x | y, IOR)
    : RETIF (x ^ y, XOR)
    : UNKNOWN;
#undef RETIF
}
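
// Example (for illustration): find_arith2 (0x30, 0x10, 0x20) == PLUS,
// and find_arith2 (0x00, 0x55, 0x55) == MINUS, matched before XOR which
// would also yield 0x00.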
// Add plies to .plies[] that represent a MOVW, but only ones that reduce
// the Hamming distance from REGNO[SIZE] to VAL by exactly DHAMM.
void
plies_t::add_plies_movw (int regno, int size, uint64_t val,
			 int dhamm, const memento_t &memo)
{
  if (! AVR_HAVE_MOVW || size < 2)
    return;

  for (int i = 0; i < size - 1; i += 2)
    {
      // A MOVW that sets less than 2 regs to the target value is
      // not needed for the upper regs.
      if (dhamm != 2 && regno + i >= REG_16)
	continue;

      const uint16_t val16 = val >> (8 * i);
      const uint8_t lo8 = val16;
      const uint8_t hi8 = val16 >> 8;

      // When one of the target bytes is already as expected, then
      // no MOVW is needed for an optimal sequence.
      if (memo.have_value (regno + i, 1, lo8)
	  || memo.have_value (regno + i + 1, 1, hi8))
	continue;

      const int h_old = memo.hamming (regno + i, 2, val16);

      // Record MOVWs that reduce the Hamming distance by DHAMM as requested.
      for (int j = FIRST_GPR; j < REG_32; j += 2)
	if (j != regno + i
	    && memo.knows (j, 2))
	  {
	    const int h_new = memo.hamming (j, 2, val16);
	    if (h_new == h_old - dhamm)
	      add (ply_t { regno + i, 2, REG, j, 1, dhamm });
	  }
    }
}
// Set PS to plys that reduce the Hamming distance from II.m_regno to
// compile-time constant II.m_isrc by 2, 1 or 0.  PREV is NULL or points
// to a previous ply_t.  MEMO is the GPR state after PREV and prior to the
// added plys.
void
bbinfo_t::get_plies (plies_t &ps, const insninfo_t &ii, const memento_t &memo,
		     const ply_t *prev)
{
  ps.reset ();

  fpd->n_get_plies += 1;

  const bool maybe_set_some = (bbinfo_t::use_set_some_p
			       && ii.needs_scratch ());

  // Start with cheap plies, then continue to more expensive ones.
  const int regno = ii.m_regno;
  const int size = ii.m_size;
  const uint64_t val = ii.m_isrc;

  // Find MOVW with a Hamming delta of 2.
  ps.add_plies_movw (regno, size, val, 2, memo);

  // Find ADIW / SBIW.
  if (AVR_HAVE_ADIW && size >= 2)
    for (int i = 0; i < size - 1; i += 2)
      if (regno + i >= REG_24
	  && memo.knows (regno + i, 2))
	{
	  const int16_t value16 = memo[regno + i] + 256 * memo[regno + i + 1];
	  const int16_t lo16 = val >> (8 * i);
	  const int16_t delta = lo16 - value16;
	  const uint8_t lo8 = val >> (8 * i);
	  const uint8_t hi8 = val >> (8 * i + 8);
	  if (IN_RANGE (delta, -63, 63)
	      && lo8 != memo[regno + i]
	      && hi8 != memo[regno + i + 1])
	    {
	      ps.add (ply_t { regno + i, 2, SS_PLUS, delta, 1, 2 });
	    }
	}

  // Find 1-reg plies.  In an optimal sequence, each 1-reg ply will decrease
  // the Hamming distance.  Thus we only have to consider plies that set
  // one of the target bytes to the target value VAL.  Start with the
  // high registers since that is the canonical order when two plies commute.

  for (int i = size - 1; i >= 0; --i)
    {
      const uint8_t val8 = val >> (8 * i);

      // Nothing to do for this byte when its value is already as desired.
      if (memo.have_value (regno + i, 1, val8))
	continue;

      // LDI or CLR.
      if (regno + i >= REG_16 || val8 == 0)
	ps.add (ply_t { regno + i, 1, SET, val8, 1 }, prev, memo,
		maybe_set_some);

      // We only may need to MOV non-zero values since there is CLR,
      // and only when there is no LDI.
      if (val8 != 0
	  && regno + i < REG_16)
	{
	  // MOV where the source register is one of the target regs.
	  for (int j = 0; j < size; ++j)
	    if (j != i)
	      if (memo.have_value (regno + j, 1, val8))
		ps.add (ply_t { regno + i, 1, REG, regno + j, 1 });

	  // MOV where the source register is not a target reg.
	  // FIXME: ticks.
	  for (int j = FIRST_GPR; j < REG_32; ++j)
	    if (! IN_RANGE (j, regno, regno + size - 1))
	      if (memo.have_value (j, 1, val8))
		ps.add (ply_t { regno + i, 1, REG, j, 1 });

	  // LDI + MOV.
	  if (regno + i < REG_16 && val8 != 0)
	    {
	      ply_t p { regno + i, 1, SET, val8, 2 };
	      p.scratch = ii.m_scratch;
	      ps.add (p, prev, memo, maybe_set_some);
	    }
	}
    }

  // Arithmetic like INC, DEC or ASHIFT.
  for (int i = size - 1; i >= 0; --i)
    if (bbinfo_t::use_arith_p
	&& regno + i < REG_16
	&& memo.knows (regno + i))
      {
	const uint8_t y = val >> (8 * i);
	const uint8_t x = memo[regno + i];
	rtx_code code;

	if (y == 0 || y == x)
	  continue;

	// INC, DEC, SWAP, LSL, NEG, ...
	if (UNKNOWN != (code = find_arith (y, x)))
	  {
	    ps.add (ply_t { regno + i, 1, code, x /* dummy */, 1 });
	    continue;
	  }

	// ADD, AND, ...
	for (int r = FIRST_GPR; r < REG_32; ++r)
	  if (r != regno + i
	      && memo.knows (r)
	      && memo[r] != 0
	      && UNKNOWN != (code = find_arith2 (y, x, memo[r])))
	    {
	      ps.add (ply_t { regno + i, 1, code, r, 1 });
	    }

	if (size < 2 || size > 4)
	  continue;

	// SET + BLD.
	if ((x & y) == x && popcount_hwi (x ^ y) == 1)
	  ps.add (ply_t { regno + i, 1, MOD, x ^ y, 1 },
		  prev, memo, maybe_set_some);

	// CLT + BLD.
	if ((x & y) == y && popcount_hwi (x ^ y) == 1)
	  ps.add (ply_t { regno + i, 1, MOD, x ^ y ^ 0xff, 1 },
		  prev, memo, maybe_set_some);
      }

  if (bbinfo_t::use_arith_p
      // For 8-byte values, don't use ply_t's with only a partial reduction
      // of the Hamming distance.
      && size <= 4)
    {
      // Find MOVW with a Hamming delta of 1, then 0.
      ps.add_plies_movw (regno, size, val, 1, memo);
      ps.add_plies_movw (regno, size, val, 0, memo);
    }

  plies_t::max_n_plies = std::max (plies_t::max_n_plies, ps.n_plies);
}
// Try to combine two 8-bit insns PREV and CURR that (effectively)
// are REG = CONST_INT to one 16-bit such insn.  Returns true on success.
bool
insninfo_t::combine (const insninfo_t &prev, const insninfo_t &curr)
{
  if (prev.m_size == 1 && curr.m_size == 1
      && prev.m_regno == (1 ^ curr.m_regno)
      && curr.m_code == CONST_INT
      && prev.m_code == CONST_INT)
    {
      m_regno = curr.m_regno & ~1;
      m_code = CONST_INT;
      m_size = 2;
      m_scratch = std::max (curr.m_scratch, prev.m_scratch);
      m_isrc = m_regno == prev.m_regno
	? (uint8_t) prev.m_isrc + 256 * (uint8_t) curr.m_isrc
	: (uint8_t) curr.m_isrc + 256 * (uint8_t) prev.m_isrc;

      return true;
    }

  return false;
}
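
// Example (for illustration): a PREV insn "R24 = 0x34" followed by a
// CURR insn "R25 = 0x12" combines to one insninfo_t that describes
// R24[2] = 0x1234.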
// Return the cost (in terms of words) of the respective mov<mode> insn.
// This can be used as an upper bound for the ply_t's cost.
int
insninfo_t::cost () const
{
  if (m_code != CONST_INT)
    return m_size;

  if (m_regno >= REG_16 || m_isrc == 0)
    return m_size
      // MOVW can save one instruction.
      - (AVR_HAVE_MOVW && m_size == 4 && (uint32_t) m_isrc % 0x10001 == 0);

  // LDI + MOV to a lower reg.
  if (m_scratch && m_size == 1)
    return 2;

  if (m_size == 8)
    {
      int len = m_size;
      for (int i = 0; i < m_size; ++i)
	len += m_regno + i < REG_16 && (0xff & (m_isrc >> (8 * i))) != 0;
      return len;
    }

  // All other cases are complicated.  Ask the output oracle.
  const machine_mode mode = size_to_mode (m_size);
  rtx xscratch = m_scratch ? all_regs_rtx[m_scratch] : NULL_RTX;
  rtx xop[] = { gen_rtx_REG (mode, m_regno), gen_int_mode (m_isrc, mode) };
  int len;
  if (m_size == 4)
    output_reload_insisf (xop, xscratch, &len);
  else
    output_reload_in_const (xop, xscratch, &len, false);

  return len;
}
// Emit the corresponding REG = REG-or-CONST_INT insn.  Returns 1, or aborts
// when the insn is not of that form.
int
insninfo_t::emit_insn () const
{
  int n_insns = 0;

  machine_mode mode = size_to_mode (m_size);
  rtx xsrc = NULL_RTX;
  rtx xscratch = NULL_RTX;

  gcc_assert (m_size > 0);

  switch (m_code)
    {
    default:
      gcc_unreachable ();

    case CONST_INT:
      xsrc = gen_int_mode (m_isrc, mode);
      if (m_scratch && m_regno < REG_16)
	xscratch = gen_rtx_REG (QImode, m_scratch);
      break;

    case REG:
      gcc_assert (gpr_regno_p (m_rsrc, m_size));
      if (m_regno != m_rsrc)
	xsrc = gen_rtx_REG (mode, m_rsrc);
      break;
    }

  if (xsrc)
    {
      rtx dest = gen_rtx_REG (mode, m_regno);
      emit_valid_move_clobbercc (dest, xsrc, xscratch);
      n_insns += 1;
    }

  return n_insns;
}
// Entering a basic block means combining known register values from
// all incoming BBs.
void
bbinfo_t::enter ()
{
  avr_dump ("\n;; Entering [bb %d]\n", bb->index);

  gcc_assert (! done);

  edge e;
  edge_iterator ei;
  gprmask_t pred_known_mask = ~0u;
  bbinfo_t *bbi = nullptr;

  // A quick iteration over all predecessors / incoming edges to reveal
  // whether this BB is worth a closer look.
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      basic_block pred = e->src;
      bbi = & bb_info[pred->index];

      pred_known_mask &= bbi->regs.known;

      if (dump_file)
	{
	  avr_dump (";; [bb %d] <- [bb %d] ", e->dest->index, e->src->index);
	  if (bbi->done)
	    bbi->regs.dump ();
	  else
	    avr_dump (" (unknown)\n");
	}
    }

  // Only if all predecessors have already been handled, we can
  // have known values as we are entering the current BB.
  if (pred_known_mask != 0
      && bbi != nullptr)
    {
      // Initialize current BB info from BBI, an arbitrary predecessor.

      regs = bbi->regs;

      // Coalesce the output values from all preceding BBs.  At the
      // start of the current BB, a value is only known if it is known
      // in *all* predecessors and *all* these values are the same.
      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  regs.coalesce (bb_info[e->src->index].regs);
	}
    }

  if (dump_file)
    {
      avr_dump (";; [bb %d] known at start: ", bb->index);
      if (regs.known)
	regs.dump ();
      else
	avr_dump (" (none)\n");
      avr_dump ("\n");
    }
}
void
bbinfo_t::leave ()
{
  done = true;

  if (dump_file)
    fprintf (dump_file, ";; Leaving [bb %d]\n\n", bb->index);
}
/* Initialize according to INSN which is a 1-byte single_set that's
   (effectively) a reg = reg or reg = const move.  INSN may be the result
   of the current pass's optimization, e.g. something like INC R2 where R2
   has a known content.  MEMO is the state prior to INSN.  Only CONST
   cases are recorded; plus cases that are non-trivial, for example when
   an XOR decays to a move.  */

bool
insninfo_t::init1 (insn_optimize_data_t &iod, int max_size,
		   const char *purpose = "")
{
  m_size = 0;
  m_insn = iod.insn;
  m_old_code = UNKNOWN;
  iod.unused = false;

  if (! iod.insn
      || ! (m_set = single_set_with_scratch (iod.insn, m_scratch)))
    return false;

  rtx dest = SET_DEST (m_set);
  machine_mode mode = GET_MODE (dest);
  const int n_bytes = GET_MODE_SIZE (mode);
  max_size = std::min (max_size, FUSE_MOVE_MAX_MODESIZE);

  if (! REG_P (dest)
      || END_REGNO (dest) > REG_32
      || n_bytes > max_size)
    return false;

  // Omit insns that (explicitly) touch fixed GPRs in any way.
  using elt0_getter_HRS = elt0_getter<HARD_REG_SET, HARD_REG_ELT_TYPE>;
  HARD_REG_SET hregs;
  CLEAR_HARD_REG_SET (hregs);
  find_all_hard_regs (PATTERN (iod.insn), & hregs);
  if (memento_t::fixed_regs_mask & (gprmask_t) elt0_getter_HRS::get (hregs))
    {
      avr_dump (";; %sinit1 has fixed GPRs\n", purpose);
      return false;
    }

  if ((iod.unused = find_reg_note (iod.insn, REG_UNUSED, dest)))
    return false;

  m_src = SET_SRC (m_set);
  m_regno = REGNO (dest);
  const rtx_code src_code = GET_CODE (m_src);

  m_ai = absint_t::explore (m_src, iod.regs, mode);

  if (m_ai.popcount ())
    {
      if (m_ai.end_knows (CONST_INT) >= n_bytes)
	{
	  m_code = CONST_INT;
	  m_old_code = CONSTANT_P (m_src) ? UNKNOWN : src_code;
	  m_isrc = m_ai.get_value (n_bytes);
	  m_size = n_bytes;
	}
      else if (! REG_P (m_src)
	       && n_bytes == 1
	       && m_ai.end_knows (REG) >= n_bytes)
	{
	  m_code = REG;
	  m_old_code = src_code;
	  m_rsrc = m_ai[0].regno ();
	  m_size = n_bytes;
	}
      else if (n_bytes == 1)
	{
	  absint_byte_t &aib = m_new_src;
	  aib = m_ai[0].find_alternative_binary (iod.regs);

	  if (aib.arity () == 2
	      && aib.arg (0).regno == m_regno)
	    {
	      m_old_code = src_code;
	      m_code = aib.get_code ();
	      m_size = n_bytes;
	    }
	}
      else if (n_bytes >= 2
	       && m_ai.end_knows (VALUE) >= n_bytes)
	{
	  m_code = src_code;
	  m_size = n_bytes;
	}
    }

  if (dump_file && m_size != 0)
    {
      avr_dump (";; %sinit1 (%C", purpose,
		m_old_code ? m_old_code : m_code);
      if (m_old_code)
	avr_dump ("-> %C", m_code);
      avr_dump (") insn %d to R%d[%d] := %C:%m = ", INSN_UID (iod.insn),
		m_regno, n_bytes, src_code, mode);

      m_ai.dump (dest);

      if (dump_flags & TDF_FOLDING)
	avr_dump ("\n");
    }

  return m_size != 0;
}
2088 // The private worker for .apply_insn().
2089 void
2090 memento_t::apply_insn1 (rtx_insn *insn, bool unused)
2092 gcc_assert (NONDEBUG_INSN_P (insn));
2094 if (INSN_CODE (insn) == CODE_FOR_set_some)
2096 // This insn only sets some selected bytes of register $3 of
2097 // modesize $4. If non-0, then $1 is a QImode scratch d-reg with
2098 // a known value of $2.
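// For example (an illustrative case), a set_some insn may load bytes 0
// and 2 of a 4-byte register with new values while bytes 1 and 3 keep
// their previously known contents; the loop below records exactly the
// bytes that are set.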
2100 const auto &xop = recog_data.operand;
2101 extract_insn (insn);
2102 gcc_assert (recog_data.n_operands == 7);
2103 gcc_assert (set_some_operation (xop[0], VOIDmode));
2105 const rtx &xscratch = xop[1];
2106 const rtx &xscratch_value = xop[2];
2107 const int sets_start = 5;
2109 for (int i = sets_start; i < XVECLEN (xop[0], 0); ++i)
2111 rtx xset = XVECEXP (xop[0], 0, i);
2112 avr_dump (";; set_some %r = %r\n", XEXP (xset, 0), XEXP (xset, 1));
2113 set_values (XEXP (xset, 0), XEXP (xset, 1));
2116 if (REG_P (xscratch))
2118 avr_dump (";; set_some %r = %r restore\n", xscratch, xscratch_value);
2119 set_values (xscratch, xscratch_value);
2122 return;
2123 } // CODE_FOR_set_some
2125 memento_t mold = *this;
2127 // When insn changes a register in whatever way, set it to "unknown".
2129 HARD_REG_SET rset;
2130 find_all_hard_reg_sets (insn, &rset, true /* implicit */);
2131 (*this) &= ~rset;
2133 rtx set = single_set (insn);
2134 rtx dest;
2136 if (! set
2137 || ! REG_P (dest = SET_DEST (set))
2138 || END_REGNO (dest) > REG_32
2139 || (regmask (dest) & memento_t::fixed_regs_mask))
2140 return;
2142 rtx src = SET_SRC (set);
2143 const rtx_code src_code = GET_CODE (src);
2144 const machine_mode mode = GET_MODE (dest);
2145 const int n_bytes = GET_MODE_SIZE (mode);
2147 // Insns that are too complicated or have a poor yield.
2148 // Just record which regs are clobbered / changed.
2149 if (n_bytes > FUSE_MOVE_MAX_MODESIZE
2150 || MEM_P (src)
2151 || (REG_P (src) && END_REGNO (src) > REG_32))
2153 // Comparisons may clobber the compared reg when it is unused afterwards.
2154 if (src_code == COMPARE
2155 && REG_P (XEXP (src, 0))
2156 && CONSTANT_P (XEXP (src, 1)))
2158 rtx reg = XEXP (src, 0);
2159 for (unsigned r = REGNO (reg); r < END_REGNO (reg); ++r)
2160 set_unknown (r);
2162 return;
2165 if (unused)
2166 return;
2168 // Simulate the effect of some selected insns that are likely to produce
2169 // or propagate known values.
2171 // Get an abstract representation of src. Bytes may be unknown,
2172 // known to equal some 8-bit compile-time constant (CTC) value,
2173 // or known to equal some 8-bit register.
2174 // TODO: Currently, only the ai[].val8 knowledge is used.
2175 // What's the best way to make use of ai[].regno ?
2177 absint_t ai = absint_t::explore (src, mold, mode);
2179 if (ai.popcount ())
2181 avr_dump (";; apply_insn %d R%d[%d] := %C:%m = ", INSN_UID (insn),
2182 REGNO (dest), n_bytes, src_code, mode);
2183 ai.dump ();
2185 for (int i = 0; i < n_bytes; ++i)
2186 if (ai[i].can (CONST_INT))
2187 set_value (i + REGNO (dest), ai[i].val8 ());
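// Apply the effect of ply P to this GPR state.  A worked example with
// illustrative values: when R24 = 0xFF and R25 = 0x01 are known, then
// an ADIW ply with arg = 1 computes 0x01FF + 1 = 0x0200, so that
// R24 = 0x00 and R25 = 0x02 are known afterwards.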
2192 void
2193 memento_t::apply (const ply_t &p)
2195 if (p.is_movw ())
2197 copy_value (p.regno, p.arg);
2198 copy_value (p.regno + 1, p.arg + 1);
2200 else if (p.is_adiw ())
2202 int val = p.arg + values[p.regno] + 256 * values[1 + p.regno];
2203 set_value (p.regno, val);
2204 set_value (p.regno + 1, val >> 8);
2206 else if (p.size == 1)
2208 int x = values[p.regno];
2209 int y = values[p.arg];
2211 switch (p.code)
2213 default:
2214 gcc_unreachable ();
2215 break;
2217 case REG:
2218 copy_value (p.regno, p.arg);
2219 break;
2221 case SET:
2222 set_value (p.regno, p.arg);
2223 if (p.scratch >= REG_16)
2224 set_unknown (p.scratch);
2225 break;
2227 case MOD: // BLD
2228 gcc_assert (knows (p.regno));
2229 if (popcount_hwi (p.arg) == 1)
2230 values[p.regno] |= p.arg;
2231 else if (popcount_hwi (p.arg) == 7)
2232 values[p.regno] &= p.arg;
2233 else
2234 gcc_unreachable ();
2235 break;
2237 #define DO_ARITH(n_args, code, expr) \
2238 case code: \
2239 gcc_assert (knows (p.regno)); \
2240 if (n_args == 2) \
2241 gcc_assert (knows (p.arg)); \
2242 set_value (p.regno, expr); \
2243 break
2245 DO_ARITH (1, NEG, -x);
2246 DO_ARITH (1, NOT, ~x);
2247 DO_ARITH (1, PRE_INC, x + 1);
2248 DO_ARITH (1, PRE_DEC, x - 1);
2249 DO_ARITH (1, ROTATE, (x << 4) | (x >> 4));
2250 DO_ARITH (1, ASHIFT, x << 1);
2251 DO_ARITH (1, LSHIFTRT, x >> 1);
2252 DO_ARITH (1, ASHIFTRT, (x >> 1) | (x & 0x80));
2254 DO_ARITH (2, AND, x & y);
2255 DO_ARITH (2, IOR, x | y);
2256 DO_ARITH (2, XOR, x ^ y);
2257 DO_ARITH (2, PLUS, x + y);
2258 DO_ARITH (2, MINUS, x - y);
2259 #undef DO_ARITH
2261 } // size == 1
2262 else
2263 gcc_unreachable ();
2267 // Try to find a sequence of ply_t's that represents an II.m_regno = II.m_isrc
2268 // insn that sets a reg to a compile-time constant, and that is more
2269 // efficient than just a move insn.  (When try_split_any_p is on,
2270 // solutions that perform equally well as a move insn are also allowed.)
2271 // MEMO0 is the GPR state before II runs. A solution has been found
2272 // when .fpd->solution has at least one entry. LEN specifies the
2273 // depth of recursion, which works on the LEN-th ply_t.
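// The search is a depth-first enumeration of ply_t candidates with
// branch-and-bound pruning: a partial sequence is abandoned as soon as
// its scaled cost plus a lower bound on the remaining cost (derived
// from the Hamming distance) is no better than the best solution found
// so far.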
2274 void
2275 bbinfo_t::find_plies (int len, const insninfo_t &ii, const memento_t &memo0)
2277 if (len > fpd->n_best_plys)
2278 return;
2280 memento_t memo = memo0;
2281 bool ply_applied_p = false;
2284 const bool extra = dump_file && (dump_flags & TDF_FOLDING);
2286 if (extra)
2288 fprintf (dump_file, ";; #%d (HAM=%d): get_plies R%d[%d] = ", len,
2289 ii.hamming (fpd->regs0), ii.m_regno, ii.m_size);
2290 fprintf (dump_file, "0x%0*" PRIx64 "\n",
2291 2 * ii.m_size, ii.m_isrc & size_to_mask (ii.m_size));
2294 plies_t &ps = fpd->plies[len - 1];
2296 const ply_t *const prev = len >= 2 ? fpd->ply_stack[len - 2] : nullptr;
2297 const ply_t *const prev2 = len >= 3 ? fpd->ply_stack[len - 3] : nullptr;
2299 bbinfo_t::get_plies (ps, ii, memo0, prev);
2301 #define NEXT(reason) \
2302 do { \
2303 if (extra) \
2304 fprintf (dump_file, ";; cont=%s\n", reason); \
2305 goto next; \
2306 } while (0)
2308 for (int ip = 0; ip < ps.n_plies; ++ip)
2310 const ply_t &p = ps.plies[ip];
2312 fpd->ply_stack[len - 1] = &p;
2314 if (0)
2315 next: continue;
2317 if (extra)
2318 ply_t::dump_plys (dump_file, len, 1, fpd->ply_stack + len - 1, memo0);
2320 // A MOVW with a Hamming distance of < 2 requires more plys.
2321 if (p.is_movw () && len + (2 - p.dhamming) > fpd->n_best_plys)
2322 NEXT ("movw.plys");
2324 if (len >= 2)
2326 // Destroying (parts of) the results of the previous ply
2327 // won't yield an optimal sequence.
2328 if (p.overrides (prev))
2329 NEXT ("overrides");
2331 // When two plys are independent of each other, only investigate
2332 // sequences that operate on the higher reg first.  This
2333 // canonicalization reduces the number of candidates.
2334 if (p.commutes_with (prev, ii.m_scratch)
2335 && p.regno > prev->regno)
2336 NEXT ("noncanonic");
2338 // Two subsequent BLDs touching the same register.
2339 if (p.is_bld ()
2340 && prev->is_bld ()
2341 && p.changes_result_of (prev))
2342 NEXT ("2bld");
2344 // When there is a BLD, then at least 2 of the same kind
2345 // shall occur in a row.
2346 if (prev->is_bld ()
2347 && ! p.is_bld ()
2348 && (len == 2
2349 || (prev->is_setbld () && ! prev2->is_setbld ())
2350 || (prev->is_cltbld () && ! prev2->is_cltbld ())))
2351 NEXT ("1bld");
2354 // The Hamming delta of a MOVW may be less than 2, namely 0 or 1.
2355 // When the latter is the case, then a reasonable sequence must
2356 // modify the result of the MOVW.
2357 if (len >= 2
2358 && prev->is_movw ()
2359 && prev->dhamming == 1
2360 && ! p.changes_result_of (prev))
2361 NEXT ("movw.dh=1");
2363 if (len >= 3
2364 && prev2->is_movw ()
2365 && prev2->dhamming == 0
2366 && ! p.changes_result_of (prev2))
2367 NEXT ("movw.dh=0");
2369 // When setting an n-byte destination, at most n/2 MOVWs will
2370 // occur in an optimal sequence.
2371 int n_movw = 0;
2372 for (int i = 0; i < len; ++i)
2373 n_movw += fpd->ply_stack[i]->is_movw ();
2374 if (n_movw > ii.m_size / 2)
2375 NEXT ("movws");
2377 if (ply_applied_p)
2378 memo = memo0;
2380 memo.apply (p);
2382 ply_applied_p = true;
2384 // Calculate the cost of the sequence we have so far. Scale by some
2385 // factor so that we can express that ADIW is more expensive than MOVW
2386 // because it is slower, but without defeating MOVW.
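// For instance, with SCALE = 4 a 1-word MOVW contributes 4 cost units,
// while a 1-word ADIW contributes 4 + 1.  For short sequences the +1
// penalties cannot sum up to a whole word, so they act as a
// tie-breaker that disfavors ADIW without changing the word count.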
2387 const int SCALE = 4;
2389 int penal = 0;
2390 int cost = SCALE * 0;
2392 bool movw_p = false;
2393 for (int i = 0; i < len; ++i)
2395 bool adiw_p = fpd->ply_stack[i]->is_adiw ();
2396 cost += SCALE * fpd->ply_stack[i]->cost + adiw_p;
2397 penal += adiw_p;
2398 movw_p |= fpd->ply_stack[i]->is_movw ();
2400 penal += movw_p;
2402 const int hamm = ii.hamming (memo);
2404 // The current Hamming distance yields a lower bound on how many
2405 // plys are still required.  Account for that future cost already now.
2406 int future_cost = AVR_HAVE_MOVW || (AVR_HAVE_ADIW && ii.m_regno >= REG_22)
2407 ? (1 + hamm) / 2
2408 : hamm;
2410 // Similarly, when MOVW doesn't decrease the Hamming distance by 2,
2411 // then we know that at least 2 - dhamming plys must follow in the
2412 // future. (MOVW + ADIW will not occur.)
2413 if (p.is_movw ())
2414 future_cost = std::max (future_cost, 2 - p.dhamming);
2416 if (extra && future_cost)
2417 avr_dump (";; future cost = %d, dh=%d\n", future_cost, hamm);
2419 cost += SCALE * future_cost;
2421 bool profitable = (cost < SCALE * fpd->max_ply_cost
2422 || (bbinfo_t::try_split_any_p
2423 && fpd->solution.n_plies == 0
2424 && cost / SCALE <= fpd->max_ply_cost
2425 && cost / SCALE == fpd->movmode_cost));
2426 if (! profitable)
2428 if (extra)
2429 avr_dump (";; cont=cost %d+%d/%d\n", cost / SCALE, penal, SCALE);
2430 continue;
2433 if (hamm)
2435 // Go down that rabbit hole.
2436 gcc_assert (ply_applied_p);
2437 bbinfo_t::find_plies (1 + len, ii, memo);
2438 continue;
2441 // Found a solution that's better than everything so far.
2443 // Reduce the upper cost bound according to the found solution.
2444 // No future solution will be more expensive.
2445 fpd->max_ply_cost = cost / SCALE;
2447 fpd->solution = plies_t (len, fpd->ply_stack);
2449 if (dump_file)
2451 avr_dump (";; #%d FOUND COST = %d%s\n", len, cost / SCALE,
2452 penal ? " with penalty" : "");
2453 ply_t::dump_plys (dump_file, 0, len, fpd->ply_stack, fpd->regs0);
2454 if (extra)
2455 avr_dump (";; END\n");
2457 } // for ply_t's
2459 #undef NEXT
2463 // Run .find_plies() and return true when .fpd->solution is a sequence of ply_t's
2464 // that represents II, a REG = CONST insn. MEMO is the GPR state prior to II.
2465 bool
2466 bbinfo_t::run_find_plies (const insninfo_t &ii, const memento_t &memo) const
2468 fpd->solution.reset ();
2469 fpd->regs0 = memo;
2470 fpd->n_get_plies = 0;
2472 const int hamm = ii.hamming (memo);
2474 if (hamm == 0)
2476 avr_dump (";; Found redundant insn %d\n",
2477 ii.m_insn ? INSN_UID (ii.m_insn) : 0);
2478 return true;
2481 // Upper bound (in words) for any solution that's better than mov<mode>.
2482 // Will be decreased by find_plies as it finds better solutions.
2483 fpd->movmode_cost = ii.cost ();
2484 fpd->max_ply_cost = fpd->movmode_cost;
2486 // With a non-zero Hamming distance, this insn will require at least one
2487 // instruction. When the upper bound for required instructions is that
2488 // small, then the current insn is good enough.
2489 if (fpd->max_ply_cost <= 1)
2490 return false;
2492 fpd->n_best_plys = ii.n_best_plys (hamm);
2493 gcc_assert (fpd->n_best_plys <= N_BEST_PLYS);
2495 if (dump_file)
2497 const uint64_t mask = size_to_mask (ii.m_size);
2498 fprintf (dump_file, ";; find_plies R%d[%d] = 0x%0*" PRIx64,
2499 ii.m_regno, ii.m_size, 2 * ii.m_size, ii.m_isrc & mask);
2500 if (ii.m_scratch)
2501 fprintf (dump_file, ", scratch=r%d", ii.m_scratch);
2502 memo.dump ("\n;; regs%s\n");
2505 avr_dump (";; mov<mode> cost = %d\n", fpd->max_ply_cost);
2506 avr_dump (";; max plys = %d\n", fpd->n_best_plys);
2507 ply_t::n_ply_ts = 0;
2509 find_plies (1, ii, memo);
2511 avr_dump (";; get_plies called %d times\n", fpd->n_get_plies);
2512 avr_dump (";; n_ply_ts = %d\n", ply_t::n_ply_ts);
2513 ply_t::max_n_ply_ts = std::max (ply_t::max_n_ply_ts, ply_t::n_ply_ts);
2515 return fpd->solution.n_plies != 0;
2519 // Try to propagate __zero_reg__ to a mem = reg insn's source.
2520 // Returns true on success and sets .n_new_insns.
2521 bool
2522 optimize_data_t::try_mem0 (bbinfo_t *)
2524 rtx_insn *insn = curr.ii.m_insn;
2525 rtx set, mem, reg;
2526 machine_mode mode;
2528 if (insn
2529 && (set = single_set (insn))
2530 && MEM_P (mem = SET_DEST (set))
2531 && REG_P (reg = SET_SRC (set))
2532 && GET_MODE_SIZE (mode = GET_MODE (mem)) <= 4
2533 && END_REGNO (reg) <= REG_32
2534 && ! (regmask (reg) & memento_t::fixed_regs_mask)
2535 && curr.regs.have_value (REGNO (reg), GET_MODE_SIZE (mode), 0x0))
2537 avr_dump (";; Found insn %d: mem:%m = 0 = r%d\n", INSN_UID (insn),
2538 mode, REGNO (reg));
2540 // Some insns like PUSHes don't clobber REG_CC.
2541 bool clobbers_cc = GET_CODE (PATTERN (insn)) == PARALLEL;
2543 if (clobbers_cc)
2544 emit_valid_move_clobbercc (mem, CONST0_RTX (mode));
2545 else
2546 emit_valid_insn (gen_rtx_SET (mem, CONST0_RTX (mode)));
2548 n_new_insns = 1;
2550 return true;
2553 return false;
2557 // Try to fuse two 1-byte insns .prev and .curr to one 2-byte insn (MOVW).
2558 // Returns true on success, and sets .n_new_insns, .ignore_mask etc.
2559 bool
2560 optimize_data_t::try_fuse (bbinfo_t *bbi)
2562 insninfo_t comb;
2564 if (! prev.ii.m_size
2565 || ! curr.ii.m_size
2566 || ! comb.combine (prev.ii, curr.ii))
2567 return false;
2569 avr_dump (";; Working on fuse of insn %d + insn %d = 0x%04x\n",
2570 INSN_UID (prev.insn), INSN_UID (curr.insn),
2571 (unsigned) comb.m_isrc);
2573 bool found = bbi->run_find_plies (comb, prev.regs);
2574 if (found)
2576 avr_dump (";; Found fuse of insns %d and %d\n",
2577 INSN_UID (prev.insn), INSN_UID (curr.insn));
2579 n_new_insns = bbinfo_t::fpd->solution.emit_insns (comb, prev.regs);
2580 delete_prev_p = true;
2582 if (prev.ii.m_scratch)
2583 ignore_mask |= regmask (prev.ii.m_scratch, 1);
2584 if (curr.ii.m_scratch)
2585 ignore_mask |= regmask (curr.ii.m_scratch, 1);
2586 ignore_mask &= ~regmask (comb.m_regno, comb.m_size);
2589 return found;
2593 // Try to replace an arithmetic 1-byte insn by a reg-reg move.
2594 // Returns true on success, and sets .n_new_insns etc.
2595 bool
2596 optimize_data_t::try_simplify (bbinfo_t *)
2598 if (curr.ii.m_size == 1
2599 && curr.ii.m_old_code != REG
2600 && curr.ii.m_code == REG)
2602 avr_dump (";; Found simplify of insn %d\n", INSN_UID (curr.insn));
2604 n_new_insns = curr.ii.emit_insn ();
2606 return true;
2609 return false;
2613 // Try to replace XEXP (*, 1) of a binary operation by a cheaper expression.
2614 // Returns true on success; sets .n_new_insns, .ignore_mask, .delete_prev_p.
2615 bool
2616 optimize_data_t::try_bin_arg1 (bbinfo_t *)
2618 if (curr.ii.m_size != 1
2619 || curr.ii.m_new_src.arity () != 2
2620 || curr.unused)
2621 return false;
2623 avr_dump (";; Working on bin_arg1 insn %d\n", INSN_UID (curr.insn));
2625 gcc_assert (curr.ii.m_src && BINARY_P (curr.ii.m_src));
2626 rtx xarg1_old = XEXP (curr.ii.m_src, 1);
2628 const absint_byte_t &aib = curr.ii.m_new_src;
2629 const absint_val_t &arg0 = aib.arg (0);
2630 const absint_val_t &arg1 = aib.arg (1);
2631 const absint_val_t &arg1_old = curr.ii.m_ai[0].arg (1);
2633 rtx src = NULL_RTX;
2635 if (CONSTANT_P (xarg1_old))
2637 // Sometimes, we allow expensive constants as the 2nd operand, like
2638 // in R2 += 2 which produces two INCs. When we have the
2639 // constant handy in a reg, then use that instead of the constant.
2640 const rtx_code code = aib.get_code ();
2641 gcc_assert (arg1.val8 == (INTVAL (xarg1_old) & 0xff));
2643 if (AVRasm::constant_cost (code, arg0.regno, arg1.val8) > 1)
2644 src = aib.to_rtx ();
2646 else if (REG_P (xarg1_old)
2647 && dead_or_set_p (curr.insn, xarg1_old))
2649 src = aib.to_rtx ();
2651 // The 2nd operand is a reg with a known content that dies
2652 // at the current insn. Chances are high that the register
2653 // holds a reload value only used by the current insn.
2654 if (prev.ii.m_size == 1
2655 && rtx_equal_p (xarg1_old, SET_DEST (prev.ii.m_set))
2656 && CONSTANT_P (prev.ii.m_src))
2658 avr_dump (";; Found dying reload insn %d\n", INSN_UID (prev.insn));
2660 delete_prev_p = true;
2661 ignore_mask = regmask (arg1_old.regno, 1);
2665 if (src)
2667 rtx dest = SET_DEST (curr.ii.m_set);
2669 avr_dump (";; Found bin_arg1 for insn %d: ", INSN_UID (curr.insn));
2670 avr_dump ("%C:%m %r", curr.ii.m_code, GET_MODE (dest), xarg1_old);
2671 aib.dump (" = %s\n");
2673 emit_valid_move_clobbercc (dest, src);
2674 n_new_insns = 1;
2677 return src != NULL_RTX;
2681 // Try to replace a REG = CONST insn by a cheaper sequence.
2682 // Returns true on success, and sets .n_new_insns, .ignore_mask etc.
2683 bool
2684 optimize_data_t::try_split_ldi (bbinfo_t *bbi)
2686 if (! curr.ii.m_size
2687 || curr.unused
2688 || curr.ii.m_code != CONST_INT
2689 || (! bbinfo_t::try_split_any_p
2690 // Finding plys will only ever succeed when there are
2691 // regs with a known value.
2692 && ! (curr.regs.known
2693 || (AVR_HAVE_MOVW
2694 && curr.ii.m_regno < REG_16 && curr.ii.m_size == 4))))
2695 return false;
2697 avr_dump (";; Working on split_ldi insn %d\n", INSN_UID (curr.insn));
2699 bool found = bbi->run_find_plies (curr.ii, curr.regs);
2700 if (found)
2702 avr_dump (";; Found split for ldi insn %d\n", INSN_UID (curr.insn));
2704 n_new_insns = bbinfo_t::fpd->solution.emit_insns (curr.ii, curr.regs);
2706 if (curr.ii.m_scratch)
2707 ignore_mask = regmask (curr.ii.m_scratch, 1);
2710 return found;
2714 // Helper for try_split_any().
2715 bool
2716 optimize_data_t::fail (const char *reason)
2718 n_new_insns = -1;
2720 if (dump_file)
2721 fprintf (dump_file, ";; Giving up split_any: %s\n", reason);
2723 return false;
2727 // Helper for try_split_any().
2728 rtx_insn *
2729 optimize_data_t::emit_and_apply_move (memento_t &memo, rtx dest, rtx src)
2731 rtx_insn *insn = emit_valid_move_clobbercc (dest, src);
2732 n_new_insns += 1;
2733 memo.apply_insn (insn, false);
2735 return insn;
2739 // Set X0 and X1 so that they are valid operands for an andqi3, iorqi3,
2740 // xorqi3 or addqi3 insn with destination R_DEST.  The method loads X1
2741 // into a scratch reg as needed and records the GPR effect in IOD.regs.
2742 // EXTRA_COST are extra costs in units of words of insns that cost more
2743 // than one instruction. This is a helper for try_split_any().
2744 bool
2745 optimize_data_t
2746 ::get_2ary_operands (rtx_code &code, const absint_byte_t &aib,
2747 insn_optimize_data_t &iod, int r_dest,
2748 absint_val_t &x0, absint_val_t &x1, int &extra_cost)
2750 if (code != IOR && code != AND && code != XOR && code != PLUS)
2751 return fail ("2ary: unknown code");
2753 x0 = aib.arg (0);
2754 x1 = aib.arg (1);
2756 if (! x0.knows_regno ()
2757 || x1.clueless ())
2758 return fail ("2ary: clueless");
2760 int val8 = x1.val8;
2761 int val8_cost = val8 < 0 ? 100 : AVRasm::constant_cost (code, r_dest, val8);
2763 if (x0.regno == r_dest
2764 && (x1.knows_regno ()
2765 || val8_cost <= 1))
2767 if (code == XOR
2768 && val8 == 0x80
2769 && x0.regno >= REG_16)
2771 // xorxi3 only supports the "r,0,r" alternative.
2772 // x0 ^ 0x80 <=> x0 - 0x80.
2773 x1.regno = 0;
2774 code = MINUS;
2776 return true;
2779 const bool and_1_bit = code == AND && popcount_hwi (val8) == 1;
2780 // andqi3 has a "r,r,Cb1" alternative where Cb1 has exactly 1 bit set.
2781 // This can accommodate bytes of higher AND Cb<N> alternatives.
2782 if (x0.regno != r_dest)
2784 if (and_1_bit)
2786 extra_cost += 1 + (r_dest < REG_16);
2787 return true;
2789 else if (x1.regno == r_dest)
2791 std::swap (x0, x1);
2792 return true;
2794 return fail ("2ary is a 3-operand insn");
2797 // Now we have:
2798 // 1) r_dest = x0.regno, and
2799 // 2) x1 is val8, and
2800 // 3) x1 costs 2.
2802 const bool needs_scratch_p = select<bool>()
2803 : code == XOR ? true
2804 : code == AND ? popcount_hwi (val8) != 7
2805 : code == IOR ? popcount_hwi (val8) != 1
2806 : code == PLUS ? IN_RANGE (val8, 3, 0xff - 3)
2807 : bad_case<bool> ();
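// For instance, IOR with exactly one set bit or AND with exactly one
// zero bit can be done with SET resp. CLT followed by BLD, and PLUS
// with val8 in { 1, 2, 0xfd, 0xfe, 0xff } as a short INC / DEC chain;
// hence these cases need no scratch reg.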
2809 const int r_val8 = iod.regs.regno_with_value (val8, 0 /* excludes: none */);
2810 if (r_val8)
2812 // Found a reg that already holds the constant.
2813 x1.val8 = -1;
2814 x1.regno = r_val8;
2815 return true;
2817 else if (iod.ii.m_scratch)
2819 // Using the insn's scratch reg.
2820 rtx xdst = gen_rtx_REG (QImode, iod.ii.m_scratch);
2821 rtx xsrc = gen_int_mode (x1.val8, QImode);
2822 emit_and_apply_move (iod.regs, xdst, xsrc);
2824 x1.regno = iod.ii.m_scratch;
2825 x1.val8 = -1;
2827 return true;
2829 else if (! needs_scratch_p)
2831 // Some constants (1 and -1) can be loaded without a scratch.
2832 extra_cost += 1;
2833 return true;
2835 else if (and_1_bit)
2837 // This can always fall back to BST + CLR + BLD, but may be cheaper.
2838 extra_cost += 1 + (r_dest < REG_16);
2839 return true;
2842 return fail ("2ary: expensive constant");
2846 static inline bool
2847 any_shift_p (rtx_code code)
2849 return code == LSHIFTRT || code == ASHIFTRT || code == ASHIFT;
2852 // Try to split .curr into a sequence of 1-byte insns.
2853 // Returns true on success. Sets .n_new_insns and .ignore_mask.
2854 bool
2855 optimize_data_t::try_split_any (bbinfo_t *)
2857 if (curr.ii.m_size < 2
2858 // Constants are split by split_ldi.
2859 || CONSTANT_P (curr.ii.m_src)
2860 // Splitting requires knowledge about what to do with each byte.
2861 || curr.ii.m_ai.end_knows (VALUE) < curr.ii.m_size)
2862 return false;
2864 avr_dump (";; Working on split_any %C:%m insn %d\n", curr.ii.m_code,
2865 GET_MODE (SET_DEST (curr.ii.m_set)), INSN_UID (curr.insn));
2867 const insninfo_t &ii = curr.ii;
2868 const int n_bytes = ii.m_size;
2869 int extra_cost = 0;
2870 int binop_cost = -1;
2872 // For plain AND, IOR, XOR get the current cost in units of words.
2873 if (BINARY_P (curr.ii.m_src))
2875 const rtx_code code = curr.ii.m_code;
2876 if ((code == IOR || code == AND || code == XOR)
2877 && REG_P (XEXP (curr.ii.m_src, 0))
2878 && CONSTANT_P (XEXP (curr.ii.m_src, 1)))
2880 binop_cost = get_attr_length (curr.insn);
2881 avr_dump (";; Competing against %C:%m cost = %d\n", code,
2882 GET_MODE (curr.ii.m_src), binop_cost);
2886 // Step 1: Work out conflicts and which sign extends to perform.
2888 const gprmask_t regs_dest = regmask (ii.m_regno, n_bytes);
2889 int r_sign = 0;
2890 gprmask_t regs_signs = 0;
2891 bool has_lsl = false;
2892 bool has_lsr = false;
2894 for (int i = 0; i < n_bytes; ++i)
2896 const absint_byte_t &aib = ii.m_ai[i];
2897 const int r_dest = ii.m_regno + i;
2898 const gprmask_t regs_src = aib.reg_mask ();
2900 // When only regs to the right are used, or only regs to the left
2901 // are used, then there's no conflict like the one arising for rotates.
2902 // For now, only implement conflict-free splits.
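// E.g. a 16-bit shift R25:R24 <<= 8 assigns R25 = R24, i.e. byte 1
// reads a byte to its right: has_lsl will be set, and the bytes are
// emitted high-to-low (step = -1 below) so that no input byte is
// clobbered before it is read.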
2903 has_lsl |= has_bits_in (regs_src & regs_dest, 0, r_dest - 1);
2904 has_lsr |= has_bits_in (regs_src & regs_dest, r_dest + 1, 31);
2905 if (has_lsl && has_lsr)
2906 return fail ("has both << and >>");
2908 if (aib.get_code () == SIGN_EXTEND)
2910 const absint_val_t x0 = aib.arg (0);
2911 if (! r_sign)
2912 r_sign = x0.regno;
2913 else if (r_sign != x0.regno)
2914 return fail ("too many signs");
2916 // Signs are handled below after all the other bytes.
2917 regs_signs |= regmask (r_dest, 1);
2921 // Step 2: Work on the individual bytes and emit according insns.
2923 n_new_insns = 0;
2924 memento_t memo = curr.regs;
2926 const int step = has_lsl ? -1 : 1;
2927 const int istart = step == 1 ? 0 : n_bytes - 1;
2928 const int iend = step == 1 ? n_bytes : -1;
2930 for (int i = istart; i != iend; i += step)
2932 const absint_byte_t &aib = ii.m_ai[i];
2933 const int r_dest = ii.m_regno + i;
2934 rtx_code code = aib.get_code ();
2935 rtx xsrc = NULL_RTX;
2936 rtx xdest = gen_rtx_REG (QImode, r_dest);
2938 if (code == SET)
2940 const int r_src = aib.regno (false);
2941 const int val8 = aib.val8 (false);
2942 int r16;
2944 // A no-op...
2945 if (r_dest == r_src)
2946 continue;
2947 // ...or an existing 16-bit constant...
2948 else if (AVR_HAVE_MOVW
2949 && i + step != iend
2950 // Next is not a no-op.
2951 && ii.m_ai[i + step].regno (false) != r_dest + step
2952 // Eligible for MOVW.
2953 && r_dest + step == (r_dest ^ 1)
2954 && r_dest % 2 == i % 2
2955 && (r16 = ii.m_ai.reg16_with_value (i, i + step, memo)))
2957 xdest = gen_rtx_REG (HImode, r_dest & ~1);
2958 xsrc = gen_rtx_REG (HImode, r16);
2959 i += step;
2961 // ...or a reg-reg move from a multi-byte move...
2962 else if (r_src
2963 // Prefer a reg-reg move over a (potential) load
2964 // of a constant, because the subsequent RTL
2965 // peephole pass may combine it to a MOVW again.
2966 && AVR_HAVE_MOVW
2967 && REG_P (curr.ii.m_src))
2968 xsrc = gen_rtx_REG (QImode, r_src);
2969 // ...or a cheap constant...
2970 else if (val8 >= 0
2971 && AVRasm::constant_cost (SET, r_dest, val8) <= 1)
2972 xsrc = gen_int_mode (val8, QImode);
2973 // ...or a reg-reg move...
2974 else if (r_src)
2975 xsrc = gen_rtx_REG (QImode, r_src);
2976 // ...or a costly constant that already exists in some reg...
2977 else if (memo.regno_with_value (val8, 0 /* excludes: none */))
2978 xsrc = gen_rtx_REG (QImode, memo.regno_with_value (val8, 0));
2979 // ...or a costly constant loaded into curr.insn's scratch reg...
2980 else if (ii.m_scratch)
2982 rtx xscratch = gen_rtx_REG (QImode, ii.m_scratch);
2983 rtx xval8 = gen_int_mode (val8, QImode);
2984 emit_and_apply_move (memo, xscratch, xval8);
2985 xsrc = xscratch;
2987 // ...or a costly constant (1 or -1) that doesn't need a scratch.
2988 else if (! AVRasm::ldi_needs_scratch (r_dest, val8))
2990 extra_cost += 1;
2991 xsrc = gen_int_mode (val8, QImode);
2993 else
2994 return fail ("expensive val8");
2995 } // SET
2996 else if (aib.arity () == 1)
2998 if (aib.get_code () == SIGN_EXTEND)
2999 // Signs are handled after all the others.
3000 continue;
3001 else
3003 const absint_val_t x0 = aib.arg (0);
3004 rtx xop0 = gen_rtx_REG (QImode, x0.regno);
3005 xsrc = gen_rtx_fmt_e (code, QImode, xop0);
3007 } // unary
3008 else if (aib.arity () == 2)
3010 absint_val_t x0;
3011 absint_val_t x1;
3012 insn_optimize_data_t iod (memo);
3013 iod.ii = curr.ii;
3015 if (! get_2ary_operands (code, aib, iod, r_dest, x0, x1, extra_cost))
3016 return false;
3017 rtx xop0 = gen_rtx_REG (QImode, x0.regno);
3018 rtx xop1 = x1.knows_val8 ()
3019 ? gen_int_mode (x1.val8, QImode)
3020 : gen_rtx_REG (QImode, x1.regno);
3022 xsrc = gen_rtx_fmt_ee (code, QImode, xop0, xop1);
3023 } // binary
3025 if (! xsrc)
3026 return fail ("no source found");
3028 if (r_sign
3029 && (regmask (xdest) & regmask (r_sign, 1)))
3030 return fail ("clobbered r_sign");
3032 emit_and_apply_move (memo, xdest, xsrc);
3035 // Step 3: Emit insns for sign extend.
3036 // No more need to track memo beyond this point.
3038 if (! emit_signs (r_sign, regs_signs))
3039 return false;
3041 if (binop_cost >= 0)
3043 avr_dump (";; Expected cost: %d + %d\n", n_new_insns, extra_cost);
3044 if (n_new_insns + extra_cost > binop_cost)
3045 return fail ("too expensive");
3048 if (ii.m_scratch)
3049 ignore_mask = regmask (ii.m_scratch, 1);
3051 return true;
3055 // A helper for try_split_any() above.
3056 // Emit sign extends from R_MSB.7 to all regs in REGS_SIGNS.
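// For instance (illustrative): extending the sign of R24 into R25..R27
// first emits R25 = R24 >> 7 using the r,r,C07 alternative of ashrqi3,
// and then propagates R25 to R26 and R27 by byte moves (or by one MOVW
// when a matching 16-bit pair is available).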
3057 bool
3058 optimize_data_t::emit_signs (const int r_msb, gprmask_t regs_signs)
3060 if (! regs_signs)
3061 return true;
3062 else if (! r_msb)
3063 return fail ("fatal: no r_msb given");
3065 // Pick an arbitrary reg from the sign destinations when the source
3066 // isn't one of the signs.
3067 const int r_signs = regs_signs & regmask (r_msb, 1)
3068 ? r_msb
3069 : ctz_hwi (regs_signs);
3071 // Set all bits in r_signs according to the sign of r_msb using the
3072 // r,r,C07 alternative of ashrqi3.
3073 rtx xsrc = gen_rtx_fmt_ee (ASHIFTRT, QImode,
3074 gen_rtx_REG (QImode, r_msb), GEN_INT (7));
3075 emit_valid_move_clobbercc (gen_rtx_REG (QImode, r_signs), xsrc);
3076 regs_signs &= ~regmask (r_signs, 1);
3078 // Set up a 16-bit sign register if possible.
3079 int r16_signs = 0;
3080 if (regs_signs & regmask (r_signs ^ 1, 1))
3082 emit_move_mask (r_signs ^ 1, r_signs, 1, regs_signs);
3083 r16_signs = r_signs & ~1;
3086 // Handle all 16-bit sign regs, provided we have MOVW.
3087 if (AVR_HAVE_MOVW)
3088 for (int r = FIRST_GPR; r < REG_32; r += 2)
3090 const gprmask_t m = regmask (r, 2);
3091 if ((m & regs_signs) == m)
3093 if (r16_signs)
3094 emit_move_mask (r, r16_signs, 2, regs_signs);
3095 else
3097 emit_move_mask (r + 0, r_signs, 1, regs_signs);
3098 emit_move_mask (r + 1, r_signs, 1, regs_signs);
3099 r16_signs = r;
3104 // Handle all remaining signs.
3105 while (regs_signs)
3106 emit_move_mask (ctz_hwi (regs_signs), r_signs, 1, regs_signs);
3108 return true;
3111 // Helper for the method above. Move N_BYTES registers from R_SRC to R_DST,
3112 // keeping track of which regs are still to be done in MASK.
3113 void
3114 optimize_data_t::emit_move_mask (int r_dst, int r_src, int n_bytes,
3115 gprmask_t &mask)
3117 const gprmask_t mask_dst = regmask (r_dst, n_bytes);
3118 const gprmask_t mask_src = regmask (r_src, n_bytes);
3119 gcc_assert ((mask_dst & mask) == mask_dst);
3120 gcc_assert ((mask_src & mask) == 0);
3121 rtx xdst = gen_rtx_REG (size_to_mode (n_bytes), r_dst);
3122 rtx xsrc = gen_rtx_REG (size_to_mode (n_bytes), r_src);
3123 emit_valid_move_clobbercc (xdst, xsrc);
3124 n_new_insns += 1;
3125 mask &= ~mask_dst;
3129 void
3130 bbinfo_t::optimize_one_block (bool &changed)
3132 memento_t prev_regs;
3134 rtx_insn *insn = next_nondebug_insn_bb (bb, BB_HEAD (bb));
3136 for (rtx_insn *next_insn; insn; insn = next_insn)
3138 next_insn = next_nondebug_insn_bb (bb, insn);
3140 avr_dump ("\n;; Working on insn %d\n%r\n\n", INSN_UID (insn), insn);
3142 optimize_data_t od (prev_regs, regs);
3144 od.prev.insn = prev_nondebug_insn_bb (bb, insn);
3145 od.curr.insn = insn;
3147 od.prev.ii.init1 (od.prev, 1, "IIprev ");
3148 od.curr.ii.init1 (od.curr, 8, "IIcurr ");
3150 start_sequence ();
3152 bool found = ((bbinfo_t::try_fuse_p && od.try_fuse (this))
3153 || (bbinfo_t::try_bin_arg1_p && od.try_bin_arg1 (this))
3154 || (bbinfo_t::try_simplify_p && od.try_simplify (this))
3155 || (bbinfo_t::try_split_ldi_p && od.try_split_ldi (this))
3156 || (bbinfo_t::try_split_any_p && od.try_split_any (this))
3157 || (bbinfo_t::try_mem0_p && od.try_mem0 (this)));
3159 rtx_insn *new_insns = get_insns ();
3160 end_sequence ();
3162 gcc_assert (found == (od.n_new_insns >= 0));
3164 ++tick;
3166 // This insn will become the previous one in the next loop iteration.
3167 // Just used in dumps.
3168 rtx_insn *new_curr_insn;
3170 if (! found)
3172 // Nothing changed.
3173 avr_dump (";; Keeping old route.\n");
3174 gcc_assert (! od.delete_prev_p);
3176 prev_regs = regs;
3177 regs.apply_insn (insn, false);
3179 new_curr_insn = insn;
3181 else
3183 // We have new_insns.
3184 changed = true;
3186 if (dump_file)
3188 avr_dump ("\n;; EMIT %d new insn%s replacing ",
3189 od.n_new_insns, "s" + (od.n_new_insns == 1));
3190 if (od.delete_prev_p)
3191 avr_dump ("insn %d and ", INSN_UID (od.prev.insn));
3192 avr_dump ("insn %d, delete_prev=%d:\n%L\n", INSN_UID (insn),
3193 od.delete_prev_p, new_insns);
3196 new_curr_insn = od.emit_sequence (bb, new_insns);
3197 } // found
3199 if (dump_file && new_curr_insn)
3201 avr_dump ("\n");
3203 const int d = regs.distance_to (prev_regs);
3204 if (d || new_curr_insn != insn)
3205 avr_dump (";; %d regs changed state:\n", d);
3207 if (new_curr_insn != insn)
3209 avr_dump (";; Befor insn %d", INSN_UID (new_curr_insn));
3210 prev_regs.dump ();
3213 avr_dump (";; After insn %d", INSN_UID (new_curr_insn));
3214 regs.dump ();
3216 } // for BB insns
3220 void
3221 bbinfo_t::optimize_one_function (function *func)
3223 bbinfo_t::fpd = XNEW (bbinfo_t::find_plies_data_t);
3224 bbinfo_t::bb_info = XCNEWVEC (bbinfo_t, last_basic_block_for_fn (func));
3225 int *post_order = XNEWVEC (int, n_basic_blocks_for_fn (func));
3227 plies_t::max_n_plies = 0;
3229 using elt0_getter_HRS = elt0_getter<HARD_REG_SET, HARD_REG_ELT_TYPE>;
3230 memento_t::fixed_regs_mask = (gprmask_t) elt0_getter_HRS::get (fixed_reg_set);
3232 // Option -mfuse-move=<0,23> provides a 3:2:2:2 mixed radix value:
3233 // -mfuse-move=  0  1  2  3  4  5  6  7  8  9 10  1  2  3  4  5  6  7  8  9 20  1  2  3   Digit
3234 // fuse             1     1     1     1     1     1     1     1     1     1     1     1     0
3235 // bin_arg1            1  1        1  1        1  1        1  1        1  1        1  1     1
3236 // split_any                 1  1  1  1              1  1  1  1              1  1  1  1     2
3237 // split_ldi                             1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1     3
3238 // use arith                                                     1  1  1  1  1  1  1  1     3
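// For example (illustrative decodings): -mfuse-move=11 has digits
// 1:0:1:1 and enables try_fuse, try_bin_arg1 and try_split_ldi but not
// try_split_any, whereas -mfuse-move=23 has digits 2:1:1:1 and enables
// all of the above plus use_arith_p.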
3240 // Which optimization(s) to perform.
3241 bbinfo_t::try_fuse_p = avropt_fuse_move & 0x1; // Digit 0 in [0, 1].
3242 bbinfo_t::try_mem0_p = avropt_fuse_move & 0x1; // Digit 0 in [0, 1].
3243 bbinfo_t::try_bin_arg1_p = avropt_fuse_move & 0x2; // Digit 1 in [0, 1].
3244 bbinfo_t::try_split_any_p = avropt_fuse_move & 0x4; // Digit 2 in [0, 1].
3245 bbinfo_t::try_split_ldi_p = avropt_fuse_move >> 3; // Digit 3 in [0, 2].
3246 bbinfo_t::use_arith_p = (avropt_fuse_move >> 3) >= 2; // Digit 3 in [0, 2].
3247 bbinfo_t::use_set_some_p = bbinfo_t::try_split_ldi_p; // Digit 3 in [0, 2].
3248 bbinfo_t::try_simplify_p = avropt_fuse_move != 0;
3250 // Topologically sort BBs from last to first.
3252 const int n_post_order = post_order_compute (post_order, false, false);
3253 bool changed = false;
3255 // Traverse the BBs from first to last in order to increase the chance
3256 // that register values from all incoming edges are known.
3258 for (int n = n_post_order - 1; n >= 0; --n)
3260 basic_block bb = BASIC_BLOCK_FOR_FN (func, post_order[n]);
3262 bbinfo_t::bb_info[bb->index].bb = bb;
3263 bbinfo_t::bb_info[bb->index].enter ();
3264 bbinfo_t::bb_info[bb->index].optimize_one_block (changed);
3265 bbinfo_t::bb_info[bb->index].leave ();
3268 if (plies_t::max_n_plies)
3269 avr_dump (";; max_n_plies=%d\n", (int) plies_t::max_n_plies);
3271 if (changed)
3273 df_note_add_problem ();
3274 df_analyze ();
3277 XDELETEVEC (post_order);
3278 XDELETEVEC (bbinfo_t::bb_info);
3279 XDELETE (bbinfo_t::fpd);
3282 } // anonymous namespace
3285 namespace
3289 //////////////////////////////////////////////////////////////////////////////
3290 // Try to replace 2 cbranch insns with 1 comparison and 2 branches.
3292 static const pass_data avr_pass_data_ifelse =
3294 RTL_PASS, // type
3295 "", // name (will be patched)
3296 OPTGROUP_NONE, // optinfo_flags
3297 TV_DF_SCAN, // tv_id
3298 0, // properties_required
3299 0, // properties_provided
3300 0, // properties_destroyed
3301 0, // todo_flags_start
3302 TODO_df_finish | TODO_df_verify // todo_flags_finish
3305 class avr_pass_ifelse : public rtl_opt_pass
3307 public:
3308 avr_pass_ifelse (gcc::context *ctxt, const char *name)
3309 : rtl_opt_pass (avr_pass_data_ifelse, ctxt)
3311 this->name = name;
3314 bool gate (function *) final override
3316 return optimize > 0;
3319 unsigned int execute (function *func) final override;
3320 }; // avr_pass_ifelse
3323 /* Return TRUE iff comparison code CODE is explicitly signed. */
3325 static bool
3326 avr_strict_signed_p (rtx_code code)
3328 return code == GT || code == GE || code == LT || code == LE;
3332 /* Return TRUE iff comparison code CODE is explicitly unsigned. */
3334 static bool
3335 avr_strict_unsigned_p (rtx_code code)
3337 return code == GTU || code == GEU || code == LTU || code == LEU;
3340 #include "config/avr/ranges.h"
3342 /* Suppose the inputs represent code like
3344 if (x <CMP1> XVAL1) goto way1;
3345 if (x <CMP2> XVAL2) goto way2;
3346 way3:;
3348 with two integer mode comparisons where XVAL1 and XVAL2 are CONST_INT.
3349 When this can be rewritten in the form
3351 if (x <cond1> xval) goto way1;
3352 if (x <cond2> xval) goto way2;
3353 way3:;
3355 then set CMP1 = cond1, CMP2 = cond2, and return xval. Else return NULL_RTX.
3356 When SWAPT is returned true, then way1 and way2 must be swapped.
3357 When the incoming SWAPT is false, the outgoing one will be false, too. */
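/* A worked example (illustrative): for

       if (x <LTU> 1) goto way1;   // i.e. x == 0
       if (x <LTU> 2) goto way2;   // i.e. x == 1

   the domain decomposes into [0, 0], [1, 1] and [2, max], so that
   xval = 1 serves both branches with cmp1 = LTU and cmp2 = EQ.  */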
3359 static rtx
3360 avr_2comparisons_rhs (rtx_code &cmp1, rtx xval1,
3361 rtx_code &cmp2, rtx xval2,
3362 machine_mode mode, bool &swapt)
3364 const bool may_swapt = swapt;
3365 swapt = false;
3367 //////////////////////////////////////////////////////////////////
3368 // Step 0: Decide about signedness, map xval1/2 to the range
3369 // of [un]signed machine mode.
3371 const bool signed1_p = avr_strict_signed_p (cmp1);
3372 const bool signed2_p = avr_strict_signed_p (cmp2);
3373 const bool unsigned1_p = avr_strict_unsigned_p (cmp1);
3374 const bool unsigned2_p = avr_strict_unsigned_p (cmp2);
3375 const bool signed_p = signed1_p || signed2_p;
3376 bool unsigned_p = unsigned1_p || unsigned2_p;
3378 using T = Ranges::scalar_type;
3379 T val1 = INTVAL (xval1);
3380 T val2 = INTVAL (xval2);
3382 if (signed_p + unsigned_p > 1)
3384 // Don't go down that rabbit hole. When the RHSs are the
3385 // same, we can still save one comparison.
3386 return val1 == val2 ? xval1 : NULL_RTX;
3389 // Decide about signedness. When no explicit signedness is present,
3390 // then cases that are close to the unsigned boundary like EQ 0, EQ 1
3391 // can also be optimized.
3392 if (unsigned_p
3393 || (! signed_p && IN_RANGE (val1, -2, 2)))
3395 unsigned_p = true;
3396 val1 = UINTVAL (xval1) & GET_MODE_MASK (mode);
3397 val2 = UINTVAL (xval2) & GET_MODE_MASK (mode);
3400 // No way we can decompose the domain in a usable manner when the
3401 // RHSes are too far apart.
3402 if (! IN_RANGE (val1 - val2, -2, 2))
3403 return NULL_RTX;
3405 //////////////////////////////////////////////////////////////////
3406 // Step 1: Represent the input conditions as truth Ranges. This
3407 // establishes a decomposition / coloring of the domain.
3409 Ranges dom = Ranges::NBitsRanges (GET_MODE_BITSIZE (mode), unsigned_p,
3410 Ranges::ALL);
3411 Ranges r[4] = { dom, dom.truth (cmp1, val1), dom.truth (cmp2, val2), dom };
3413 // r[1] shadows r[2] shadows r[3]. r[0] is just for nice indices.
3414 r[3].minus (r[2]);
3415 r[3].minus (r[1]);
3416 r[2].minus (r[1]);
3418 //////////////////////////////////////////////////////////////////
3419 // Step 2: Filter for cases where the domain decomposes into three
3420 // intervals: One to the left, one to the right, and one
3421 // in the middle where the latter holds exactly one value.
3423 for (int i = 1; i <= 3; ++i)
3425 // Keep track of which Ranges is which.
3426 r[i].tag = i;
3428 gcc_assert (r[i].check ());
3430 // Filter for proper intervals. Also return for the empty set,
3431 // since cases where [m_min, m_max] decomposes into two intervals
3432 // or less have been sorted out by the generic optimizers already,
3433 // and hence should not be seen here. And more than two intervals
3434 // at a time cannot be optimized of course.
3435 if (r[i].size () != 1)
3436 return NULL_RTX;
3439 // Bubble-sort the three intervals such that:
3440 // [1] is the left interval, i.e. the one taken by LT[U].
3441 // [2] is the middle interval, i.e. the one taken by EQ.
3442 // [3] is the right interval, i.e. the one taken by GT[U].
3443 Ranges::sort2 (r[1], r[3]);
3444 Ranges::sort2 (r[2], r[3]);
3445 Ranges::sort2 (r[1], r[2]);
3447 if (dump_file)
3448 fprintf (dump_file,
3449 ";; Decomposed: .%d=[%ld, %ld] .%d=[%ld, %ld] .%d=[%ld, %ld]\n",
3450 r[1].tag, (long) r[1].ranges[0].lo, (long) r[1].ranges[0].hi,
3451 r[2].tag, (long) r[2].ranges[0].lo, (long) r[2].ranges[0].hi,
3452 r[3].tag, (long) r[3].ranges[0].lo, (long) r[3].ranges[0].hi);
3454 // EQ / NE can handle only one value.
3455 if (r[2].cardinality (0) != 1)
3456 return NULL_RTX;
3458 // Success! This is the sought for xval.
3459 const T val = r[2].ranges[0].lo;
3461 //////////////////////////////////////////////////////////////////
3462 // Step 3: Work out which label gets which condition, trying to
3463 // avoid the expensive codes GT[U] and LE[U] if possible.
3464 // Avoiding expensive codes is always possible when labels
3465 // way1 and way2 may be swapped.
3467 // The xx1 ways have an expensive GT for cmp1 which can be avoided
3468 // by swapping way1 with way2.
3469 swapt = may_swapt && r[3].tag == 1;
3470 if (swapt)
3471 std::swap (r[3], r[2].tag == 2 ? r[2] : r[1]);
3473 // 6 = 3! ways to assign LT, EQ, GT to the three labels.
3474 const int way = 100 * r[1].tag + 10 * r[2].tag + r[3].tag;
3476 if (dump_file)
3477 fprintf (dump_file, ";; Success: unsigned=%d, swapt=%d, way=%d, rhs=%ld\n",
3478 unsigned_p, swapt, way, (long) val);
3480 #define WAY(w, c1, c2) \
3481 case w: \
3482 cmp1 = unsigned_p ? unsigned_condition (c1) : c1; \
3483 cmp2 = unsigned_p ? unsigned_condition (c2) : c2; \
3484 break;
3486 switch (way)
3488 default:
3489 gcc_unreachable ();
3491 // cmp1 gets the LT, avoid difficult branches for cmp2.
3492 WAY (123, LT, EQ);
3493 WAY (132, LT, NE);
3495 // cmp1 gets the EQ, avoid difficult branches for cmp2.
3496 WAY (213, EQ, LT);
3497 WAY (312, EQ, GE);
3499 // cmp1 gets the difficult GT, unavoidable as we may not swap way1/2.
3500 WAY (231, GT, NE);
3501 WAY (321, GT, EQ);
3504 #undef WAY
3506 return gen_int_mode (val, mode);
3510 /* A helper for the next method. Suppose we have two conditional branches
3511 with REG and CONST_INT operands
3513 if (reg <cond1> xval1) goto label1;
3514 if (reg <cond2> xval2) goto label2;
3516 If the second comparison is redundant and there are codes <cmp1>
3517 and <cmp2> such that the sequence can be performed as
3519 REG_CC = compare (reg, xval);
3520 if (REG_CC <cmp1> 0) goto label1;
3521 if (REG_CC <cmp2> 0) goto label2;
3523 then set COND1 to cmp1, COND2 to cmp2, SWAPT to true when the branch
3524 targets have to be swapped, and return XVAL. Otherwise, return NULL_RTX.
3525 This function may clobber COND1 and COND2 even when it returns NULL_RTX.
3527 REVERSE_COND1 can be set to reverse condition COND1. This is useful
3528 when the second comparison does not follow the first one, but is
3529 located after label1 like in:
3531 if (reg <cond1> xval1) goto label1;
3533 label1:
3534 if (reg <cond2> xval2) goto label2;
3536 In such a case we cannot swap the labels, and we may end up with a
3537 difficult branch -- though one comparison can still be optimized out.
3538 Getting rid of such difficult branches would require reordering blocks. */
3540 static rtx
3541 avr_redundant_compare (rtx xreg1, rtx_code &cond1, rtx xval1,
3542 rtx xreg2, rtx_code &cond2, rtx xval2,
3543 bool reverse_cond1, bool &swapt)
3545 // Make sure we have two REG <cond> CONST_INT comparisons with the same reg.
3546 if (! rtx_equal_p (xreg1, xreg2)
3547 || ! CONST_INT_P (xval1)
3548 || ! CONST_INT_P (xval2))
3549 return NULL_RTX;
3551 if (reverse_cond1)
3552 cond1 = reverse_condition (cond1);
3554 // Allow swapping label1 <-> label2 only when ! reverse_cond1.
3555 swapt = ! reverse_cond1;
3556 rtx_code c1 = cond1;
3557 rtx_code c2 = cond2;
3558 rtx xval = avr_2comparisons_rhs (c1, xval1,
3559 c2, xval2, GET_MODE (xreg1), swapt);
3560 if (! xval)
3561 return NULL_RTX;
3563 if (dump_file)
3565 rtx_code a1 = reverse_cond1 ? reverse_condition (cond1) : cond1;
3566 rtx_code b1 = reverse_cond1 ? reverse_condition (c1) : c1;
3567 const char *s_rev1 = reverse_cond1 ? " reverse_cond1" : "";
3568 avr_dump (";; cond1: %C %r%s\n", a1, xval1, s_rev1);
3569 avr_dump (";; cond2: %C %r\n", cond2, xval2);
3570 avr_dump (";; => %C %d\n", b1, (int) INTVAL (xval));
3571 avr_dump (";; => %C %d\n", c2, (int) INTVAL (xval));
3574 cond1 = c1;
3575 cond2 = c2;
3577 return xval;
3581 /* Similar to the function above, but assume that
3583 if (xreg1 <cond1> xval1) goto label1;
3584 if (xreg2 <cond2> xval2) goto label2;
3586 are two subsequent REG-REG comparisons. When this can be represented as
3588 REG_CC = compare (reg, xval);
3589 if (REG_CC <cmp1> 0) goto label1;
3590 if (REG_CC <cmp2> 0) goto label2;
3592 then set XREG1 to reg, COND1 and COND2 accordingly, and return xval.
3593 Otherwise, return NULL_RTX. This optimization can be performed
3594 when { xreg1, xval1 } and { xreg2, xval2 } are equal as sets.
3595 It can be done in such a way that no difficult branches occur. */
3597 static rtx
3598 avr_redundant_compare_regs (rtx &xreg1, rtx_code &cond1, rtx &xval1,
3599 rtx &xreg2, rtx_code &cond2, rtx &xval2,
3600 bool reverse_cond1)
3602 bool swapped;
3604 if (! REG_P (xval1))
3605 return NULL_RTX;
3606 else if (rtx_equal_p (xreg1, xreg2)
3607 && rtx_equal_p (xval1, xval2))
3608 swapped = false;
3609 else if (rtx_equal_p (xreg1, xval2)
3610 && rtx_equal_p (xreg2, xval1))
3611 swapped = true;
3612 else
3613 return NULL_RTX;
3615 // Found a redundant REG-REG comparison. Assume that the incoming
3616 // representation has been canonicalized by CANONICALIZE_COMPARISON.
3617 // We can always represent this using only one comparison and in such
3618 // a way that no difficult branches are required.
3620 if (dump_file)
3622 const char *s_rev1 = reverse_cond1 ? " reverse_cond1" : "";
3623 avr_dump (";; %r %C %r%s\n", xreg1, cond1, xval1, s_rev1);
3624 avr_dump (";; %r %C %r\n", xreg2, cond2, xval2);
3627 if (reverse_cond1)
3628 cond1 = reverse_condition (cond1);
3630 if (swapped)
3632 if (cond1 == EQ || cond1 == NE)
3634 avr_dump (";; case #21\n");
3635 std::swap (xreg1, xval1);
3637 else
3639 std::swap (xreg2, xval2);
3640 cond2 = swap_condition (cond2);
3642 // The swap may have introduced a difficult comparison.
3643 // In order to get rid of it, only a few cases need extra care.
3644 if ((cond1 == LT && cond2 == GT)
3645 || (cond1 == LTU && cond2 == GTU))
3647 avr_dump (";; case #22\n");
3648 cond2 = NE;
3650 else
3651 avr_dump (";; case #23\n");
3654 else
3655 avr_dump (";; case #20\n");
3657 return xval1;
3661 /* INSN1 and INSN2 are two cbranch insns for the same integer mode.
3662 When FOLLOW_LABEL1 is false, then INSN2 is located in the fallthrough
3663 path of INSN1. When FOLLOW_LABEL1 is true, then INSN2 is located at
3664 the true edge of INSN1, INSN2 is preceded by a barrier, and no other
3665 edge leads to the basic block of INSN2.
3667 Try to replace INSN1 and INSN2 by a compare insn and two branch insns.
3668 When such a replacement has been performed, then return the insn where the
3669 caller should continue scanning the insn stream. Else, return nullptr. */
3671 static rtx_insn *
3672 avr_optimize_2ifelse (rtx_jump_insn *insn1,
3673 rtx_jump_insn *insn2, bool follow_label1)
3675 avr_dump (";; Investigating jump_insn %d and jump_insn %d.\n",
3676 INSN_UID (insn1), INSN_UID (insn2));
3678 // Extract the operands of the insns:
3679 // $0 = comparison operator ($1, $2)
3680 // $1 = reg
3681 // $2 = reg or const_int
3682 // $3 = code_label
3683 // $4 = optional SCRATCH for HI, PSI, SI cases.
3685 const auto &op = recog_data.operand;
3687 extract_insn (insn1);
3688 rtx xop1[5] = { op[0], op[1], op[2], op[3], op[4] };
3689 int n_operands = recog_data.n_operands;
3691 extract_insn (insn2);
3692 rtx xop2[5] = { op[0], op[1], op[2], op[3], op[4] };
3694 rtx_code code1 = GET_CODE (xop1[0]);
3695 rtx_code code2 = GET_CODE (xop2[0]);
3696 bool swap_targets = false;
3698 // Search redundant REG-REG comparison.
3699 rtx xval = avr_redundant_compare_regs (xop1[1], code1, xop1[2],
3700 xop2[1], code2, xop2[2],
3701 follow_label1);
3703 // Search redundant REG-CONST_INT comparison.
3704 if (! xval)
3705 xval = avr_redundant_compare (xop1[1], code1, xop1[2],
3706 xop2[1], code2, xop2[2],
3707 follow_label1, swap_targets);
3708 if (! xval)
3710 avr_dump (";; Nothing found for jump_insn %d and jump_insn %d.\n",
3711 INSN_UID (insn1), INSN_UID (insn2));
3712 return nullptr;
3715 if (follow_label1)
3716 code1 = reverse_condition (code1);
3718 //////////////////////////////////////////////////////
3719 // Found a replacement.
3721 if (dump_file)
3723 avr_dump (";; => %C %r\n", code1, xval);
3724 avr_dump (";; => %C %r\n", code2, xval);
3726 fprintf (dump_file, "\n;; Found chain of jump_insn %d and"
3727 " jump_insn %d, follow_label1=%d:\n",
3728 INSN_UID (insn1), INSN_UID (insn2), follow_label1);
3729 print_rtl_single (dump_file, PATTERN (insn1));
3730 print_rtl_single (dump_file, PATTERN (insn2));
3733 rtx_insn *next_insn
3734 = next_nonnote_nondebug_insn (follow_label1 ? insn1 : insn2);
3736 // Pop the new branch conditions and the new comparison.
3737 // Prematurely split into compare + branch so that we can drop
3738 // the 2nd comparison. The following pass, split2, splits all
3739 // insns for REG_CC, and it should still work as usual even when
3740 // there are already some REG_CC insns around.
3742 rtx xcond1 = gen_rtx_fmt_ee (code1, VOIDmode, cc_reg_rtx, const0_rtx);
3743 rtx xcond2 = gen_rtx_fmt_ee (code2, VOIDmode, cc_reg_rtx, const0_rtx);
3744 rtx xpat1 = gen_branch (xop1[3], xcond1);
3745 rtx xpat2 = gen_branch (xop2[3], xcond2);
3746 rtx xcompare = NULL_RTX;
3747 machine_mode mode = GET_MODE (xop1[1]);
3749 if (mode == QImode)
3751 gcc_assert (n_operands == 4);
3752 xcompare = gen_cmpqi3 (xop1[1], xval);
3754 else
3756 gcc_assert (n_operands == 5);
3757 rtx scratch = GET_CODE (xop1[4]) == SCRATCH ? xop2[4] : xop1[4];
3758 rtx (*gen_cmp)(rtx,rtx,rtx)
3759 = mode == HImode ? gen_gen_comparehi
3760 : mode == PSImode ? gen_gen_comparepsi
3761 : gen_gen_comparesi; // SImode
3762 xcompare = gen_cmp (xop1[1], xval, scratch);
3765 // Emit that stuff.
3767 rtx_insn *cmp = emit_insn_before (xcompare, insn1);
3768 rtx_jump_insn *branch1 = emit_jump_insn_after (xpat1, insn1);
3769 rtx_jump_insn *branch2 = emit_jump_insn_after (xpat2, insn2);
3771 JUMP_LABEL (branch1) = xop1[3];
3772 JUMP_LABEL (branch2) = xop2[3];
3773 // delete_insn() decrements LABEL_NUSES when deleting a JUMP_INSN,
3774 // but when we pop a new JUMP_INSN, do it by hand.
3775 ++LABEL_NUSES (xop1[3]);
3776 ++LABEL_NUSES (xop2[3]);
3778 delete_insn (insn1);
3779 delete_insn (insn2);
3781 if (swap_targets)
3783 gcc_assert (! follow_label1);
3785 basic_block to1 = BLOCK_FOR_INSN (xop1[3]);
3786 basic_block to2 = BLOCK_FOR_INSN (xop2[3]);
3787 edge e1 = find_edge (BLOCK_FOR_INSN (branch1), to1);
3788 edge e2 = find_edge (BLOCK_FOR_INSN (branch2), to2);
3789 gcc_assert (e1);
3790 gcc_assert (e2);
3791 redirect_edge_and_branch (e1, to2);
3792 redirect_edge_and_branch (e2, to1);
3795 // As a side effect, also recog the new insns.
3796 gcc_assert (valid_insn_p (cmp));
3797 gcc_assert (valid_insn_p (branch1));
3798 gcc_assert (valid_insn_p (branch2));
3800 return next_insn;
3804 /* Sequences like
3806 SREG = compare (reg, 1 + val);
3807 if (SREG >= 0) goto label1;
3808 SREG = compare (reg, val);
3809 if (SREG == 0) goto label2;
3811 can be optimized to
3813 SREG = compare (reg, val);
3814 if (SREG == 0) goto label2;
3815 if (SREG >= 0) goto label1;
3817 Almost all cases where one of the comparisons is redundant can
3818 be transformed in such a way that only one comparison is required
3819 and no difficult branches are needed. */
3821 unsigned int
3822 avr_pass_ifelse::execute (function *)
3824 rtx_insn *next_insn;
3826 for (rtx_insn *insn = get_insns (); insn; insn = next_insn)
3828 next_insn = next_nonnote_nondebug_insn (insn);
3830 if (! next_insn)
3831 break;
3833 // Search for two cbranch insns.  Filter the first one for
3834 // "cbranch<mode>4_insn" with mode in QI, HI, PSI, SI.
3836 if (! JUMP_P (insn))
3837 continue;
3839 int icode1 = recog_memoized (insn);
3841 if (icode1 != CODE_FOR_cbranchqi4_insn
3842 && icode1 != CODE_FOR_cbranchhi4_insn
3843 && icode1 != CODE_FOR_cbranchpsi4_insn
3844 && icode1 != CODE_FOR_cbranchsi4_insn)
3845 continue;
3847 rtx_jump_insn *insn1 = as_a<rtx_jump_insn *> (insn);
3849 // jmp[0]: We can optimize cbranches that follow cbranch insn1.
3850 rtx_insn *jmp[2] = { next_insn, nullptr };
3852 // jmp[1]: A cbranch following the label of cbranch insn1.
3853 if (LABEL_NUSES (JUMP_LABEL (insn1)) == 1)
3855 rtx_insn *code_label1 = JUMP_LABEL_AS_INSN (insn1);
3856 rtx_insn *barrier = prev_nonnote_nondebug_insn (code_label1);
3858 // When the target label of insn1 is used exactly once and is
3859 // not a fallthrough, i.e. is preceded by a barrier, then
3860 // consider the insn following that label.
3861 if (barrier && BARRIER_P (barrier))
3862 jmp[1] = next_nonnote_nondebug_insn (code_label1);
3865 // With almost certainty, only one of the two possible jumps can
3866 // be optimized with insn1, but it's hard to tell which one a priori.
3867 // Just try both. In the unlikely case where both could be optimized,
3868 // prefer jmp[0] because eliminating difficult branches is impeded
3869 // by following label1.
3871 for (int j = 0; j < 2; ++j)
3872 if (jmp[j] && JUMP_P (jmp[j])
3873 && recog_memoized (jmp[j]) == icode1)
3875 rtx_insn *next
3876 = avr_optimize_2ifelse (insn1, as_a<rtx_jump_insn *> (jmp[j]),
3877 j == 1 /* follow_label1 */);
3878 if (next)
3880 next_insn = next;
3881 break;
3885 } // loop insns
3887 return 0;
3892 //////////////////////////////////////////////////////////////////////////////
3893 // Optimize results of the casesi expander for modes < SImode.
3895 static const pass_data avr_pass_data_casesi =
3897 RTL_PASS, // type
3898 "", // name (will be patched)
3899 OPTGROUP_NONE, // optinfo_flags
3900 TV_DF_SCAN, // tv_id
3901 0, // properties_required
3902 0, // properties_provided
3903 0, // properties_destroyed
3904 0, // todo_flags_start
3905 0 // todo_flags_finish
3908 class avr_pass_casesi : public rtl_opt_pass
3910 public:
3911 avr_pass_casesi (gcc::context *ctxt, const char *name)
3912 : rtl_opt_pass (avr_pass_data_casesi, ctxt)
3914 this->name = name;
3917 bool gate (function *) final override
3919 return optimize > 0;
3922 unsigned int execute (function *) final override;
3923 }; // avr_pass_casesi
3926 /* Make one parallel insn with all the patterns from insns i[0]..i[4]. */
3928 static rtx_insn *
3929 avr_parallel_insn_from_insns (rtx_insn *i[5])
3931 rtvec vec = gen_rtvec (5, PATTERN (i[0]), PATTERN (i[1]), PATTERN (i[2]),
3932 PATTERN (i[3]), PATTERN (i[4]));
3933 start_sequence ();
3934 emit (gen_rtx_PARALLEL (VOIDmode, vec));
3935 rtx_insn *insn = get_insns ();
3936 end_sequence ();
3938 return insn;
3942 /* Return true if we see an insn stream generated by casesi expander together
3943 with an extension to SImode of the switch value.
3945 If this is the case, fill in the insns from casesi to INSNS[1..4] and
3946 the SImode extension to INSNS[0]. Moreover, extract the operands of
3947 pattern casesi_<mode>_sequence forged from the sequence to recog_data. */
3949 static bool
3950 avr_is_casesi_sequence (basic_block bb, rtx_insn *insn, rtx_insn *insns[5])
3952 rtx set_4, set_0;
3954 /* A first and quick test for a casesi sequence.  As a side effect of
3955 the test, harvest respective insns to INSNS[0..4]. */
3957 if (!(JUMP_P (insns[4] = insn)
3958 // casesi is the only insn that comes up with UNSPEC_INDEX_JMP,
3959 // hence the following test ensures that we are actually dealing
3960 // with code from casesi.
3961 && (set_4 = single_set (insns[4]))
3962 && UNSPEC == GET_CODE (SET_SRC (set_4))
3963 && UNSPEC_INDEX_JMP == XINT (SET_SRC (set_4), 1)
3965 && (insns[3] = prev_real_insn (insns[4]))
3966 && (insns[2] = prev_real_insn (insns[3]))
3967 && (insns[1] = prev_real_insn (insns[2]))
3969 // Insn prior to casesi.
3970 && (insns[0] = prev_real_insn (insns[1]))
3971 && (set_0 = single_set (insns[0]))
3972 && extend_operator (SET_SRC (set_0), SImode)))
3974 return false;
3977 if (dump_file)
3979 fprintf (dump_file, ";; Sequence from casesi in "
3980 "[bb %d]:\n\n", bb->index);
3981 for (int i = 0; i < 5; i++)
3982 print_rtl_single (dump_file, insns[i]);
3985 /* We have to deal with quite a few operands.  Extracting them by hand
3986 would be tedious, therefore wrap the insn patterns into a parallel,
3987 run recog against it and then use insn extract to get the operands. */
3989 rtx_insn *xinsn = avr_parallel_insn_from_insns (insns);
3991 INSN_CODE (xinsn) = recog (PATTERN (xinsn), xinsn, NULL /* num_clobbers */);
3993 /* Failing to recognize means that someone changed the casesi expander or
3994 that some passes prior to this one performed some unexpected changes.
3995 Gracefully drop such situations instead of aborting. */
3997 if (INSN_CODE (xinsn) < 0)
3999 if (dump_file)
4000 fprintf (dump_file, ";; Sequence not recognized, giving up.\n\n");
4002 return false;
4005 gcc_assert (CODE_FOR_casesi_qi_sequence == INSN_CODE (xinsn)
4006 || CODE_FOR_casesi_hi_sequence == INSN_CODE (xinsn));
4008 extract_insn (xinsn);
4010 // Assert on the anatomy of xinsn's operands we are going to work with.
4012 gcc_assert (recog_data.n_operands == 12);
4013 gcc_assert (recog_data.n_dups == 3);
4015 if (dump_file)
4017 fprintf (dump_file, ";; Operands extracted:\n");
4018 for (int i = 0; i < recog_data.n_operands; i++)
4019 avr_fdump (dump_file, ";; $%d = %r\n", i, recog_data.operand[i]);
4020 fprintf (dump_file, "\n");
4023 return true;
4027 /* INSNS[1..4] is a sequence as generated by casesi and INSNS[0] is an
4028 extension of an 8-bit or 16-bit integer to SImode. XOP contains the
4029 operands of INSNS as extracted by insn_extract from pattern
4030 casesi_<mode>_sequence:
4032 $0: SImode reg switch value as result of $10.
4033 $1: Negative of smallest index in switch.
4034 $2: Number of entries in switch.
4035 $3: Label to table.
4036 $4: Label if out-of-bounds.
4037 $5: $0 + $1.
4038 $6: 3-byte PC: subreg:HI ($5)
4039 2-byte PC: subreg:HI ($5) + label_ref ($3)
4040 $7: HI reg index into table (Z or pseudo)
4041 $8: Z or scratch:HI (to be clobbered)
4042 $9: R24 or const0_rtx (to be clobbered)
4043 $10: Extension to SImode of an 8-bit or 16-bit integer register $11.
4044 $11: QImode or HImode register input of $10.
4046 Try to optimize this sequence, i.e. use the original HImode / QImode
4047 switch value instead of SImode. */
4049 static void
4050 avr_optimize_casesi (rtx_insn *insns[5], rtx *xop)
4052 // Original mode of the switch value; this is QImode or HImode.
4053 machine_mode mode = GET_MODE (xop[11]);
4055 // How the original switch value was extended to SImode; this is
4056 // SIGN_EXTEND or ZERO_EXTEND.
4057 rtx_code code = GET_CODE (xop[10]);
4059 // Lower index, upper index (plus one) and range of case values.
4060 HOST_WIDE_INT low_idx = -INTVAL (xop[1]);
4061 HOST_WIDE_INT num_idx = INTVAL (xop[2]);
4062 HOST_WIDE_INT hig_idx = low_idx + num_idx;
4064 // Maximum ranges of (un)signed QImode resp. HImode.
4065 unsigned umax = QImode == mode ? 0xff : 0xffff;
4066 int imax = QImode == mode ? 0x7f : 0x7fff;
4067 int imin = -imax - 1;
4069 // Testing the case range and whether it fits into the range of the
4070 // (un)signed mode. This test should actually always pass because it
4071 // makes no sense to have case values outside the mode range. Notice
4072 // that case labels which are unreachable because they are outside the
4073 // mode of the switch value (e.g. "case -1" for uint8_t) have already
4074 // been thrown away by the middle-end.
4076 if (SIGN_EXTEND == code
4077 && low_idx >= imin
4078 && hig_idx <= imax)
4080 // ok
4082 else if (ZERO_EXTEND == code
4083 && low_idx >= 0
4084 && (unsigned) hig_idx <= umax)
4086 // ok
4088 else
4090 if (dump_file)
4091 fprintf (dump_file, ";; Case ranges too big, giving up.\n\n");
4092 return;
4095 // Do normalization of switch value $10 and out-of-bound check in its
4096 // original mode instead of in SImode. Use a newly created pseudo.
4097 // This will replace insns[1..2].
4099 start_sequence ();
4101 rtx reg = copy_to_mode_reg (mode, xop[11]);
4103 rtx (*gen_add)(rtx,rtx,rtx) = QImode == mode ? gen_addqi3 : gen_addhi3;
4104 rtx (*gen_cbranch)(rtx,rtx,rtx,rtx)
4105 = QImode == mode ? gen_cbranchqi4 : gen_cbranchhi4;
4107 emit_insn (gen_add (reg, reg, gen_int_mode (-low_idx, mode)));
4108 rtx op0 = reg; rtx op1 = gen_int_mode (num_idx, mode);
4109 rtx labelref = copy_rtx (xop[4]);
4110 rtx xbranch = gen_cbranch (gen_rtx_fmt_ee (GTU, VOIDmode, op0, op1),
4111 op0, op1, labelref);
4112 rtx_insn *cbranch = emit_jump_insn (xbranch);
4113 JUMP_LABEL (cbranch) = xop[4];
4114 ++LABEL_NUSES (xop[4]);
4116 rtx_insn *seq1 = get_insns ();
4117 rtx_insn *last1 = get_last_insn ();
4118 end_sequence ();
4120 emit_insn_after (seq1, insns[2]);
4122 // After the out-of-bounds test and corresponding branch, use a
4123 // 16-bit index. If QImode is used, extend it to HImode first.
4124 // This will replace insns[4].
4126 start_sequence ();
4128 if (QImode == mode)
4129 reg = force_reg (HImode, gen_rtx_fmt_e (code, HImode, reg));
4131 rtx pat_4 = AVR_3_BYTE_PC
4132 ? gen_movhi (xop[7], reg)
4133 : gen_addhi3 (xop[7], reg, gen_rtx_LABEL_REF (VOIDmode, xop[3]));
4135 emit_insn (pat_4);
4137 rtx_insn *seq2 = get_insns ();
4138 rtx_insn *last2 = get_last_insn ();
4139 end_sequence ();
4141 emit_insn_after (seq2, insns[3]);
4143 if (dump_file)
4145 fprintf (dump_file, ";; New insns: ");
4147 for (rtx_insn *insn = seq1; ; insn = NEXT_INSN (insn))
4149 fprintf (dump_file, "%d, ", INSN_UID (insn));
4150 if (insn == last1)
4151 break;
4153 for (rtx_insn *insn = seq2; ; insn = NEXT_INSN (insn))
4155 fprintf (dump_file, "%d%s", INSN_UID (insn),
4156 insn == last2 ? ".\n\n" : ", ");
4157 if (insn == last2)
4158 break;
4161 fprintf (dump_file, ";; Deleting insns: %d, %d, %d.\n\n",
4162 INSN_UID (insns[1]), INSN_UID (insns[2]), INSN_UID (insns[3]));
4165 // Pseudo-delete the SImode and subreg of SImode insns. We don't care
4166 // about the extension insns[0]: Its result is now unused and other
4167 // passes will clean it up.
4169 SET_INSN_DELETED (insns[1]);
4170 SET_INSN_DELETED (insns[2]);
4171 SET_INSN_DELETED (insns[3]);
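// Worked illustration (hedged), using the operand numbers from the
// comment before avr_optimize_casesi: for "switch (u8)" the new code
// computes, on a fresh pseudo REG in the original QImode,
//     reg = $11 ;  reg += $1 ;  if (reg >u $2) goto $4;
// instead of performing the offset addition and the bounds check on
// the SImode extension; only the final table index is HImode.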
4175 unsigned int
4176 avr_pass_casesi::execute (function *func)
4178 basic_block bb;
4180 FOR_EACH_BB_FN (bb, func)
4182 rtx_insn *insn, *insns[5];
4184 FOR_BB_INSNS (bb, insn)
4186 if (avr_is_casesi_sequence (bb, insn, insns))
4188 avr_optimize_casesi (insns, recog_data.operand);
4193 return 0;
4196 } // anonymous namespace
4198 /* Perform some extra checks on operands of casesi_<mode>_sequence.
4199 Not all operand dependencies can be described by means of predicates.
4200 This function performs left over checks and should always return true.
4201 Returning false means that someone changed the casesi expander but did
4202 not adjust casesi_<mode>_sequence. */
4204 bool
4205 avr_casei_sequence_check_operands (rtx *xop)
4207 rtx sub_5 = NULL_RTX;
4209 if (AVR_HAVE_EIJMP_EICALL
4210 // The last clobber op of the tablejump.
4211 && xop[9] == all_regs_rtx[REG_24])
4213 // $6 is: (subreg:SI ($5) 0)
4214 sub_5 = xop[6];
4217 if (!AVR_HAVE_EIJMP_EICALL
4218 // $6 is: (plus:HI (subreg:SI ($5) 0)
4219 // (label_ref ($3)))
4220 && PLUS == GET_CODE (xop[6])
4221 && LABEL_REF == GET_CODE (XEXP (xop[6], 1))
4222 && rtx_equal_p (xop[3], XEXP (XEXP (xop[6], 1), 0))
4223 // The last clobber op of the tablejump.
4224 && xop[9] == const0_rtx)
4226 sub_5 = XEXP (xop[6], 0);
4229 if (sub_5
4230 && SUBREG_P (sub_5)
4231 && SUBREG_BYTE (sub_5) == 0
4232 && rtx_equal_p (xop[5], SUBREG_REG (sub_5)))
4233 return true;
4235 if (dump_file)
4236 fprintf (dump_file, "\n;; Failed condition for casesi_<mode>_sequence\n\n");
4238 return false;
4241 namespace
4245 //////////////////////////////////////////////////////////////////////////////
4246 // Find more POST_INC and PRE_DEC cases.
4248 static const pass_data avr_pass_data_fuse_add =
4250 RTL_PASS, // type
4251 "", // name (will be patched)
4252 OPTGROUP_NONE, // optinfo_flags
4253 TV_MACH_DEP, // tv_id
4254 0, // properties_required
4255 0, // properties_provided
4256 0, // properties_destroyed
4257 0, // todo_flags_start
4258 TODO_df_finish // todo_flags_finish
4261 class avr_pass_fuse_add : public rtl_opt_pass
4263 public:
4264 avr_pass_fuse_add (gcc::context *ctxt, const char *name)
4265 : rtl_opt_pass (avr_pass_data_fuse_add, ctxt)
4267 this->name = name;
4270 // Cloning is required because we are running one instance of the pass
4271 // before peephole2, and a second one after cprop_hardreg.
4272 opt_pass * clone () final override
4274 return make_avr_pass_fuse_add (m_ctxt);
4277 unsigned int execute (function *func) final override
4279 func->machine->n_avr_fuse_add_executed += 1;
4280 n_avr_fuse_add_executed = func->machine->n_avr_fuse_add_executed;
4282 if (optimize && avropt_fuse_add > 0)
4283 return execute1 (func);
4284 return 0;
4287 unsigned int execute1 (function *);
4289 struct Some_Insn
4291 rtx_insn *insn = nullptr;
4292 rtx dest, src;
4293 bool valid () const { return insn != nullptr; }
4294 void set_deleted ()
4296 gcc_assert (insn);
4297 SET_INSN_DELETED (insn);
4298 insn = nullptr;
4302 // If .insn is not NULL, then this is a reg:HI += const_int
4303 // of an address register.
4304 struct Add_Insn : Some_Insn
4306 rtx addend;
4307 int regno;
4308 Add_Insn () {}
4309 Add_Insn (rtx_insn *insn);
4312 // If .insn is not NULL, then this sets an address register
4313 // to a constant value.
4314 struct Ldi_Insn : Some_Insn
4316 int regno;
4317 Ldi_Insn () {}
4318 Ldi_Insn (rtx_insn *insn);
4321 // If .insn is not NULL, then this is a load or store insn where the
4322 // address is REG or POST_INC with an address register.
4323 struct Mem_Insn : Some_Insn
4325 rtx reg_or_0, mem, addr, addr_reg;
4326 int addr_regno;
4327 rtx_code addr_code;
4328 machine_mode mode;
4329 addr_space_t addr_space;
4330 bool store_p, volatile_p;
4331 Mem_Insn () {}
4332 Mem_Insn (rtx_insn *insn);
4335 rtx_insn *fuse_ldi_add (Ldi_Insn &prev_ldi, Add_Insn &add);
4336 rtx_insn *fuse_add_add (Add_Insn &prev_add, Add_Insn &add);
4337 rtx_insn *fuse_add_mem (Add_Insn &prev_add, Mem_Insn &mem);
4338 rtx_insn *fuse_mem_add (Mem_Insn &prev_mem, Add_Insn &add);
4339 }; // avr_pass_fuse_add
4342 /* Describe properties of AVR's indirect load and store instructions
4343 LD, LDD, ST, STD, LPM, ELPM depending on register number, volatility etc.
4344 Rules for "volatile" accesses are:
4346 | Xmega | non-Xmega
4347 ------+-----------------+----------------
4348 load | read LSB first | read LSB first
4349 store | write LSB first | write MSB first
4352 struct AVR_LdSt_Props
4354 bool has_postinc, has_predec, has_ldd;
4355 // The insn printers will use POST_INC or PRE_DEC addressing, no matter
4356 // what addressing modes we are feeding into them.
4357 bool want_postinc, want_predec;
4359 AVR_LdSt_Props (int regno, bool store_p, bool volatile_p, addr_space_t as)
4361 bool generic_p = ADDR_SPACE_GENERIC_P (as);
4362 bool flashx_p = (! generic_p
4363 && as != ADDR_SPACE_MEMX && as != ADDR_SPACE_FLASHX);
4364 has_postinc = generic_p || (flashx_p && regno == REG_Z);
4365 has_predec = generic_p;
4366 has_ldd = ! AVR_TINY && generic_p && (regno == REG_Y || regno == REG_Z);
4367 want_predec = volatile_p && generic_p && ! AVR_XMEGA && store_p;
4368 want_postinc = volatile_p && generic_p && (AVR_XMEGA || ! store_p);
4369 want_postinc |= flashx_p && regno == REG_Z;
4372 AVR_LdSt_Props (const avr_pass_fuse_add::Mem_Insn &m)
4373 : AVR_LdSt_Props (m.addr_regno, m.store_p, m.volatile_p, m.addr_space)
4375 gcc_assert (m.valid ());
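// Example (hedged): a volatile 2-byte store through Y on a non-Xmega
// core gets want_predec = 1 because the MSB must be written first
// (two "st -Y"), whereas the same store on Xmega gets want_postinc = 1
// and writes the LSB first (two "st Y+").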
4380 /* Emit a single_set that clobbers REG_CC. */
4382 static rtx_insn *
4383 emit_move_ccc (rtx dest, rtx src)
4385 return emit_insn (gen_gen_move_clobbercc (dest, src));
4389 /* Emit a single_set that clobbers REG_CC after insn AFTER. */
4391 static rtx_insn *
4392 emit_move_ccc_after (rtx dest, rtx src, rtx_insn *after)
4394 return emit_insn_after (gen_gen_move_clobbercc (dest, src), after);
4397 static bool
4398 reg_seen_between_p (const_rtx reg, const rtx_insn *from, const rtx_insn *to)
4400 return (reg_used_between_p (reg, from, to)
4401 || reg_set_between_p (reg, from, to));
4405 static void
4406 avr_maybe_adjust_cfa (rtx_insn *insn, rtx reg, int addend)
4408 if (addend
4409 && frame_pointer_needed
4410 && REGNO (reg) == FRAME_POINTER_REGNUM
4411 && avropt_fuse_add == 3)
4413 rtx plus = plus_constant (Pmode, reg, addend);
4414 RTX_FRAME_RELATED_P (insn) = 1;
4415 add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (reg, plus));
4420 // If successful, this represents a SET of a pointer register to a constant.
4421 avr_pass_fuse_add::Ldi_Insn::Ldi_Insn (rtx_insn *insn)
4423 rtx set = single_set (insn);
4424 if (!set)
4425 return;
4427 src = SET_SRC (set);
4428 dest = SET_DEST (set);
4430 if (REG_P (dest)
4431 && GET_MODE (dest) == Pmode
4432 && IN_RANGE (regno = REGNO (dest), REG_X, REG_Z)
4433 && CONSTANT_P (src))
4435 this->insn = insn;
4439 // If successful, this represents a PLUS with CONST_INT of a pointer
4440 // register X, Y or Z. Otherwise, the object is not valid().
4441 avr_pass_fuse_add::Add_Insn::Add_Insn (rtx_insn *insn)
4443 rtx set = single_set (insn);
4444 if (!set)
4445 return;
4447 src = SET_SRC (set);
4448 dest = SET_DEST (set);
4449 if (REG_P (dest)
4450 // We are only interested in PLUSes that change address regs.
4451 && GET_MODE (dest) == Pmode
4452 && IN_RANGE (regno = REGNO (dest), REG_X, REG_Z)
4453 && PLUS == GET_CODE (src)
4454 && rtx_equal_p (XEXP (src, 0), dest)
4455 && CONST_INT_P (XEXP (src, 1)))
4457 // This is reg:HI += const_int.
4458 addend = XEXP (src, 1);
4459 this->insn = insn;
4463 // If successful, this represents a load or store insn where the addressing
4464 // mode uses pointer register X, Y or Z. Otherwise, the object is not valid().
4465 avr_pass_fuse_add::Mem_Insn::Mem_Insn (rtx_insn *insn)
4467 rtx set = single_set (insn);
4468 if (!set)
4469 return;
4471 src = SET_SRC (set);
4472 dest = SET_DEST (set);
4473 mode = GET_MODE (dest);
4475 if (MEM_P (dest)
4476 && (REG_P (src) || src == CONST0_RTX (mode)))
4478 reg_or_0 = src;
4479 mem = dest;
4481 else if (REG_P (dest) && MEM_P (src))
4483 reg_or_0 = dest;
4484 mem = src;
4486 else
4487 return;
4489 if (avr_mem_memx_p (mem)
4490 || avr_load_libgcc_p (mem))
4491 return;
4493 addr = XEXP (mem, 0);
4494 addr_code = GET_CODE (addr);
4496 if (addr_code == REG)
4497 addr_reg = addr;
4498 else if (addr_code == POST_INC || addr_code == PRE_DEC)
4499 addr_reg = XEXP (addr, 0);
4500 else
4501 return;
4503 addr_regno = REGNO (addr_reg);
4505 if (avropt_fuse_add == 2
4506 && frame_pointer_needed
4507 && addr_regno == FRAME_POINTER_REGNUM)
4508 MEM_VOLATILE_P (mem) = 0;
4510 if (reg_overlap_mentioned_p (reg_or_0, addr) // Can handle CONSTANT_P.
4511 || addr_regno > REG_Z
4512 || avr_mem_memx_p (mem)
4513 // The following optimizations only handle REG and POST_INC,
4514 // so that's all we allow here.
4515 || (addr_code != REG && addr_code != POST_INC))
4516 return;
4518 addr_space = MEM_ADDR_SPACE (mem);
4519 volatile_p = MEM_VOLATILE_P (mem);
4520 store_p = MEM_P (dest);
4522 // Mark this object as valid ().
4523 this->insn = insn;
4526 /* Try to combine a Ldi insn with a PLUS CONST_INT addend to one Ldi insn.
4527 If LDI is valid, then it precedes ADD in the same block.
4528 When a replacement is found, a new insn is emitted and the old insns
4529 are pseudo-deleted. The returned insn is the point where the calling
4530 scanner should continue. When no replacement is found, nullptr is
4531 returned and nothing changed. */
4533 rtx_insn *
4534 avr_pass_fuse_add::fuse_ldi_add (Ldi_Insn &ldi, Add_Insn &add)
4536 if (! ldi.valid ()
4537 || reg_seen_between_p (ldi.dest, ldi.insn, add.insn))
4539 // If something is between the Ldi and the current insn, we can
4540 // set the Ldi invalid to speed future scans.
4541 return ldi.insn = nullptr;
4544 // Found a Ldi with const and a PLUS insns in the same BB,
4545 // and with no interfering insns between them.
4547 // Emit new Ldi with the sum of the original offsets after the old Ldi.
4548 rtx xval = plus_constant (Pmode, ldi.src, INTVAL (add.addend));
4550 rtx_insn *insn = emit_move_ccc_after (ldi.dest, xval, ldi.insn);
4551 avr_dump (";; new Ldi[%d] insn %d after %d: R%d = %r\n\n", ldi.regno,
4552 INSN_UID (insn), INSN_UID (ldi.insn), ldi.regno, xval);
4554 rtx_insn *next = NEXT_INSN (add.insn);
4555 ldi.set_deleted ();
4556 add.set_deleted ();
4558 return next;
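// Example (sketch):
//     Ldi:  X = symbol        ; an LDI pair
//     Add:  X += -4           ; e.g. SBIW R26, 4
// fuses to a single "X = symbol - 4" loaded with one LDI pair.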
4561 /* Try to combine two PLUS insns with CONST_INT addend to one such insn.
4562 If PREV_ADD is valid, then it precedes ADD in the same basic block.
4563 When a replacement is found, a new insn is emitted and the old insns
4564 are pseudo-deleted. The returned insn is the point where the calling
4565 scanner should continue. When no replacement is found, nullptr is
4566 returned and nothing changed. */
4568 rtx_insn *
4569 avr_pass_fuse_add::fuse_add_add (Add_Insn &prev_add, Add_Insn &add)
4571 if (! prev_add.valid ()
4572 || reg_seen_between_p (add.dest, prev_add.insn, add.insn))
4574 // If something is between the previous Add and the current insn,
4575 // we can set the previous Add invalid to speed future scans.
4576 return prev_add.insn = nullptr;
4579 // Found two PLUS insns in the same BB, and with no interfering
4580 // insns between them.
4581 rtx plus = plus_constant (Pmode, add.src, INTVAL (prev_add.addend));
4583 rtx_insn *next;
4584 if (REG_P (plus))
4586 avr_dump (";; Add[%d] from %d annihilates %d\n\n", add.regno,
4587 INSN_UID (prev_add.insn), INSN_UID (add.insn));
4588 next = NEXT_INSN (add.insn);
4590 else
4592 // Emit after the current insn, so that it will be picked
4593 // up as next valid Add insn.
4594 next = emit_move_ccc_after (add.dest, plus, add.insn);
4595 avr_dump (";; #1 new Add[%d] insn %d after %d: R%d += %d\n\n",
4596 add.regno, INSN_UID (next), INSN_UID (add.insn),
4597 add.regno, (int) INTVAL (XEXP (plus, 1)));
4598 gcc_assert (GET_CODE (plus) == PLUS);
4601 add.set_deleted ();
4602 prev_add.set_deleted ();
4604 return next;
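// Example (sketch): "Y += 3" followed by "Y += 4" becomes a single
// "Y += 7" emitted after the second add, whereas "Y += 4" followed
// by "Y += -4" annihilates: both insns are just deleted.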
4607 /* Try to combine a PLUS of the address register with a load or store insn.
4608 If ADD is valid, then it precedes MEM in the same basic block.
4609 When a replacement is found, a new insn is emitted and the old insns
4610 are pseudo-deleted. The returned insn is the point where the calling
4611 scanner should continue. When no replacement is found, nullptr is
4612 returned and nothing changed. */
4614 rtx_insn *
4615 avr_pass_fuse_add::fuse_add_mem (Add_Insn &add, Mem_Insn &mem)
4617 if (! add.valid ()
4618 || reg_seen_between_p (add.dest, add.insn, mem.insn))
4620 // If something is between the Add and the current insn, we can
4621 // set the Add invalid to speed future scans.
4622 return add.insn = nullptr;
4625 AVR_LdSt_Props ap { mem };
4627 int msize = GET_MODE_SIZE (mem.mode);
4629 // The mem insn really wants PRE_DEC.
4630 bool case1 = ((mem.addr_code == REG || mem.addr_code == POST_INC)
4631 && msize > 1 && ap.want_predec && ! ap.has_ldd);
4633 // The offset can be consumed by a PRE_DEC.
4634 bool case2 = (- INTVAL (add.addend) == msize
4635 && (mem.addr_code == REG || mem.addr_code == POST_INC)
4636 && ap.has_predec && ! ap.want_postinc);
4638 if (! case1 && ! case2)
4639 return nullptr;
4641 // Change from REG or POST_INC to PRE_DEC.
4642 rtx xmem = change_address (mem.mem, mem.mode,
4643 gen_rtx_PRE_DEC (Pmode, mem.addr_reg));
4644 rtx dest = mem.store_p ? xmem : mem.reg_or_0;
4645 rtx src = mem.store_p ? mem.reg_or_0 : xmem;
4647 rtx_insn *next = emit_move_ccc_after (dest, src, mem.insn);
4648 add_reg_note (next, REG_INC, mem.addr_reg);
4649 avr_dump (";; new Mem[%d] insn %d after %d: %r = %r\n\n", mem.addr_regno,
4650 INSN_UID (next), INSN_UID (mem.insn), dest, src);
4652 // Changing REG or POST_INC -> PRE_DEC means that the addend before
4653 // the memory access must be increased by the size of the access.
4654 rtx plus = plus_constant (Pmode, add.src, msize);
4655 if (! REG_P (plus))
4657 rtx_insn *insn = emit_move_ccc_after (add.dest, plus, add.insn);
4658 avr_dump (";; #2 new Add[%d] insn %d after %d: R%d += %d\n\n",
4659 add.regno, INSN_UID (insn), INSN_UID (add.insn),
4660 add.regno, (int) INTVAL (XEXP (plus, 1)));
4661 gcc_assert (GET_CODE (plus) == PLUS);
4663 else
4664 avr_dump (";; Add[%d] insn %d consumed into %d\n\n",
4665 add.regno, INSN_UID (add.insn), INSN_UID (next));
4667 // Changing POST_INC -> PRE_DEC means that the addend after the mem has to be
4668 // the size of the access. The hope is that this new add insn may be unused.
4669 if (mem.addr_code == POST_INC)
4671 plus = plus_constant (Pmode, add.dest, msize);
4672 rtx_insn *next2 = emit_move_ccc_after (add.dest, plus, next);
4673 avr_dump (";; #3 new Add[%d] insn %d after %d: R%d += %d\n\n", add.regno,
4674 INSN_UID (next2), INSN_UID (next), add.regno, msize);
4675 next = next2;
4678 add.set_deleted ();
4679 mem.set_deleted ();
4681 return next;
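// Example (sketch) for case2:
//     X += -2 ;  R25:R24 = mem:HI (X)
// becomes the single PRE_DEC access "R25:R24 = mem:HI (--X)", and as
// the adjusted addend is zero, the add insn is consumed completely.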
4684 /* Try to combine a load or store insn with a PLUS of the address register.
4685 If MEM is valid, then it precedes ADD in the same basic block.
4686 When a replacement is found, a new insn is emitted and the old insns
4687 are pseudo-deleted. The returned insn is the point where the calling
4688 scanner should continue. When no replacement is found, nullptr is
4689 returned and nothing changed. */
4691 rtx_insn *
4692 avr_pass_fuse_add::fuse_mem_add (Mem_Insn &mem, Add_Insn &add)
4694 if (! mem.valid ()
4695 || reg_seen_between_p (add.dest, mem.insn, add.insn))
4697 // If something is between the Mem and the current insn, we can
4698 // set the Mem invalid to speed future scans.
4699 return mem.insn = nullptr;
4702 AVR_LdSt_Props ap { mem };
4704 int msize = GET_MODE_SIZE (mem.mode);
4706 // The add insn can be consumed by a POST_INC.
4707 bool case1 = (mem.addr_code == REG
4708 && INTVAL (add.addend) == msize
4709 && ap.has_postinc && ! ap.want_predec);
4711 // There are cases where even a partial consumption of the offset is better.
4712 // These are the cases where no LD+offset addressing is available, because
4713 // the address register is obviously used after the mem insn, and a mem insn
4714 // with REG addressing mode will have to restore the address.
4715 bool case2 = (mem.addr_code == REG
4716 && msize > 1 && ap.want_postinc && ! ap.has_ldd);
4718 if (! case1 && ! case2)
4719 return nullptr;
4721 // Change addressing mode from REG to POST_INC.
4722 rtx xmem = change_address (mem.mem, mem.mode,
4723 gen_rtx_POST_INC (Pmode, mem.addr_reg));
4724 rtx dest = mem.store_p ? xmem : mem.reg_or_0;
4725 rtx src = mem.store_p ? mem.reg_or_0 : xmem;
4727 rtx_insn *insn = emit_move_ccc_after (dest, src, mem.insn);
4728 add_reg_note (insn, REG_INC, mem.addr_reg);
4729 avr_dump (";; new Mem[%d] insn %d after %d: %r = %r\n\n", add.regno,
4730 INSN_UID (insn), INSN_UID (mem.insn), dest, src);
4732 rtx_insn *next = NEXT_INSN (add.insn);
4734 // Changing REG -> POST_INC means that the post addend must be
4735 // decreased by the size of the access.
4736 rtx plus = plus_constant (Pmode, add.src, -msize);
4737 if (! REG_P (plus))
4739 next = emit_move_ccc_after (mem.addr_reg, plus, add.insn);
4740 avr_dump (";; #4 new Add[%d] insn %d after %d: R%d += %d\n\n",
4741 add.regno, INSN_UID (next), INSN_UID (add.insn),
4742 add.regno, (int) INTVAL (XEXP (plus, 1)));
4743 gcc_assert (GET_CODE (plus) == PLUS);
4745 else
4746 avr_dump (";; Add[%d] insn %d consumed into %d\n\n",
4747 add.regno, INSN_UID (add.insn), INSN_UID (insn));
4749 add.set_deleted ();
4750 mem.set_deleted ();
4752 return next;
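// Example (sketch) for case1: a byte load followed by an increment
//     R24 = mem:QI (X) ;  X += 1
// becomes the POST_INC access "R24 = mem:QI (X+)"; the remaining
// addend 1 - msize is zero, so no residual add insn is needed.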
4755 /* Try to post-reload combine PLUS with CONST_INT of pointer registers with:
4756 - Sets to a constant address.
4757 - PLUS insn of that kind.
4758 - Indirect loads and stores.
4759 In almost all cases, combine opportunities arise from the preparation
4760 done by `avr_split_fake_addressing_move', but in some rare cases combinations
4761 are found for the ordinary cores, too.
4762 As we consider at most one Mem insn per try, there may still be missed
4763 optimizations; e.g. POST_INC + PLUS + POST_INC might be performed
4764 as PRE_DEC + PRE_DEC for two adjacent locations. */
4766 unsigned int
4767 avr_pass_fuse_add::execute1 (function *func)
4769 df_note_add_problem ();
4770 df_analyze ();
4772 int n_add = 0, n_mem = 0, n_ldi = 0;
4773 basic_block bb;
4775 FOR_EACH_BB_FN (bb, func)
4777 Ldi_Insn prev_ldi_insns[REG_32];
4778 Add_Insn prev_add_insns[REG_32];
4779 Mem_Insn prev_mem_insns[REG_32];
4780 rtx_insn *insn, *curr;
4782 avr_dump ("\n;; basic block %d\n\n", bb->index);
4784 FOR_BB_INSNS_SAFE (bb, insn, curr)
4786 rtx_insn *next = nullptr;
4787 Ldi_Insn ldi_insn { insn };
4788 Add_Insn add_insn { insn };
4789 Mem_Insn mem_insn { insn };
4791 if (add_insn.valid ())
4793 // Found reg:HI += const_int
4794 avr_dump (";; insn %d: Add[%d]: R%d += %d\n\n",
4795 INSN_UID (add_insn.insn), add_insn.regno,
4796 add_insn.regno, (int) INTVAL (add_insn.addend));
4797 Ldi_Insn &prev_ldi_insn = prev_ldi_insns[add_insn.regno];
4798 Add_Insn &prev_add_insn = prev_add_insns[add_insn.regno];
4799 Mem_Insn &prev_mem_insn = prev_mem_insns[add_insn.regno];
4800 if ((next = fuse_ldi_add (prev_ldi_insn, add_insn)))
4801 curr = next, n_ldi += 1;
4802 else if ((next = fuse_add_add (prev_add_insn, add_insn)))
4803 curr = next, n_add += 1;
4804 else if ((next = fuse_mem_add (prev_mem_insn, add_insn)))
4805 curr = next, n_mem += 1;
4806 else
4807 prev_add_insn = add_insn;
4809 else if (mem_insn.valid ())
4811 int addr_regno = REGNO (mem_insn.addr_reg);
4812 avr_dump (";; insn %d: Mem[%d]: %r = %r\n\n",
4813 INSN_UID (mem_insn.insn), addr_regno,
4814 mem_insn.dest, mem_insn.src);
4815 Add_Insn &prev_add_insn = prev_add_insns[addr_regno];
4816 if ((next = fuse_add_mem (prev_add_insn, mem_insn)))
4817 curr = next, n_mem += 1;
4818 else
4819 prev_mem_insns[addr_regno] = mem_insn;
4821 else if (ldi_insn.valid ())
4823 if (! CONST_INT_P (ldi_insn.src))
4824 avr_dump (";; insn %d: Ldi[%d]: R%d = %r\n\n",
4825 INSN_UID (ldi_insn.insn), ldi_insn.regno,
4826 ldi_insn.regno, ldi_insn.src);
4827 prev_ldi_insns[ldi_insn.regno] = ldi_insn;
4829 } // for insns
4830 } // for BBs
4832 avr_dump (";; Function %f: Found %d changes: %d ldi, %d add, %d mem.\n",
4833 n_ldi + n_add + n_mem, n_ldi, n_add, n_mem);
4835 return 0;
4840 //////////////////////////////////////////////////////////////////////////////
4841 // Split shift insns after peephole2 / before avr-fuse-move.
4843 static const pass_data avr_pass_data_split_after_peephole2 =
4845 RTL_PASS, // type
4846 "", // name (will be patched)
4847 OPTGROUP_NONE, // optinfo_flags
4848 TV_DF_SCAN, // tv_id
4849 0, // properties_required
4850 0, // properties_provided
4851 0, // properties_destroyed
4852 0, // todo_flags_start
4853 0 // todo_flags_finish
4856 class avr_pass_split_after_peephole2 : public rtl_opt_pass
4858 public:
4859 avr_pass_split_after_peephole2 (gcc::context *ctxt, const char *name)
4860 : rtl_opt_pass (avr_pass_data_split_after_peephole2, ctxt)
4862 this->name = name;
4865 unsigned int execute (function *) final override
4867 if (avr_shift_is_3op ())
4868 split_all_insns ();
4869 return 0;
4872 }; // avr_pass_split_after_peephole2
4874 } // anonymous namespace
4877 /* Whether some shift insn alternatives are `3op' 3-operand insns.
4878 These 3op alternatives allow the source and the destination register
4879 of the shift to be different right from the start, because the splitter
4880 will split the 3op shift into a 3-operand byte shift and a 2-operand
4881 residual bit shift. (When the residual shift has an offset of one
4882 less than the bitsize, then the residual shift is also a 3op insn.) */
4884 bool
4885 avr_shift_is_3op ()
4887 // Don't split for OPTIMIZE_SIZE_MAX (-Oz).
4888 // For OPTIMIZE_SIZE_BALANCED (-Os), we still split because
4889 // the size overhead (if at all) is marginal.
4891 return (avropt_split_bit_shift
4892 && optimize > 0
4893 && avr_optimize_size_level () < OPTIMIZE_SIZE_MAX);
4897 /* Implement constraints `C2a', `C2l', `C2r' ... `C4a', `C4l', `C4r'.
4898 Whether we split an N_BYTES shift of code CODE in { ASHIFTRT,
4899 LSHIFTRT, ASHIFT } into a byte shift and a residual bit shift. */
4901 bool
4902 avr_split_shift_p (int n_bytes, int offset, rtx_code code)
4904 gcc_assert (n_bytes == 4 || n_bytes == 3 || n_bytes == 2);
4906 if (! avr_shift_is_3op ()
4907 || offset % 8 == 0)
4908 return false;
4910 if (n_bytes == 4)
4911 return select<bool>()
4912 : code == ASHIFT ? IN_RANGE (offset, 9, 31) && offset != 15
4913 : code == ASHIFTRT ? IN_RANGE (offset, 9, 29) && offset != 15
4914 : code == LSHIFTRT ? IN_RANGE (offset, 9, 31) && offset != 15
4915 : bad_case<bool> ();
4917 if (n_bytes == 3)
4918 return select<bool>()
4919 : code == ASHIFT ? IN_RANGE (offset, 9, 23) && offset != 15
4920 : code == ASHIFTRT ? IN_RANGE (offset, 9, 21) && offset != 15
4921 : code == LSHIFTRT ? IN_RANGE (offset, 9, 23) && offset != 15
4922 : bad_case<bool> ();
4924 if (n_bytes == 2)
4925 return select<bool>()
4926 : code == ASHIFT ? IN_RANGE (offset, 9, 15)
4927 : code == ASHIFTRT ? IN_RANGE (offset, 9, 13)
4928 : code == LSHIFTRT ? IN_RANGE (offset, 9, 15)
4929 : bad_case<bool> ();
4931 return false;
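// Example (hedged): for n_bytes = 4 and code = ASHIFT, offset 17 is
// split (9 <= 17 <= 31, not a multiple of 8, not 15), whereas offsets
// 16 (a pure byte shift) and 15 (excluded explicitly) are not split
// here.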
4935 /* Emit a DEST = SRC <code> OFF shift of QImode, HImode, PSImode or SImode.
4936 SCRATCH is a QImode d-register, scratch:QI, or NULL_RTX.
4937 This function is used to emit shifts that have been split into
4938 a byte shift and a residual bit shift that operates on a mode
4939 strictly smaller than the original shift. */
4941 static void
4942 avr_emit_shift (rtx_code code, rtx dest, rtx src, int off, rtx scratch)
4944 const machine_mode mode = GET_MODE (dest);
4945 const int n_bits = GET_MODE_BITSIZE (mode);
4946 rtx xoff = GEN_INT (off);
4948 // Work out which alternatives can handle 3 operands independent
4949 // of options.
4951 const bool b8_is_3op = off == 6;
4953 const bool b16_is_3op = select<bool>()
4954 : code == ASHIFT ? (satisfies_constraint_C7c (xoff) // 7...12
4955 // The "C05 C06" alternative of *ashlhi3_const.
4956 || (AVR_HAVE_MUL && scratch && (off == 5 || off == 6)))
4957 : code == LSHIFTRT ? satisfies_constraint_C7c (xoff)
4958 : code == ASHIFTRT ? off == 7
4959 : bad_case<bool> ();
4961 const bool b24_is_3op = select<bool>()
4962 : code == ASHIFT ? off == 15
4963 : code == LSHIFTRT ? off == 15
4964 : code == ASHIFTRT ? false
4965 : bad_case<bool> ();
4967 const bool is_3op = (off % 8 == 0
4968 || off == n_bits - 1
4969 || (code == ASHIFTRT && off == n_bits - 2)
4970 || (n_bits == 8 && b8_is_3op)
4971 || (n_bits == 16 && b16_is_3op)
4972 || (n_bits == 24 && b24_is_3op));
4973 rtx shift;
4975 if (is_3op)
4977 shift = gen_rtx_fmt_ee (code, mode, src, xoff);
4979 else
4981 if (REGNO (dest) != REGNO (src))
4982 emit_valid_move_clobbercc (dest, src);
4983 shift = gen_rtx_fmt_ee (code, mode, dest, xoff);
4986 if (n_bits == 8)
4987 // 8-bit shifts don't have a scratch operand.
4988 scratch = NULL_RTX;
4989 else if (! scratch && n_bits == 24)
4990 // 24-bit shifts always have a scratch operand.
4991 scratch = gen_rtx_SCRATCH (QImode);
4993 emit_valid_move_clobbercc (dest, shift, scratch);
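// Example (hedged): a PSImode ASHIFT by 15 has b24_is_3op = true, so
// DEST = SRC << 15 is emitted as one 3-operand insn.  An ASHIFT by 14
// is not 3op: SRC is first moved to DEST, then a 2-operand
// DEST <<= 14 is emitted.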
4997 /* Handle the 4-byte case of avr_split_shift below:
4998 Split 4-byte shift DEST = SRC <code> IOFF into a 3-operand
4999 byte shift and a residual shift in a smaller mode if possible.
5000 SCRATCH is a QImode upper scratch register or NULL_RTX. */
5002 static bool
5003 avr_split_shift4 (rtx dest, rtx src, int ioff, rtx scratch, rtx_code code)
5005 gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 4);
5007 if (code == ASHIFT)
5009 if (IN_RANGE (ioff, 25, 31))
5011 rtx dst8 = avr_byte (dest, 3);
5012 rtx src8 = avr_byte (src, 0);
5013 avr_emit_shift (code, dst8, src8, ioff - 24, NULL_RTX);
5014 emit_valid_move_clobbercc (avr_byte (dest, 2), const0_rtx);
5015 emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx);
5016 return true;
5018 else if (IN_RANGE (ioff, 17, 23))
5020 rtx dst16 = avr_word (dest, 2);
5021 rtx src16 = avr_word (src, 0);
5022 avr_emit_shift (code, dst16, src16, ioff - 16, scratch);
5023 emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx);
5024 return true;
5026 // ...the 9...14 cases are only handled by define_split because
5027 // for now, we don't exploit that the low byte is zero.
5029 else if (code == ASHIFTRT
5030 || code == LSHIFTRT)
5032 if (IN_RANGE (ioff, 25, 30 + (code == LSHIFTRT)))
5034 rtx dst8 = avr_byte (dest, 0);
5035 rtx src8 = avr_byte (src, 3);
5036 avr_emit_shift (code, dst8, src8, ioff - 24, NULL_RTX);
5037 if (code == ASHIFTRT)
5039 rtx signs = avr_byte (dest, 1);
5040 avr_emit_shift (code, signs, src8, 7, NULL_RTX);
5041 emit_valid_move_clobbercc (avr_byte (dest, 2), signs);
5042 emit_valid_move_clobbercc (avr_byte (dest, 3), signs);
5044 else
5046 emit_valid_move_clobbercc (avr_byte (dest, 1), const0_rtx);
5047 emit_valid_move_clobbercc (avr_word (dest, 2), const0_rtx);
5049 return true;
5051 else if (IN_RANGE (ioff, 17, 23))
5053 rtx dst16 = avr_word (dest, 0);
5054 rtx src16 = avr_word (src, 2);
5055 avr_emit_shift (code, dst16, src16, ioff - 16, scratch);
5056 if (code == ASHIFTRT)
5058 rtx msb = avr_byte (src, 3);
5059 rtx signs = avr_byte (dest, 2);
5060 avr_emit_shift (code, signs, msb, 7, NULL_RTX);
5061 emit_valid_move_clobbercc (avr_byte (dest, 3), signs);
5063 else
5064 emit_valid_move_clobbercc (avr_word (dest, 2), const0_rtx);
5066 return true;
5068 else if (IN_RANGE (ioff, 9, 15))
5070 avr_emit_shift (code, dest, src, 8, scratch);
5071 rtx dst24 = avr_chunk (PSImode, dest, 0);
5072 rtx src24 = avr_chunk (PSImode, dest, 0);
5073 avr_emit_shift (code, dst24, src24, ioff - 8, scratch);
5074 return true;
5077 else
5078 gcc_unreachable ();
5080 return false;
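// Worked example (sketch): a 4-byte ASHIFT by 17 takes the 17...23
// branch above: the high word of DEST becomes "low word of SRC << 1",
// a cheap residual HImode shift, and the low word of DEST is cleared;
// no insn has to run all 4 bytes through a 17-step shift loop.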
5084 /* Handle the 3-byte case of avr_split_shift below:
5085 Split 3-byte shift DEST = SRC <code> IOFF into a 3-operand
5086 byte shift and a residual shift in a smaller mode if possible.
5087 SCRATCH is a QImode upper scratch register or NULL_RTX. */
5089 static bool
5090 avr_split_shift3 (rtx dest, rtx src, int ioff, rtx scratch, rtx_code code)
5092 gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 3);
5094 if (code == ASHIFT)
5096 if (IN_RANGE (ioff, 17, 23))
5098 rtx dst8 = avr_byte (dest, 2);
5099 rtx src8 = avr_byte (src, 0);
5100 avr_emit_shift (code, dst8, src8, ioff - 16, NULL_RTX);
5101 emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx);
5102 return true;
5104 // ...the 9...14 cases are only handled by define_split because
5105 // for now, we don't exploit that the low byte is zero.
5107 else if (code == ASHIFTRT
5108 || code == LSHIFTRT)
5110 if (IN_RANGE (ioff, 17, 22 + (code == LSHIFTRT)))
5112 rtx dst8 = avr_byte (dest, 0);
5113 rtx src8 = avr_byte (src, 2);
5114 avr_emit_shift (code, dst8, src8, ioff - 16, NULL_RTX);
5115 if (code == ASHIFTRT)
5117 rtx signs = avr_byte (dest, 1);
5118 avr_emit_shift (code, signs, src8, 7, NULL_RTX);
5119 emit_valid_move_clobbercc (avr_byte (dest, 2), signs);
5121 else
5123 emit_valid_move_clobbercc (avr_byte (dest, 1), const0_rtx);
5124 emit_valid_move_clobbercc (avr_byte (dest, 2), const0_rtx);
5126 return true;
5128 else if (IN_RANGE (ioff, 9, 15))
5130 avr_emit_shift (code, dest, src, 8, scratch);
5131 rtx dst16 = avr_chunk (HImode, dest, 0);
5132 rtx src16 = avr_chunk (HImode, dest, 0);
5133 avr_emit_shift (code, dst16, src16, ioff - 8, scratch);
5134 return true;
5137 else
5138 gcc_unreachable ();
5140 return false;
5144 /* Handle the 2-byte case of avr_split_shift below:
5145 Split 2-byte shift DEST = SRC <code> IOFF into a 3-operand
5146 byte shift and a residual shift in a smaller mode if possible.
5147 SCRATCH is a QImode upper scratch register or NULL_RTX. */
5149 static bool
5150 avr_split_shift2 (rtx dest, rtx src, int ioff, rtx /*scratch*/, rtx_code code)
5152 gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 2);
5154 if (code == ASHIFT)
5156 if (IN_RANGE (ioff, 9, 15))
5158 rtx dst8 = avr_byte (dest, 1);
5159 rtx src8 = avr_byte (src, 0);
5160 avr_emit_shift (code, dst8, src8, ioff - 8, NULL_RTX);
5161 emit_valid_move_clobbercc (avr_byte (dest, 0), const0_rtx);
5162 return true;
5165 else if (code == ASHIFTRT
5166 || code == LSHIFTRT)
5168 if (IN_RANGE (ioff, 9, 14 + (code == LSHIFTRT)))
5170 rtx dst8 = avr_byte (dest, 0);
5171 rtx src8 = avr_byte (src, 1);
5172 rtx signs = const0_rtx;
5173 avr_emit_shift (code, dst8, src8, ioff - 8, NULL_RTX);
5174 if (code == ASHIFTRT)
5176 signs = avr_byte (dest, 1);
5177 avr_emit_shift (code, signs, src8, 7, NULL_RTX);
5179 emit_valid_move_clobbercc (avr_byte (dest, 1), signs);
5180 return true;
5183 else
5184 gcc_unreachable ();
5186 return false;
5190 /* Worker for a define_split that runs when -msplit-bit-shift is on.
5191 Split a shift of code CODE into a 3op byte shift and a residual bit shift.
5192 Return 'true' when a split has been performed and insns have been emitted.
5193 Otherwise, return 'false'. */
5195 bool
5196 avr_split_shift (rtx xop[], rtx scratch, rtx_code code)
5198 scratch = scratch && REG_P (scratch) ? scratch : NULL_RTX;
5199 rtx dest = xop[0];
5200 rtx src = xop[1];
5201 int ioff = INTVAL (xop[2]);
5202 int n_bytes = GET_MODE_SIZE (GET_MODE (dest));
5204 return select<bool>()
5205 : n_bytes == 2 ? avr_split_shift2 (dest, src, ioff, scratch, code)
5206 : n_bytes == 3 ? avr_split_shift3 (dest, src, ioff, scratch, code)
5207 : n_bytes == 4 ? avr_split_shift4 (dest, src, ioff, scratch, code)
5208 : bad_case<bool> ();
5212 namespace
5216 //////////////////////////////////////////////////////////////////////////////
5217 // Determine whether an ISR may use the __gcc_isr pseudo-instruction.
5219 static const pass_data avr_pass_data_pre_proep =
5221 RTL_PASS, // type
5222 "", // name (will be patched)
5223 OPTGROUP_NONE, // optinfo_flags
5224 TV_DF_SCAN, // tv_id
5225 0, // properties_required
5226 0, // properties_provided
5227 0, // properties_destroyed
5228 0, // todo_flags_start
5229 0 // todo_flags_finish
5232 class avr_pass_pre_proep : public rtl_opt_pass
5234 public:
5235 avr_pass_pre_proep (gcc::context *ctxt, const char *name)
5236 : rtl_opt_pass (avr_pass_data_pre_proep, ctxt)
5238 this->name = name;
5241 void compute_maybe_gasisr (function *);
5243 unsigned int execute (function *fun) final override
5245 if (avropt_gasisr_prologues
5246 // Whether this function is an ISR worth scanning at all.
5247 && !fun->machine->is_no_gccisr
5248 && (fun->machine->is_interrupt
5249 || fun->machine->is_signal)
5250 && !cfun->machine->is_naked
5251 // Paranoia: Non-local gotos and labels that might escape.
5252 && !cfun->calls_setjmp
5253 && !cfun->has_nonlocal_label
5254 && !cfun->has_forced_label_in_static)
5256 compute_maybe_gasisr (fun);
5259 return 0;
5262 }; // avr_pass_pre_proep
5265 /* Set fun->machine->gasisr.maybe provided we don't find anything that
5266 prohibits GAS from generating parts of ISR prologues / epilogues for us. */
5268 void
5269 avr_pass_pre_proep::compute_maybe_gasisr (function *fun)
5271 // Don't use BB iterators so that we see JUMP_TABLE_DATA.
5273 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
5275 // Transparent calls always use [R]CALL and are filtered out by GAS.
5276 // ISRs don't use -mcall-prologues, hence what remains to be filtered
5277 // out are open coded (tail) calls.
5279 if (CALL_P (insn))
5280 return;
5282 // __tablejump2__ clobbers something and is targeted by JMP so
5283 // that GAS won't see its usage.
5285 if (AVR_HAVE_JMP_CALL
5286 && JUMP_TABLE_DATA_P (insn))
5287 return;
5289 // Non-local gotos not seen in *FUN.
5291 if (JUMP_P (insn)
5292 && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
5293 return;
5296 fun->machine->gasisr.maybe = 1;
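// Illustration (hedged): with gasisr.maybe = 1, prologue and epilogue
// can emit the GAS pseudo-instruction, roughly
//     __gcc_isr 1    ; prologue part
//     ...
//     __gcc_isr 2    ; epilogue part
// leaving it to the assembler to save / restore only those of SREG,
// the tmp and the zero register that the ISR body actually needs.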
5301 //////////////////////////////////////////////////////////////////////////////
5302 // Late recomputation of notes so we can use `reg_unused_after()' and friends.
5304 static const pass_data avr_pass_data_recompute_notes =
5306 RTL_PASS, // type
5307 "", // name (will be patched)
5308 OPTGROUP_NONE, // optinfo_flags
5309 TV_DF_SCAN, // tv_id
5310 0, // properties_required
5311 0, // properties_provided
5312 0, // properties_destroyed
5313 0, // todo_flags_start
5314 TODO_df_finish | TODO_df_verify // todo_flags_finish
5317 class avr_pass_recompute_notes : public rtl_opt_pass
5319 public:
5320 avr_pass_recompute_notes (gcc::context *ctxt, const char *name)
5321 : rtl_opt_pass (avr_pass_data_recompute_notes, ctxt)
5323 this->name = name;
5326 unsigned int execute (function *) final override
5328 df_note_add_problem ();
5329 df_analyze ();
5331 return 0;
5333 }; // avr_pass_recompute_notes
5335 } // anonymous namespace
5339 //////////////////////////////////////////////////////////////////////////////
5340 // Function visible and used outside this module.
5342 /* During reload, we allow many more addresses than Reduced Tiny actually
5343 supports. Split them after reload in order to get closer to the
5344 core's capabilities. This sets the stage for pass .avr-fuse-add. */
5346 bool
5347 avr_split_fake_addressing_move (rtx_insn * /*insn*/, rtx *xop)
5349 bool store_p = false;
5350 rtx mem, reg_or_0;
5352 if (REG_P (xop[0]) && MEM_P (xop[1]))
5354 reg_or_0 = xop[0];
5355 mem = xop[1];
5357 else if (MEM_P (xop[0])
5358 && (REG_P (xop[1])
5359 || xop[1] == CONST0_RTX (GET_MODE (xop[0]))))
5361 mem = xop[0];
5362 reg_or_0 = xop[1];
5363 store_p = true;
5365 else
5366 return false;
5368 machine_mode mode = GET_MODE (mem);
5369 rtx base, addr = XEXP (mem, 0);
5370 rtx_code addr_code = GET_CODE (addr);
5372 if (REG_P (reg_or_0)
5373 && reg_overlap_mentioned_p (reg_or_0, addr))
5374 return false;
5375 else if (addr_code == PLUS || addr_code == PRE_DEC || addr_code == POST_INC)
5376 base = XEXP (addr, 0);
5377 else if (addr_code == REG)
5378 base = addr;
5379 else
5380 return false;
5382 if (REGNO (base) > REG_Z)
5383 return false;
5385 if (! AVR_TINY
5386 // Only keep base registers that can't do PLUS addressing.
5387 && ((REGNO (base) != REG_X
5388 && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem)))
5389 || avr_load_libgcc_p (mem)
5390 || avr_mem_memx_p (mem)))
5391 return false;
5393 bool volatile_p = MEM_VOLATILE_P (mem);
5394 bool mem_volatile_p = false;
5395 if (frame_pointer_needed
5396 && REGNO (base) == FRAME_POINTER_REGNUM)
5398 if (avropt_fuse_add < 2
5399 // Be a projection (we always split PLUS).
5400 || (avropt_fuse_add == 2 && volatile_p && addr_code != PLUS))
5401 return false;
5403 // Changing the frame pointer locally may confuse later passes
5404 // like .dse2 which don't track changes of FP, not even when
5405 // respective CFA notes are present. An example is pr22141-1.c.
5406 if (avropt_fuse_add == 2)
5407 mem_volatile_p = true;
5410 rtx_code new_code = UNKNOWN;
5411 HOST_WIDE_INT add = 0, sub = 0;
5412 int msize = GET_MODE_SIZE (mode);
5414 AVR_LdSt_Props ap { (int) REGNO (base), store_p, volatile_p,
5415 ADDR_SPACE_GENERIC };
5417 switch (addr_code)
5419 default:
5420 return false;
5422 case PLUS:
5423 add = INTVAL (XEXP (addr, 1));
5424 if (msize == 1)
5426 new_code = REG;
5427 sub = -add;
5429 else if (ap.want_predec)
5431 // volatile stores prefer PRE_DEC (MSB first)
5432 sub = -add;
5433 add += msize;
5434 new_code = PRE_DEC;
5436 else
5438 new_code = POST_INC;
5439 sub = -add - msize;
5441 break;
5443 case POST_INC:
5444 // volatile stores prefer PRE_DEC (MSB first)
5445 if (msize > 1 && ap.want_predec)
5447 add = msize;
5448 new_code = PRE_DEC;
5449 sub = msize;
5450 break;
5452 return false;
5454 case PRE_DEC:
5455 // volatile loads prefer POST_INC (LSB first)
5456 if (msize > 1 && ap.want_postinc)
5458 add = -msize;
5459 new_code = POST_INC;
5460 sub = -msize;
5461 break;
5463 return false;
5465 case REG:
5466 if (msize == 1)
5467 return false;
5469 if (ap.want_predec)
5471 add = msize;
5472 new_code = PRE_DEC;
5473 sub = 0;
5475 else
5477 add = 0;
5478 new_code = POST_INC;
5479 sub = -msize;
5481 break;
5482 } // switch addr_code
5484 rtx_insn *insn;
5486 if (add)
5488 insn = emit_move_ccc (base, plus_constant (Pmode, base, add));
5489 avr_maybe_adjust_cfa (insn, base, add);
5492 rtx new_addr = new_code == REG
5493 ? base
5494 : gen_rtx_fmt_e (new_code, Pmode, base);
5496 rtx new_mem = change_address (mem, mode, new_addr);
5497 if (mem_volatile_p)
5498 MEM_VOLATILE_P (new_mem) = 1;
5500 insn = emit_move_ccc (store_p ? new_mem : reg_or_0,
5501 store_p ? reg_or_0 : new_mem);
5502 if (auto_inc_p (new_addr))
5504 add_reg_note (insn, REG_INC, base);
5505 int off = new_code == POST_INC ? msize : -msize;
5506 avr_maybe_adjust_cfa (insn, base, off);
5509 if (sub)
5511 insn = emit_move_ccc (base, plus_constant (Pmode, base, sub));
5512 avr_maybe_adjust_cfa (insn, base, sub);
5515 return true;
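// Example (sketch) for Reduced Tiny, where "X + 3" addressing is fake:
//     R24 = mem:QI (X + 3)
// is split into "X += 3 ;  R24 = mem:QI (X) ;  X += -3", which sets
// the stage for .avr-fuse-add to absorb the adjustments.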
5519 /* Given memory reference mem(ADDR), return true when it can be split into
5520 single-byte moves, and all resulting addresses are natively supported.
5521 ADDR is in addr-space generic. */
5523 static bool
5524 splittable_address_p (rtx addr, int n_bytes)
5526 if (CONSTANT_ADDRESS_P (addr)
5527 || GET_CODE (addr) == PRE_DEC
5528 || GET_CODE (addr) == POST_INC)
5529 return true;
5531 if (! AVR_TINY)
5533 rtx base = select<rtx>()
5534 : REG_P (addr) ? addr
5535 : GET_CODE (addr) == PLUS ? XEXP (addr, 0)
5536 : NULL_RTX;
5538 int off = select<int>()
5539 : REG_P (addr) ? 0
5540 : GET_CODE (addr) == PLUS ? (int) INTVAL (XEXP (addr, 1))
5541 : -1;
5543 return (base && REG_P (base)
5544 && (REGNO (base) == REG_Y || REGNO (base) == REG_Z)
5545 && IN_RANGE (off, 0, 64 - n_bytes));
5548 return false;
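// Example (hedged): with n_bytes = 2, address Y+62 is splittable
// (62 <= 64 - 2 keeps the last byte at Y+63 in LDD range), Y+63 is
// not, and X-based addresses never are (there is no LDD through X).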
5552 /* Like avr_byte(), but also knows how to split POST_INC and PRE_DEC
5553 memory references. */
5555 static rtx
5556 avr_byte_maybe_mem (rtx x, int n)
5558 rtx addr, b;
5559 if (MEM_P (x)
5560 && (GET_CODE (addr = XEXP (x, 0)) == POST_INC
5561 || GET_CODE (addr) == PRE_DEC))
5562 b = gen_rtx_MEM (QImode, copy_rtx (addr));
5563 else
5564 b = avr_byte (x, n);
5566 if (MEM_P (x))
5567 gcc_assert (MEM_P (b));
5569 return b;
5573 /* Split a multi-byte load / store into single-byte insns, provided
5574 the access is non-volatile, its address space is generic, the moved
5575 register does not overlap the address, and all resulting addresses
5576 are natively supported. Returns true when the XOP[0] = XOP[1] insn
5577 has been split, and false otherwise. */
5579 bool
5580 avr_split_ldst (rtx *xop)
5582 rtx dest = xop[0];
5583 rtx src = xop[1];
5584 machine_mode mode = GET_MODE (dest);
5585 int n_bytes = GET_MODE_SIZE (mode);
5586 rtx mem, reg_or_0;
5588 if (MEM_P (dest) && reg_or_0_operand (src, mode))
5590 mem = dest;
5591 reg_or_0 = src;
5593 else if (register_operand (dest, mode) && MEM_P (src))
5595 reg_or_0 = dest;
5596 mem = src;
5598 else
5599 return false;
5601 rtx addr = XEXP (mem, 0);
5603 if (MEM_VOLATILE_P (mem)
5604 || ! ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem))
5605 || ! IN_RANGE (n_bytes, 2, 4)
5606 || ! splittable_address_p (addr, n_bytes)
5607 || reg_overlap_mentioned_p (reg_or_0, addr))
5608 return false;
5610 const int step = GET_CODE (addr) == PRE_DEC ? -1 : 1;
5611 const int istart = step > 0 ? 0 : n_bytes - 1;
5612 const int iend = istart + step * n_bytes;
5614 for (int i = istart; i != iend; i += step)
5616 rtx di = avr_byte_maybe_mem (dest, i);
5617 rtx si = avr_byte_maybe_mem (src, i);
5618 emit_move_ccc (di, si);
5621 return true;
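// Example (sketch): a non-volatile 2-byte load
//     R25:R24 = mem:HI (Y + 2)
// splits into the two natively supported byte loads
//     R24 = mem:QI (Y + 2) ;  R25 = mem:QI (Y + 3)
// so that later passes can optimize the two bytes independently.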
5626 // Functions make_<pass-name> (gcc::context*) where <pass-name> is
5627 // according to the pass declaration in avr-passes.def. GCC's pass
5628 // manager uses these functions to create the respective pass objects.
5630 // Optimize results of the casesi expander for modes < SImode.
5632 rtl_opt_pass *
5633 make_avr_pass_casesi (gcc::context *ctxt)
5635 return new avr_pass_casesi (ctxt, "avr-casesi");
5638 // Try to replace 2 cbranch insns with 1 comparison and 2 branches.
5640 rtl_opt_pass *
5641 make_avr_pass_ifelse (gcc::context *ctxt)
5643 return new avr_pass_ifelse (ctxt, "avr-ifelse");
5646 // Determine whether an ISR may use the __gcc_isr pseudo-instruction.
5648 rtl_opt_pass *
5649 make_avr_pass_pre_proep (gcc::context *ctxt)
5651 return new avr_pass_pre_proep (ctxt, "avr-pre-proep");
5654 // Find more POST_INC and PRE_DEC cases.
5656 rtl_opt_pass *
5657 make_avr_pass_fuse_add (gcc::context *ctxt)
5659 return new avr_pass_fuse_add (ctxt, "avr-fuse-add");
5662 // Late recomputation of notes so we can use `reg_unused_after()' and friends.
5664 rtl_opt_pass *
5665 make_avr_pass_recompute_notes (gcc::context *ctxt)
5667 return new avr_pass_recompute_notes (ctxt, "avr-notes-free-cfg");
5670 // Optimize moves after reload.
5672 rtl_opt_pass *
5673 make_avr_pass_fuse_move (gcc::context *ctxt)
5675 return new avr_pass_fuse_move (ctxt, "avr-fuse-move");
5678 // Split insns after peephole2 / before avr-fuse-move.
5680 rtl_opt_pass *
5681 make_avr_pass_split_after_peephole2 (gcc::context *ctxt)
5683 return new avr_pass_split_after_peephole2 (ctxt, "avr-split-after-peephole2");