[official-gcc.git] / gcc / ext-dce.cc
1 /* RTL dead zero/sign extension (code) elimination.
2 Copyright (C) 2000-2025 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "memmodel.h"
27 #include "insn-config.h"
28 #include "emit-rtl.h"
29 #include "recog.h"
30 #include "cfganal.h"
31 #include "tree-pass.h"
32 #include "cfgrtl.h"
33 #include "rtl-iter.h"
34 #include "df.h"
35 #include "print-rtl.h"
36 #include "dbgcnt.h"
37 #include "diagnostic-core.h"
39 /* These should probably move into a C++ class. */
40 static vec<bitmap_head> livein;
41 static bitmap all_blocks;
42 static bitmap livenow;
43 static bitmap changed_pseudos;
44 static bool modify;
46 /* We consider four bit groups for liveness:
47 bit 0..7 (least significant byte)
48 bit 8..15 (second least significant byte)
49 bit 16..31
50 bit 32..BITS_PER_WORD-1 */
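   Liveness is tracked in a single bitmap: bit group G (0..3) of
   register R maps to bitmap bit R * 4 + G.  For example, if the second
   least significant byte of pseudo 100 is live, bit 401 is set.  */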
52 /* For the given REG, return the number of bit groups implied by the
53 size of the REG's mode, up to a maximum of 4 (number of bit groups
54 tracked by this pass).
56 For partial integer and variable sized modes also return 4. This
57 could possibly be refined for something like PSI mode, but it
58 does not seem worth the effort. */
60 static int
61 group_limit (const_rtx reg)
63 machine_mode mode = GET_MODE (reg);
65 if (!GET_MODE_BITSIZE (mode).is_constant ())
66 return 4;
68 int size = GET_MODE_SIZE (mode).to_constant ();
70 size = exact_log2 (size);
72 if (size < 0)
73 return 4;
75 size++;
76 return (size > 4 ? 4 : size);
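/* As examples of the mapping above: a QImode reg yields 1 group, HImode 2,
   SImode 3, and DImode (or anything larger, or with a non-power-of-two
   size) yields all 4 groups.  */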
79 /* Make all bit groups live for REGNO in bitmap BMAP. For hard regs,
80 we assume all groups are live. For a pseudo we consider the size
81 of the pseudo to avoid creating unnecessarily live chunks of data. */
83 static void
84 make_reg_live (bitmap bmap, int regno)
86 int limit;
88 /* For pseudos we can use the mode to limit how many bit groups
89 are marked as live since a pseudo only has one mode. Hard
90 registers have to be handled more conservatively. */
91 if (regno > FIRST_PSEUDO_REGISTER)
93 rtx reg = regno_reg_rtx[regno];
94 limit = group_limit (reg);
96 else
97 limit = 4;
99 for (int i = 0; i < limit; i++)
100 bitmap_set_bit (bmap, regno * 4 + i);
103 /* Note this pass could be used to narrow memory loads too. It's
104 not clear if that's profitable or not in general. */
106 #define UNSPEC_P(X) (GET_CODE (X) == UNSPEC || GET_CODE (X) == UNSPEC_VOLATILE)
108 /* If we know the destination of CODE only uses some low bits
109 (say just the QI bits of an SI operation), then return true
110 if we can propagate the need for just the subset of bits
111 from the destination to the sources.
113 FIXME: This is safe for operands 1 and 2 of an IF_THEN_ELSE, but not
114 operand 0. Thus it would likely need some special casing to handle. */
116 static bool
117 safe_for_live_propagation (rtx_code code)
119 /* First handle rtx classes which as a whole are known to
120 be either safe or unsafe. */
121 switch (GET_RTX_CLASS (code))
123 case RTX_OBJ:
124 case RTX_CONST_OBJ:
125 return true;
127 case RTX_COMPARE:
128 case RTX_COMM_COMPARE:
129 case RTX_TERNARY:
130 return false;
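      /* Conservative: for a comparison every input bit can affect the
	 result, and for ternaries such as IF_THEN_ELSE only some operands
	 would be safe (see the FIXME above), so we do not propagate
	 narrowed liveness through either class.  */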
132 default:
133 break;
136 /* What's left are specific codes. We only need to identify those
137 which are safe. */
138 switch (code)
140 /* These are trivially safe. */
141 case SUBREG:
142 case NOT:
143 case ZERO_EXTEND:
144 case SIGN_EXTEND:
145 case TRUNCATE:
146 case PLUS:
147 case MINUS:
148 case MULT:
149 case SMUL_HIGHPART:
150 case UMUL_HIGHPART:
151 case AND:
152 case IOR:
153 case XOR:
154 return true;
156 /* We can propagate for the shifted operand, but not the shift
157 count. The count is handled specially. */
158 case ASHIFT:
159 case LSHIFTRT:
160 case ASHIFTRT:
161 case SS_ASHIFT:
162 case US_ASHIFT:
163 return true;
165 /* There may be other safe codes. If so they can be added
166 individually when discovered. */
167 default:
168 return false;
172 /* Clear bits in LIVENOW and set bits in LIVE_TMP for objects
173 set/clobbered by OBJ contained in INSN.
175 Conceptually it is always safe to ignore a particular destination
176 here as that will result in more chunks of data being considered
177 live. That's what happens when we "continue" the main loop when
178 we see something we don't know how to handle such as a vector
179 mode destination.
181 The more accurate we are in identifying what objects (and chunks
182 within an object) are set by INSN, the more aggressive the
183 optimization phase during use handling will be. */
185 static bool
186 ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap live_tmp)
188 bool skipped_dest = false;
190 subrtx_iterator::array_type array;
191 FOR_EACH_SUBRTX (iter, array, obj, NONCONST)
193 const_rtx x = *iter;
195 /* An EXPR_LIST (from call fusage) ends in NULL_RTX. */
196 if (x == NULL_RTX)
197 continue;
199 if (UNSPEC_P (x))
200 continue;
202 if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
204 unsigned bit = 0;
205 x = SET_DEST (x);
207 /* We don't support vector destinations or destinations
208 wider than DImode. */
209 scalar_int_mode outer_mode;
210 if (!is_a <scalar_int_mode> (GET_MODE (x), &outer_mode)
211 || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
213 /* Skip the subrtxs of this destination. There is
214 little value in iterating into the subobjects, so
215 just skip them for a bit of efficiency. */
216 skipped_dest = true;
217 iter.skip_subrtxes ();
218 continue;
221 /* We could have (strict_low_part (subreg ...)). We can not just
222 strip the STRICT_LOW_PART as that would result in clearing
223 some bits in LIVENOW that are still live. So process the
224 STRICT_LOW_PART specially. */
225 if (GET_CODE (x) == STRICT_LOW_PART)
227 x = XEXP (x, 0);
229 /* The only valid operand of a STRICT_LOW_PART is a non
230 paradoxical SUBREG. */
231 gcc_assert (SUBREG_P (x)
232 && !paradoxical_subreg_p (x)
233 && SUBREG_BYTE (x).is_constant ());
235 /* I think we should always see a REG here. But let's
236 be sure. */
237 gcc_assert (REG_P (SUBREG_REG (x)));
239 /* The inner mode might be larger, just punt for
240 that case. Remember, we can not just continue to process
241 the inner RTXs due to the STRICT_LOW_PART. */
242 if (!is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
243 || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
245 /* Skip the subrtxs of the STRICT_LOW_PART. We can't
246 process them because it'll set objects as no longer
247 live when they are in fact still live. */
248 skipped_dest = true;
249 iter.skip_subrtxes ();
250 continue;
253 /* LIVE_TMP contains the set groups that are live-out and set in
254 this insn. It is used to narrow the groups live-in for the
255 inputs of this insn.
257 The simple thing to do is mark all the groups as live, but
258 that will significantly inhibit optimization.
260 We also need to be careful in the case where we have an in-out
261 operand. If we're not careful we'd clear LIVE_TMP
262 incorrectly. */
263 HOST_WIDE_INT rn = REGNO (SUBREG_REG (x));
264 int limit = group_limit (SUBREG_REG (x));
265 for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
266 if (bitmap_bit_p (livenow, i))
267 bitmap_set_bit (live_tmp, i);
269 if (bitmap_empty_p (live_tmp))
270 make_reg_live (live_tmp, rn);
272 /* The mode of the SUBREG tells us how many bits we can
273 clear. */
274 machine_mode mode = GET_MODE (x);
275 HOST_WIDE_INT size
276 = exact_log2 (GET_MODE_SIZE (mode).to_constant ()) + 1;
277 bitmap_clear_range (livenow, 4 * rn, size);
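	      /* For example, (strict_low_part (subreg:HI (reg:SI X) 0))
		 writes only the low 16 bits of X, so this clears groups 0
		 and 1 and leaves groups 2 and 3 of X untouched.  */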
279 /* We have fully processed this destination. */
280 iter.skip_subrtxes ();
281 continue;
284 /* Phase one of destination handling. First remove any wrapper
285 such as SUBREG or ZERO_EXTRACT. */
286 unsigned HOST_WIDE_INT mask
287 = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
288 if (SUBREG_P (x))
290 /* If we have a SUBREG destination that is too wide, just
291 skip the destination rather than continuing this iterator.
292 While continuing would be better, we'd need to strip the
293 subreg and restart within the SET processing rather than
294 the top of the loop which just complicates the flow even
295 more. */
296 if (!is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
297 || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
299 skipped_dest = true;
300 iter.skip_subrtxes ();
301 continue;
304 /* We can safely strip a paradoxical subreg. The inner mode will
305 be narrower than the outer mode. We'll clear fewer bits in
306 LIVENOW than we'd like, but that's always safe. */
307 if (paradoxical_subreg_p (x))
308 x = XEXP (x, 0);
309 else if (SUBREG_BYTE (x).is_constant ())
311 bit = subreg_lsb (x).to_constant ();
312 mask = GET_MODE_MASK (GET_MODE (SUBREG_REG (x))) << bit;
313 gcc_assert (mask);
314 x = SUBREG_REG (x);
316 else
317 gcc_unreachable ();
320 if (GET_CODE (x) == ZERO_EXTRACT)
322 /* Unlike a SUBREG destination, a set of a ZERO_EXTRACT only
323 modifies the bits referenced in the ZERO_EXTRACT, the rest
324 remain the same. Thus we can not continue here, we must
325 either figure out what part of the destination is modified
326 or skip the sub-rtxs. */
327 skipped_dest = true;
328 iter.skip_subrtxes ();
329 continue;
332 /* BIT >= 64 indicates something went horribly wrong. */
333 gcc_assert (bit <= HOST_BITS_PER_WIDE_INT - 1);
335 /* Now handle the actual object that was changed. */
336 if (REG_P (x))
338 /* LIVE_TMP contains the set groups that are live-out and set in
339 this insn. It is used to narrow the groups live-in for the
340 inputs of this insn.
342 The simple thing to do is mark all the groups as live, but
343 that will significantly inhibit optimization.
345 We also need to be careful in the case where we have an in-out
346 operand. If we're not careful we'd clear LIVE_TMP
347 incorrectly. */
348 HOST_WIDE_INT rn = REGNO (x);
349 int limit = group_limit (x);
350 for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
351 if (bitmap_bit_p (livenow, i))
352 bitmap_set_bit (live_tmp, i);
354 if (bitmap_empty_p (live_tmp))
355 make_reg_live (live_tmp, rn);
357 /* Now clear the bits known written by this instruction.
358 Note that BIT need not be a power of two, consider a
359 ZERO_EXTRACT destination. */
360 int start = (bit < 8 ? 0 : bit < 16 ? 1 : bit < 32 ? 2 : 3);
361 int end = ((mask & ~HOST_WIDE_INT_UC (0xffffffff)) ? 4
362 : (mask & HOST_WIDE_INT_UC (0xffff0000)) ? 3
363 : (mask & 0xff00) ? 2 : 1);
364 bitmap_clear_range (livenow, 4 * rn + start, end - start);
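	      /* For example, a QImode REG destination has MASK == 0xff and
		 BIT == 0, so START == 0 and END == 1 and only the low byte
		 group is cleared; an SImode destination (MASK == 0xffffffff)
		 clears groups 0 through 2.  */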
366 /* Some ports generate (clobber (const_int)). */
367 else if (CONST_INT_P (x))
368 continue;
369 else
370 gcc_assert (CALL_P (insn)
371 || MEM_P (x)
372 || x == pc_rtx
373 || GET_CODE (x) == SCRATCH);
375 iter.skip_subrtxes ();
377 else if (GET_CODE (x) == COND_EXEC)
379 /* This isn't ideal, but may not be so bad in practice. */
380 skipped_dest = true;
381 iter.skip_subrtxes ();
384 return skipped_dest;
387 /* INSN has a sign/zero extended source inside SET that we will
388 try to turn into a SUBREG. */
389 static void
390 ext_dce_try_optimize_insn (rtx_insn *insn, rtx set)
392 rtx src = SET_SRC (set);
393 rtx inner = XEXP (src, 0);
395 /* Avoid (subreg (mem)) and other constructs which may be valid RTL, but
396 not useful for this optimization. */
397 if (!(REG_P (inner) || (SUBREG_P (inner) && REG_P (SUBREG_REG (inner)))))
398 return;
400 rtx new_pattern;
401 if (dump_file)
403 fprintf (dump_file, "Processing insn:\n");
404 dump_insn_slim (dump_file, insn);
405 fprintf (dump_file, "Trying to simplify pattern:\n");
406 print_rtl_single (dump_file, SET_SRC (set));
409 /* We decided to do the optimization but allow it to be rejected for
410 bisection purposes. */
411 if (!dbg_cnt (::ext_dce))
413 if (dump_file)
414 fprintf (dump_file, "Rejected due to debug counter.\n");
415 return;
418 new_pattern = simplify_gen_subreg (GET_MODE (src), inner,
419 GET_MODE (inner), 0);
420 /* simplify_gen_subreg may fail in which case NEW_PATTERN will be NULL.
421 We must not pass that as a replacement pattern to validate_change. */
422 if (new_pattern)
424 int ok = validate_change (insn, &SET_SRC (set), new_pattern, false);
426 rtx x = SET_DEST (set);
427 while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
428 x = XEXP (x, 0);
430 gcc_assert (REG_P (x));
431 if (ok)
432 bitmap_set_bit (changed_pseudos, REGNO (x));
434 if (dump_file)
436 if (ok)
437 fprintf (dump_file, "Successfully transformed to:\n");
438 else
439 fprintf (dump_file, "Failed transformation to:\n");
441 print_rtl_single (dump_file, new_pattern);
442 fprintf (dump_file, "\n");
445 else
447 if (dump_file)
448 fprintf (dump_file, "Unable to generate valid SUBREG expression.\n");
452 /* Some operators imply that their second operand is fully live,
453 regardless of how many bits in the output are live. An example
454 would be the shift count on a target without SHIFT_COUNT_TRUNCATED
455 defined.
457 Return TRUE if CODE is such an operator. FALSE otherwise. */
459 static bool
460 binop_implies_op2_fully_live (rtx_code code)
462 switch (code)
464 case ASHIFT:
465 case LSHIFTRT:
466 case ASHIFTRT:
467 case ROTATE:
468 case ROTATERT:
469 case SS_ASHIFT:
470 case US_ASHIFT:
471 return !SHIFT_COUNT_TRUNCATED;
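      /* E.g. in (ashift:SI (reg A) (reg B)), when the target does not
	 truncate shift counts, any bit of B can change the result, so B
	 must be considered fully live no matter which result bits are
	 needed.  */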
473 default:
474 return false;
478 /* X, with code CODE, is an operation for which safe_for_live_propagation
479 holds true, and bits set in MASK are live in the result. Compute a
480 mask of (potentially) live bits in the non-constant inputs. In case of
481 binop_implies_op2_fully_live (e.g. shifts), the computed mask may
482 exclusively pertain to the first operand.
484 This looks wrong as we may have some important operations embedded as
485 operands of another operation. For example, we might have an extension
486 wrapping a shift. It really feels like this needs to recurse down
487 into operands much more often. */
489 unsigned HOST_WIDE_INT
490 carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x)
492 if (mask == 0)
493 return 0;
495 enum machine_mode mode = GET_MODE_INNER (GET_MODE (x));
496 unsigned HOST_WIDE_INT mmask = GET_MODE_MASK (mode);
498 /* While we don't try to optimize operations on types larger
499 than 64 bits, we do want to make sure not to invoke undefined
500 behavior when presented with such operations during use
501 processing. The safe thing to do is to just return mmask
502 for that scenario, indicating every possible chunk is live. */
503 scalar_int_mode smode;
504 if (!is_a <scalar_int_mode> (mode, &smode)
505 || GET_MODE_BITSIZE (smode) > HOST_BITS_PER_WIDE_INT)
506 return mmask;
508 switch (code)
510 case PLUS:
511 case MINUS:
512 case MULT:
513 return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
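      /* Carries only propagate towards more significant bits.  E.g. if
	 only bit 7 of a PLUS result is live (MASK == 0x80), any of bits
	 0..7 of the inputs can influence it via a carry, giving
	 (2 << 7) - 1 == 0xff.  */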
515 /* We propagate for the shifted operand, but not the shift
516 count. The count is handled specially. */
517 case ASHIFT:
518 if (CONST_INT_P (XEXP (x, 1))
519 && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
520 return (HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1));
521 return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
523 /* We propagate for the shifted operand, but not the shift
524 count. The count is handled specially. */
525 case LSHIFTRT:
526 if (CONST_INT_P (XEXP (x, 1))
527 && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
528 return mmask & (mask << INTVAL (XEXP (x, 1)));
529 return mmask;
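      /* E.g. for (lshiftrt:SI X (const_int 8)) with MASK == 0xff, result
	 bits 0..7 come from input bits 8..15, so the propagated mask is
	 0xff00 (limited to the mode's mask).  */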
531 /* We propagate for the shifted operand, but not the shift
532 count. The count is handled specially. */
533 case ASHIFTRT:
534 if (CONST_INT_P (XEXP (x, 1))
535 && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
537 HOST_WIDE_INT sign = 0;
538 if (HOST_BITS_PER_WIDE_INT - clz_hwi (mask) + INTVAL (XEXP (x, 1))
539 > GET_MODE_BITSIZE (smode))
540 sign = HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (smode) - 1);
541 return sign | (mmask & (mask << INTVAL (XEXP (x, 1))));
543 return mmask;
545 case SMUL_HIGHPART:
546 case UMUL_HIGHPART:
547 if (XEXP (x, 1) == const0_rtx)
548 return 0;
549 if (XEXP (x, 1) == const1_rtx)
550 return mmask;
551 if (CONST_INT_P (XEXP (x, 1)))
553 if (pow2p_hwi (INTVAL (XEXP (x, 1))))
554 return mmask & (mask << (GET_MODE_BITSIZE (smode)
555 - exact_log2 (INTVAL (XEXP (x, 1)))));
557 int bits = (HOST_BITS_PER_WIDE_INT + GET_MODE_BITSIZE (smode)
558 - clz_hwi (mask) - ctz_hwi (INTVAL (XEXP (x, 1))));
559 if (bits < GET_MODE_BITSIZE (smode))
560 return (HOST_WIDE_INT_1U << bits) - 1;
562 return mmask;
564 case SIGN_EXTEND:
565 if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
566 || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
567 return -1;
569 /* We want the mode of the inner object. We need to ensure its
570 sign bit is on in MASK. */
571 mode = GET_MODE_INNER (GET_MODE (XEXP (x, 0)));
572 if (mask & ~GET_MODE_MASK (mode))
573 mask |= HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode).to_constant ()
574 - 1);
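      /* E.g. for (sign_extend:DI (reg:SI X)) with any live bit above
	 bit 31, bit 31 of X supplies all of those bits, so it is forced
	 live in MASK before recursing into the operand.  */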
576 /* Recurse into the operand. */
577 return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
579 case ZERO_EXTEND:
580 if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
581 || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
582 return -1;
584 /* Recurse into the operand. */
585 return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
587 /* We propagate for the shifted operand, but not the shift
588 count. The count is handled specially. */
589 case SS_ASHIFT:
590 case US_ASHIFT:
591 if (CONST_INT_P (XEXP (x, 1))
592 && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
594 return ((mmask & ~((unsigned HOST_WIDE_INT) mmask
595 >> (INTVAL (XEXP (x, 1))
596 + (XEXP (x, 1) != const0_rtx
597 && code == SS_ASHIFT))))
598 | ((HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1))));
600 return mmask;
602 default:
603 return mask;
607 /* Process uses in INSN contained in OBJ. Set appropriate bits in LIVENOW
608 for any chunks of pseudos that become live, potentially filtering using
609 bits from LIVE_TMP.
611 If MODIFY is true, then optimize sign/zero extensions to SUBREGs when
612 the extended bits are never read and mark pseudos which had extensions
613 eliminated in CHANGED_PSEUDOS. */
615 static void
616 ext_dce_process_uses (rtx_insn *insn, rtx obj,
617 bitmap live_tmp, bool skipped_dest)
619 subrtx_var_iterator::array_type array_var;
620 FOR_EACH_SUBRTX_VAR (iter, array_var, obj, NONCONST)
622 /* An EXPR_LIST (from call fusage) ends in NULL_RTX. */
623 rtx x = *iter;
624 if (x == NULL_RTX)
625 continue;
627 /* So the basic idea in this FOR_EACH_SUBRTX_VAR loop is to
628 handle SETs explicitly, possibly propagating live information
629 into the uses.
631 We may continue the loop at various points which will cause
632 iteration into the next level of RTL. Breaking from the loop
633 is never safe as it can lead us to fail to process some of the
634 RTL and thus not make objects live when necessary. */
635 enum rtx_code xcode = GET_CODE (x);
636 if (xcode == SET)
638 const_rtx dst = SET_DEST (x);
639 rtx src = SET_SRC (x);
640 const_rtx y;
641 unsigned HOST_WIDE_INT bit = 0;
643 /* The code of the RHS of a SET. */
644 enum rtx_code code = GET_CODE (src);
646 /* ?!? How much of this should mirror SET handling, potentially
647 being shared? */
648 if (SUBREG_P (dst) && SUBREG_BYTE (dst).is_constant ())
650 bit = subreg_lsb (dst).to_constant ();
651 if (bit >= HOST_BITS_PER_WIDE_INT)
652 bit = HOST_BITS_PER_WIDE_INT - 1;
653 dst = SUBREG_REG (dst);
655 else if (GET_CODE (dst) == STRICT_LOW_PART)
656 dst = XEXP (dst, 0);
658 /* Main processing of the uses. Two major goals here.
660 First, we want to try and propagate liveness (or the lack
661 thereof) from the destination register to the source
662 register(s).
664 Second, if the source is an extension, try to optimize
665 it into a SUBREG. The SUBREG form indicates we don't
666 care about the upper bits and will usually be copy
667 propagated away.
669 If we fail to handle something in here, the expectation
670 is the iterator will dive into the sub-components and
671 mark all the chunks in any found REGs as live. */
672 if (REG_P (dst) && safe_for_live_propagation (code))
674 /* Create a mask representing the bits of this output
675 operand that are live after this insn. We can use
676 this information to refine the live in state of
677 inputs to this insn in many cases.
679 We have to do this on a per SET basis, we might have
680 an INSN with multiple SETS, some of which can narrow
681 the source operand liveness, some of which may not. */
682 unsigned HOST_WIDE_INT dst_mask = 0;
683 HOST_WIDE_INT rn = REGNO (dst);
684 unsigned HOST_WIDE_INT mask_array[]
685 = { 0xff, 0xff00, HOST_WIDE_INT_UC (0xffff0000),
686 -HOST_WIDE_INT_UC (0x100000000) };
687 for (int i = 0; i < 4; i++)
688 if (bitmap_bit_p (live_tmp, 4 * rn + i))
689 dst_mask |= mask_array[i];
690 dst_mask >>= bit;
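	    /* For instance, if only the low byte group of DST is live
	       after this insn, DST_MASK becomes 0xff; if the two low
	       groups are live it becomes 0xffff.  */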
692 /* If we ignored a destination during set processing, then
693 consider all the bits live. */
694 if (skipped_dest)
695 dst_mask = -1;
697 dst_mask = carry_backpropagate (dst_mask, code, src);
699 /* ??? Could also handle ZERO_EXTRACT / SIGN_EXTRACT
700 of the source specially to improve optimization. */
701 if (code == SIGN_EXTEND || code == ZERO_EXTEND)
703 rtx inner = XEXP (src, 0);
704 unsigned HOST_WIDE_INT src_mask
705 = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (inner)));
707 /* DST_MASK could be zero if we had something in the SET
708 that we couldn't handle. */
709 if (modify && !skipped_dest && (dst_mask & ~src_mask) == 0)
710 ext_dce_try_optimize_insn (insn, x);
712 /* Stripping the extension here just seems wrong on multiple
713 levels. It's source side handling, so it seems like it
714 belongs in the loop below. Stripping here also makes it
715 harder than necessary to properly handle live bit groups
716 for (ANY_EXTEND (SUBREG)) where the SUBREG has
717 SUBREG_PROMOTED state. */
718 dst_mask &= src_mask;
719 src = XEXP (src, 0);
720 code = GET_CODE (src);
723 /* Optimization is done at this point. We just want to make
724 sure everything that should get marked as live is marked
725 from here onward. */
727 /* We will handle the other operand of a binary operator
728 at the bottom of the loop by resetting Y. */
729 if (BINARY_P (src))
730 y = XEXP (src, 0);
731 else
732 y = src;
734 /* We're inside a SET and want to process the source operands
735 making things live. Breaking from this loop will cause
736 the iterator to work on sub-rtxs, so it is safe to break
737 if we see something we don't know how to handle.
739 This code is just hokey as it really just handles trivial
740 unary and binary cases. Otherwise the loop exits and we
741 continue iterating on sub-rtxs, but outside the set context. */
742 unsigned HOST_WIDE_INT save_mask = dst_mask;
743 for (;;)
745 /* In general we want to restore DST_MASK before each loop
746 iteration. The exception is when the opcode implies that
747 the other operand is fully live. That's handled by
748 changing SAVE_MASK below. */
749 dst_mask = save_mask;
750 /* Strip an outer paradoxical subreg. The bits outside
751 the inner mode are don't cares. So we can just strip
752 and process the inner object. */
753 if (paradoxical_subreg_p (y))
754 y = XEXP (y, 0);
755 else if (SUBREG_P (y) && SUBREG_BYTE (y).is_constant ())
757 /* We really want to know the outer code here, ie do we
758 have (ANY_EXTEND (SUBREG ...)) as we need to know if
759 the extension matches the SUBREG_PROMOTED state. In
760 that case optimizers can turn the extension into a
761 simple copy. Which means that bits outside the
762 SUBREG's mode are actually live.
764 We don't want to mark those bits live unnecessarily
765 as that inhibits extension elimination in important
766 cases such as those in Coremark. So we need that
767 outer code. */
768 if (!REG_P (SUBREG_REG (y))
769 || (SUBREG_PROMOTED_VAR_P (y)
770 && ((GET_CODE (SET_SRC (x)) == SIGN_EXTEND
771 && SUBREG_PROMOTED_SIGNED_P (y))
772 || (GET_CODE (SET_SRC (x)) == ZERO_EXTEND
773 && SUBREG_PROMOTED_UNSIGNED_P (y)))))
774 break;
776 bit = subreg_lsb (y).to_constant ();
778 /* If this is a wide object (more bits than we can fit
779 in a HOST_WIDE_INT), then just break from the SET
780 context. That will cause the iterator to walk down
781 into the subrtx and if we land on a REG we'll mark
782 the whole thing live. */
783 if (bit >= HOST_BITS_PER_WIDE_INT)
784 break;
786 /* The SUBREG's mode determines the live width. */
787 if (dst_mask)
789 dst_mask <<= bit;
790 if (!dst_mask)
791 dst_mask = -HOST_WIDE_INT_UC (0x100000000);
793 y = SUBREG_REG (y);
796 if (REG_P (y))
798 /* We have found the use of a register. We need to mark
799 the appropriate chunks of the register live. The mode
800 of the REG is a starting point. We may refine that
801 based on what chunks in the output were live. */
802 rn = 4 * REGNO (y);
803 unsigned HOST_WIDE_INT tmp_mask = dst_mask;
805 /* If the RTX code for the SET_SRC is not one we can
806 propagate destination liveness through, then just
807 set the mask to the mode's mask. */
808 if (!safe_for_live_propagation (code))
809 tmp_mask
810 = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (y)));
812 if (tmp_mask & 0xff)
813 bitmap_set_bit (livenow, rn);
814 if (tmp_mask & 0xff00)
815 bitmap_set_bit (livenow, rn + 1);
816 if (tmp_mask & HOST_WIDE_INT_UC (0xffff0000))
817 bitmap_set_bit (livenow, rn + 2);
818 if (tmp_mask & -HOST_WIDE_INT_UC (0x100000000))
819 bitmap_set_bit (livenow, rn + 3);
821 else if (!CONSTANT_P (y))
822 break;
824 /* We might have (ashift (const_int 1) (reg...))
825 By setting dst_mask we can continue iterating on the
826 next operand and it will be considered fully live.
828 Note that since we restore DST_MASK from SAVE_MASK at the
829 top of the loop, we have to change SAVE_MASK to get the
830 semantics we want. */
831 if (binop_implies_op2_fully_live (GET_CODE (src)))
832 save_mask = -1;
834 /* If this was anything but a binary operand, break the inner
835 loop. This is conservatively correct as it will cause the
836 iterator to look at the sub-rtxs outside the SET context. */
837 if (!BINARY_P (src))
838 break;
840 /* We processed the first operand of a binary operator. Now
841 handle the second. */
842 y = XEXP (src, 1), src = pc_rtx;
845 /* These are leaf nodes, no need to iterate down into them. */
846 if (REG_P (y) || CONSTANT_P (y))
847 iter.skip_subrtxes ();
850 /* If we are reading the low part of a SUBREG, then we can
851 refine liveness of the input register, otherwise let the
852 iterator continue into SUBREG_REG. */
853 else if (SUBREG_P (x)
854 && REG_P (SUBREG_REG (x))
855 && !paradoxical_subreg_p (x)
856 && subreg_lowpart_p (x)
857 && GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
858 && GET_MODE_BITSIZE (GET_MODE (x)).to_constant () <= 32)
860 HOST_WIDE_INT size = GET_MODE_BITSIZE (GET_MODE (x)).to_constant ();
861 HOST_WIDE_INT rn = 4 * REGNO (SUBREG_REG (x));
863 /* If this is a promoted subreg, then more of it may be live than
864 is otherwise obvious. */
865 if (SUBREG_PROMOTED_VAR_P (x))
866 size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))).to_constant ();
868 bitmap_set_bit (livenow, rn);
869 if (size > 8)
870 bitmap_set_bit (livenow, rn + 1);
871 if (size > 16)
872 bitmap_set_bit (livenow, rn + 2);
873 if (size >= 32)
874 bitmap_set_bit (livenow, rn + 3);
875 iter.skip_subrtxes ();
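      /* For example, a use of (subreg:QI (reg:SI X) 0) marks only the
	 low byte group of X live, while (subreg:HI (reg:SI X) 0) marks
	 the two low groups.  */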
877 /* If we have a register reference that is not otherwise handled,
878 just assume all the chunks are live. */
879 else if (REG_P (x))
880 bitmap_set_range (livenow, REGNO (x) * 4, group_limit (x));
884 /* Process a single basic block BB with current liveness information
885 in LIVENOW, returning updated liveness information.
887 If MODIFY is true, then this is the last pass and unnecessary
888 extensions should be eliminated when possible. If an extension
889 is removed, the source pseudo is marked in CHANGED_PSEUDOS. */
891 static void
892 ext_dce_process_bb (basic_block bb)
894 rtx_insn *insn;
896 FOR_BB_INSNS_REVERSE (bb, insn)
898 if (!NONDEBUG_INSN_P (insn))
899 continue;
901 /* Live-out state of the destination of this insn. We can
902 use this to refine the live-in state of the sources of
903 this insn in many cases. */
904 bitmap live_tmp = BITMAP_ALLOC (NULL);
906 /* First process any sets/clobbers in INSN. */
907 bool skipped_dest = ext_dce_process_sets (insn, PATTERN (insn), live_tmp);
909 /* CALL_INSNs need their fusage data processed. */
910 if (CALL_P (insn))
911 skipped_dest |= ext_dce_process_sets (insn,
912 CALL_INSN_FUNCTION_USAGE (insn),
913 live_tmp);
915 /* And now uses, optimizing away SIGN/ZERO extensions as we go. */
916 ext_dce_process_uses (insn, PATTERN (insn), live_tmp, skipped_dest);
918 /* A nonlocal goto implicitly uses the frame pointer. */
919 if (JUMP_P (insn) && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
921 bitmap_set_range (livenow, FRAME_POINTER_REGNUM * 4, 4);
922 if (!HARD_FRAME_POINTER_IS_FRAME_POINTER)
923 bitmap_set_range (livenow, HARD_FRAME_POINTER_REGNUM * 4, 4);
926 /* And process fusage data for the use as well. */
927 if (CALL_P (insn))
929 if (!FAKE_CALL_P (insn))
930 bitmap_set_range (livenow, STACK_POINTER_REGNUM * 4, 4);
932 /* If this is not a call to a const function, then assume it
933 can read any global register. */
934 if (!RTL_CONST_CALL_P (insn))
935 for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
936 if (global_regs[i])
937 bitmap_set_range (livenow, i * 4, 4);
939 ext_dce_process_uses (insn, CALL_INSN_FUNCTION_USAGE (insn), live_tmp, false);
942 BITMAP_FREE (live_tmp);
946 /* SUBREG_PROMOTED_VAR_P is set by the gimple->rtl optimizers and
947 is usually helpful. However, in some cases setting the value when
948 it is not strictly needed can cause this pass to miss optimizations.
950 Specifically consider (set (mem) (subreg (reg))). If set in that
951 case it will cause more bit groups to be live for REG than would
952 be strictly necessary which in turn can inhibit extension removal.
954 So do a pass over the IL wiping the SUBREG_PROMOTED_VAR_P when it
955 is obviously not needed. */
957 static void
958 maybe_clear_subreg_promoted_p (void)
960 for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
962 if (!NONDEBUG_INSN_P (insn))
963 continue;
965 rtx set = single_set (insn);
966 if (!set)
967 continue;
969 /* There may be other cases where we should clear, but for
970 now, this is the only known case where it causes problems. */
971 if (MEM_P (SET_DEST (set)) && SUBREG_P (SET_SRC (set))
972 && GET_MODE (SET_DEST (set)) <= GET_MODE (SUBREG_REG (SET_SRC (set))))
973 SUBREG_PROMOTED_VAR_P (SET_SRC (set)) = 0;
978 /* We optimize away sign/zero extensions in this pass and replace
979 them with SUBREGs indicating certain bits are don't cares.
981 This changes the SUBREG_PROMOTED_VAR_P state of the object.
982 It is fairly painful to fix this on the fly, so we have
983 recorded which pseudos are affected and we look for SUBREGs
984 of those pseudos and fix them up. */
986 static void
987 reset_subreg_promoted_p (void)
989 /* If we removed an extension, that changed the promoted state
990 of the destination of that extension. Thus we need to go
991 find any SUBREGs that reference that pseudo and adjust their
992 SUBREG_PROMOTED_P state. */
993 for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
995 if (!NONDEBUG_INSN_P (insn))
996 continue;
998 rtx pat = PATTERN (insn);
999 subrtx_var_iterator::array_type array;
1000 FOR_EACH_SUBRTX_VAR (iter, array, pat, NONCONST)
1002 rtx sub = *iter;
1004 /* We only care about SUBREGs. */
1005 if (GET_CODE (sub) != SUBREG)
1006 continue;
1008 const_rtx x = SUBREG_REG (sub);
1010 /* We only care if the inner object is a REG. */
1011 if (!REG_P (x))
1012 continue;
1014 /* And only if the SUBREG is a promoted var. */
1015 if (!SUBREG_PROMOTED_VAR_P (sub))
1016 continue;
1018 if (bitmap_bit_p (changed_pseudos, REGNO (x)))
1019 SUBREG_PROMOTED_VAR_P (sub) = 0;
1024 /* Initialization of the ext-dce pass. Primarily this means
1025 setting up the various bitmaps we utilize. */
1027 static void
1028 ext_dce_init (void)
1030 livein.create (last_basic_block_for_fn (cfun));
1031 livein.quick_grow_cleared (last_basic_block_for_fn (cfun));
1032 for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
1033 bitmap_initialize (&livein[i], &bitmap_default_obstack);
1035 auto_bitmap refs (&bitmap_default_obstack);
1036 df_get_exit_block_use_set (refs);
1038 unsigned i;
1039 bitmap_iterator bi;
1040 EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
1041 make_reg_live (&livein[EXIT_BLOCK], i);
1043 livenow = BITMAP_ALLOC (NULL);
1044 all_blocks = BITMAP_ALLOC (NULL);
1045 changed_pseudos = BITMAP_ALLOC (NULL);
1047 for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
1048 if (i != ENTRY_BLOCK && i != EXIT_BLOCK)
1049 bitmap_set_bit (all_blocks, i);
1051 modify = false;
1054 /* Finalization of the ext-dce pass. Primarily this means
1055 releasing the various bitmaps we utilize. */
1057 static void
1058 ext_dce_finish (void)
1060 for (unsigned i = 0; i < livein.length (); i++)
1061 bitmap_clear (&livein[i]);
1062 livein.release ();
1064 BITMAP_FREE (livenow);
1065 BITMAP_FREE (changed_pseudos);
1066 BITMAP_FREE (all_blocks);
1069 /* Process block number BB_INDEX as part of the backward
1070 simple dataflow analysis. Return TRUE if something in
1071 this block changed or FALSE otherwise. */
1073 static bool
1074 ext_dce_rd_transfer_n (int bb_index)
1076 /* The ENTRY/EXIT blocks never change. */
1077 if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK)
1078 return false;
1080 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
1082 /* Make everything live that's live in the successors. */
1083 bitmap_clear (livenow);
1084 edge_iterator ei;
1085 edge e;
1087 FOR_EACH_EDGE (e, ei, bb->succs)
1088 bitmap_ior_into (livenow, &livein[e->dest->index]);
1090 ext_dce_process_bb (bb);
1092 /* We may have narrowed the set of live objects at the start
1093 of this block. If so, update the bitmaps and indicate to
1094 the generic dataflow code that something changed. */
1095 if (!bitmap_equal_p (&livein[bb_index], livenow))
1097 bitmap_copy (&livein[bb_index], livenow);
1098 return true;
1101 return false;
1104 /* Dummy function for the df_simple_dataflow API. */
1105 static bool ext_dce_rd_confluence_n (edge) { return true; }
1107 /* Use lifetime analysis to identify extensions that set bits that
1108 are never read. Turn such extensions into SUBREGs instead which
1109 can often be propagated away. */
1111 void
1112 ext_dce_execute (void)
1114 /* Limit the amount of memory we use for livein: 4 bits per reg per
1115 basic-block, which including overhead maps to roughly one byte
1116 per reg per basic-block. */
1117 uint64_t memory_request
1118 = (uint64_t)n_basic_blocks_for_fn (cfun) * max_reg_num ();
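  /* For example, 2000 basic blocks and 30000 registers request
     60,000,000 bytes; divided by 1024 that is 58,593, which is what
     gets compared against --param max-gcse-memory below.  */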
1119 if (memory_request / 1024 > (uint64_t)param_max_gcse_memory)
1121 warning (OPT_Wdisabled_optimization,
1122 "ext-dce disabled: %d basic blocks and %d registers; "
1123 "increase %<--param max-gcse-memory%> above %wu",
1124 n_basic_blocks_for_fn (cfun), max_reg_num (),
1125 memory_request / 1024);
1126 return;
1129 /* Some settings of SUBREG_PROMOTED_VAR_P are actively harmful
1130 to this pass. Clear it for those cases. */
1131 maybe_clear_subreg_promoted_p ();
1132 df_analyze ();
1133 ext_dce_init ();
1135 do
1136 {
1137 df_simple_dataflow (DF_BACKWARD, NULL, NULL,
1138 ext_dce_rd_confluence_n, ext_dce_rd_transfer_n,
1139 all_blocks, df_get_postorder (DF_BACKWARD),
1140 df_get_n_blocks (DF_BACKWARD));
1141 modify = !modify;
1143 while (modify);
1145 reset_subreg_promoted_p ();
1147 ext_dce_finish ();
1151 namespace {
1153 const pass_data pass_data_ext_dce =
1155 RTL_PASS, /* type */
1156 "ext_dce", /* name */
1157 OPTGROUP_NONE, /* optinfo_flags */
1158 TV_EXT_DCE, /* tv_id */
1159 PROP_cfglayout, /* properties_required */
1160 0, /* properties_provided */
1161 0, /* properties_destroyed */
1162 0, /* todo_flags_start */
1163 TODO_df_finish, /* todo_flags_finish */
1166 class pass_ext_dce : public rtl_opt_pass
1168 public:
1169 pass_ext_dce (gcc::context *ctxt)
1170 : rtl_opt_pass (pass_data_ext_dce, ctxt)
1173 /* opt_pass methods: */
1174 virtual bool gate (function *) { return flag_ext_dce && optimize > 0; }
1175 virtual unsigned int execute (function *)
1177 ext_dce_execute ();
1178 return 0;
1181 }; // class pass_ext_dce
1183 } // anon namespace
1185 rtl_opt_pass *
1186 make_pass_ext_dce (gcc::context *ctxt)
1188 return new pass_ext_dce (ctxt);