/* GIMPLE store merging and byte swapping passes.
   Copyright (C) 2009-2024 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* The purpose of the store merging pass is to combine multiple memory stores
   of constant values, values loaded from memory, bitwise operations on those,
   or bit-field values, to consecutive locations, into fewer wider stores.

   For example, if we have a sequence performing four byte stores to
   consecutive memory locations:
   [p     ] := imm1;
   [p + 1B] := imm2;
   [p + 2B] := imm3;
   [p + 3B] := imm4;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.

   Or:
   [p     ] := [q     ];
   [p + 1B] := [q + 1B];
   [p + 2B] := [q + 2B];
   [p + 3B] := [q + 3B];
   if there is no overlap can be transformed into a single 4-byte
   load followed by a single 4-byte store.

   Or:
   [p     ] := [q     ] ^ imm1;
   [p + 1B] := [q + 1B] ^ imm2;
   [p + 2B] := [q + 2B] ^ imm3;
   [p + 3B] := [q + 3B] ^ imm4;
   if there is no overlap can be transformed into a single 4-byte
   load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.

   Or:
   [p:1 ] := imm;
   [p:31] := val & 0x7FFFFFFF;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.

   The algorithm is applied to each basic block in three phases:

   1) Scan through the basic block and record assignments to destinations
   that can be expressed as a store to memory of a certain size at a certain
   bit offset from base expressions we can handle.  For bit-fields we also
   record the surrounding bit region, i.e. bits that could be stored in
   a read-modify-write operation when storing the bit-field.  Record store
   chains to different bases in a hash_map (m_stores) and make sure to
   terminate such chains when appropriate (for example when the stored
   values get used subsequently).
   These stores can be a result of structure element initializers, array stores
   etc.  A store_immediate_info object is recorded for every such store.
   Record as many such assignments to a single base as possible until a
   statement that interferes with the store sequence is encountered.
   Each store has up to 2 operands, which can be either a constant, a memory
   load or an SSA name, from which the value to be stored can be computed.
   At most one of the operands can be a constant.  The operands are recorded
   in the store_operand_info struct.

   2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
   store_immediate_info objects) and coalesce contiguous stores into
   merged_store_group objects.  For bit-field stores, we don't need to
   require the stores to be contiguous, just their surrounding bit regions
   have to be contiguous.  If the expression being stored is different
   between adjacent stores, such as one store storing a constant and a
   following one storing a value loaded from memory, or if the loaded memory
   objects are not adjacent, a new merged_store_group is created as well.

   For example, given the stores:
   [p     ] := 0;
   [p + 1B] := 1;
   [p + 3B] := 0;
   [p + 4B] := 1;
   [p + 5B] := 0;
   [p + 6B] := 0;
   This phase would produce two merged_store_group objects, one recording the
   two bytes stored in the memory region [p : p + 1] and another
   recording the four bytes stored in the memory region [p + 3 : p + 6].

   3) The merged_store_group objects produced in phase 2) are processed
   to generate the sequence of wider stores that set the contiguous memory
   regions to the sequence of bytes that correspond to it.  This may emit
   multiple stores per store group to handle contiguous stores that are not
   of a size that is a power of 2.  For example it can try to emit a 40-bit
   store as a 32-bit store followed by an 8-bit store.
   We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
   or TARGET_SLOW_UNALIGNED_ACCESS settings.

   Note on endianness and example:
   Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
   [p     ] := 0x1234;
   [p + 2B] := 0x5678;
   [p + 4B] := 0xab;
   [p + 5B] := 0xcd;

   The memory layout for little-endian (LE) and big-endian (BE) must be:
  p |LE|BE|
  ---------
  0 |34|12|
  1 |12|34|
  2 |78|56|
  3 |56|78|
  4 |ab|ab|
  5 |cd|cd|

   To merge these into a single 48-bit merged value 'val' in phase 2)
   on little-endian we insert stores to higher (consecutive) bitpositions
   into the most significant bits of the merged value.
   The final merged value would be: 0xcdab56781234

   For big-endian we insert stores to higher bitpositions into the least
   significant bits of the merged value.
   The final merged value would be: 0x12345678abcd

   Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
   followed by a 16-bit store.  Again, we must consider endianness when
   breaking down the 48-bit value 'val' computed above.
   For little endian we emit:
   [p]      (32-bit) := 0x56781234; //  val & 0x0000ffffffff;
   [p + 4B] (16-bit) := 0xcdab;     // (val & 0xffff00000000) >> 32;

   Whereas for big-endian we emit:
   [p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
   [p + 4B] (16-bit) := 0xabcd;     //  val & 0x00000000ffff;  */
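/* Purely as an illustration (this fragment is not part of the pass itself and
   the type and field names below are made up), C source of the following
   shape is what typically gives rise to the four consecutive byte stores of
   constants shown above, which phase 3) can then emit as a single 32-bit
   store on targets that allow it:

     struct packet { unsigned char tag, flags, len, crc; };

     void
     init_packet (struct packet *p)
     {
       p->tag = 1;    // [p     ] := imm1;
       p->flags = 2;  // [p + 1B] := imm2;
       p->len = 3;    // [p + 2B] := imm3;
       p->crc = 4;    // [p + 3B] := imm4;
     }  */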
143 #include "coretypes.h"
147 #include "builtins.h"
148 #include "fold-const.h"
149 #include "tree-pass.h"
151 #include "gimple-pretty-print.h"
153 #include "fold-const.h"
154 #include "print-tree.h"
155 #include "tree-hash-traits.h"
156 #include "gimple-iterator.h"
157 #include "gimplify.h"
158 #include "gimple-fold.h"
159 #include "stor-layout.h"
162 #include "cfgcleanup.h"
163 #include "tree-cfg.h"
167 #include "gimplify-me.h"
169 #include "expr.h" /* For get_bit_range. */
170 #include "optabs-tree.h"
172 #include "selftest.h"
174 /* The maximum size (in bits) of the stores this pass should generate. */
175 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
176 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
178 /* Limit to bound the number of aliasing checks for loads with the same
179 vuse as the corresponding store. */
180 #define MAX_STORE_ALIAS_CHECKS 64
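/* For instance, on a typical 64-bit target where BITS_PER_WORD is 64,
   MAX_STORE_BITSIZE is 64 and MAX_STORE_BYTES is 8, i.e. the widest store
   this pass will synthesize is a single word.  */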
struct
{
  /* Number of hand-written 16-bit nop / bswaps found.  */
  int found_16bit;

  /* Number of hand-written 32-bit nop / bswaps found.  */
  int found_32bit;

  /* Number of hand-written 64-bit nop / bswaps found.  */
  int found_64bit;
} nop_stats, bswap_stats;
196 /* A symbolic number structure is used to detect byte permutation and selection
197 patterns of a source. To achieve that, its field N contains an artificial
198 number consisting of BITS_PER_MARKER sized markers tracking where does each
199 byte come from in the source:
201 0 - target byte has the value 0
202 FF - target byte has an unknown value (eg. due to sign extension)
203 1..size - marker value is the byte index in the source (0 for lsb).
205 To detect permutations on memory sources (arrays and structures), a symbolic
206 number is also associated:
207 - a base address BASE_ADDR and an OFFSET giving the address of the source;
208 - a range which gives the difference between the highest and lowest accessed
209 memory location to make such a symbolic number;
210 - the address SRC of the source element of lowest address as a convenience
211 to easily get BASE_ADDR + offset + lowest bytepos;
212 - number of expressions N_OPS bitwise ored together to represent
213 approximate cost of the computation.
215 Note 1: the range is different from size as size reflects the size of the
216 type of the current expression. For instance, for an array char a[],
217 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
218 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
221 Note 2: for non-memory sources, range holds the same value as size.
223 Note 3: SRC points to the SSA_NAME in case of non-memory source. */
struct symbolic_number {
  uint64_t n;
  tree type;
  tree base_addr;
  tree offset;
  poly_int64 bytepos;
  tree src;
  tree alias_set;
  tree vuse;
  unsigned HOST_WIDE_INT range;
  int n_ops;
};
238 #define BITS_PER_MARKER 8
239 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240 #define MARKER_BYTE_UNKNOWN MARKER_MASK
241 #define HEAD_MARKER(n, size) \
242 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
244 /* The number which the find_bswap_or_nop_1 result should match in
245 order to have a nop. The number is masked according to the size of
246 the symbolic number before using it. */
247 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248 (uint64_t)0x08070605 << 32 | 0x04030201)
250 /* The number which the find_bswap_or_nop_1 result should match in
251 order to have a byte swap. The number is masked according to the
252 size of the symbolic number before using it. */
253 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254 (uint64_t)0x01020304 << 32 | 0x05060708)
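/* Illustration: for a 4-byte symbolic number the two constants above become
   0x04030201 (CMPNOP) and 0x01020304 (CMPXCHG) once reduced to that size.
   So, on a little-endian target, a computation such as

     x = b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 24)

   over an unsigned char array b ends up with the symbolic number 0x04030201,
   i.e. a nop: it is just a 32-bit load of b in target order.  Combining the
   bytes in the opposite order ends up with 0x01020304 instead, i.e. a
   hand-written __builtin_bswap32 of that load.  */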
/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
   number N.  Return false if the requested operation is not permitted
   on a symbolic number.  */

inline bool
do_shift_rotate (enum tree_code code,
		 struct symbolic_number *n,
		 int count)
{
  int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
  uint64_t head_marker;

  if (count < 0
      || count >= TYPE_PRECISION (n->type)
      || count % BITS_PER_UNIT != 0)
    return false;
  count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;

  /* Zero out the extra bits of N in order to avoid them being shifted
     into the significant bits.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  switch (code)
    {
    case LSHIFT_EXPR:
      n->n <<= count;
      break;
    case RSHIFT_EXPR:
      head_marker = HEAD_MARKER (n->n, size);
      n->n >>= count;
      /* Arithmetic shift of signed type: result is dependent on the value.  */
      if (!TYPE_UNSIGNED (n->type) && head_marker)
	for (i = 0; i < count / BITS_PER_MARKER; i++)
	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
		  << ((size - 1 - i) * BITS_PER_MARKER);
      break;
    case LROTATE_EXPR:
      n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
      break;
    case RROTATE_EXPR:
      n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
      break;
    default:
      return false;
    }
  /* Zero unused bits for size.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
  return true;
}
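/* Illustration: with an unsigned 4-byte type and n->n == 0x04030201, an
   RSHIFT_EXPR by 8 bits turns the markers into 0x00040302: the low byte of
   the result now comes from source byte 2 and the top byte is known to be
   zero.  For a signed type the top byte would become MARKER_BYTE_UNKNOWN
   (0xff) instead, because an arithmetic shift replicates the sign bit.  */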
308 /* Perform sanity checking for the symbolic number N and the gimple
312 verify_symbolic_number_p (struct symbolic_number
*n
, gimple
*stmt
)
316 lhs_type
= TREE_TYPE (gimple_get_lhs (stmt
));
318 if (TREE_CODE (lhs_type
) != INTEGER_TYPE
319 && TREE_CODE (lhs_type
) != ENUMERAL_TYPE
)
322 if (TYPE_PRECISION (lhs_type
) != TYPE_PRECISION (n
->type
))
328 /* Initialize the symbolic number N for the bswap pass from the base element
329 SRC manipulated by the bitwise OR expression. */
332 init_symbolic_number (struct symbolic_number
*n
, tree src
)
336 if (!INTEGRAL_TYPE_P (TREE_TYPE (src
)) && !POINTER_TYPE_P (TREE_TYPE (src
)))
339 n
->base_addr
= n
->offset
= n
->alias_set
= n
->vuse
= NULL_TREE
;
342 /* Set up the symbolic number N by setting each byte to a value between 1 and
343 the byte size of rhs1. The highest order byte is set to n->size and the
344 lowest order byte to 1. */
345 n
->type
= TREE_TYPE (src
);
346 size
= TYPE_PRECISION (n
->type
);
347 if (size
% BITS_PER_UNIT
!= 0)
349 size
/= BITS_PER_UNIT
;
350 if (size
> 64 / BITS_PER_MARKER
)
356 if (size
< 64 / BITS_PER_MARKER
)
357 n
->n
&= ((uint64_t) 1 << (size
* BITS_PER_MARKER
)) - 1;
/* Check if STMT might be a byte swap or a nop from a memory source and return
   the answer.  If so, REF is that memory source and the base of the memory
   area accessed and the offset of the access from that base are recorded
   in N.  */
367 find_bswap_or_nop_load (gimple
*stmt
, tree ref
, struct symbolic_number
*n
)
369 /* Leaf node is an array or component ref. Memorize its base and
370 offset from base to compare to other such leaf node. */
371 poly_int64 bitsize
, bitpos
, bytepos
;
373 int unsignedp
, reversep
, volatilep
;
374 tree offset
, base_addr
;
376 /* Not prepared to handle PDP endian. */
377 if (BYTES_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
380 if (!gimple_assign_load_p (stmt
) || gimple_has_volatile_ops (stmt
))
383 base_addr
= get_inner_reference (ref
, &bitsize
, &bitpos
, &offset
, &mode
,
384 &unsignedp
, &reversep
, &volatilep
);
386 if (TREE_CODE (base_addr
) == TARGET_MEM_REF
)
387 /* Do not rewrite TARGET_MEM_REF. */
389 else if (TREE_CODE (base_addr
) == MEM_REF
)
391 poly_offset_int bit_offset
= 0;
392 tree off
= TREE_OPERAND (base_addr
, 1);
394 if (!integer_zerop (off
))
396 poly_offset_int boff
= mem_ref_offset (base_addr
);
397 boff
<<= LOG2_BITS_PER_UNIT
;
401 base_addr
= TREE_OPERAND (base_addr
, 0);
403 /* Avoid returning a negative bitpos as this may wreak havoc later. */
404 if (maybe_lt (bit_offset
, 0))
406 tree byte_offset
= wide_int_to_tree
407 (sizetype
, bits_to_bytes_round_down (bit_offset
));
408 bit_offset
= num_trailing_bits (bit_offset
);
410 offset
= size_binop (PLUS_EXPR
, offset
, byte_offset
);
412 offset
= byte_offset
;
415 bitpos
+= bit_offset
.force_shwi ();
418 base_addr
= build_fold_addr_expr (base_addr
);
420 if (!multiple_p (bitpos
, BITS_PER_UNIT
, &bytepos
))
422 if (!multiple_p (bitsize
, BITS_PER_UNIT
))
427 if (!init_symbolic_number (n
, ref
))
429 n
->base_addr
= base_addr
;
431 n
->bytepos
= bytepos
;
432 n
->alias_set
= reference_alias_ptr_type (ref
);
433 n
->vuse
= gimple_vuse (stmt
);
437 /* Compute the symbolic number N representing the result of a bitwise OR,
438 bitwise XOR or plus on 2 symbolic number N1 and N2 whose source statements
439 are respectively SOURCE_STMT1 and SOURCE_STMT2. CODE is the operation. */
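/* Illustration: for a little-endian load pattern such as

     x = p[0] | (p[1] << 8);

   with p an unsigned char pointer, the two operands reach this function as
   n1->n == 0x01 (p[0]) and n2->n == 0x0100 (p[1], already moved into byte 1
   by do_shift_rotate).  Because p[1] lies one byte above the lowest accessed
   address, its marker is reinterpreted as 2, and the merged symbolic number
   is 0x0201 with a range of 2 bytes -- the CMPNOP pattern of a plain 16-bit
   load.  */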
442 perform_symbolic_merge (gimple
*source_stmt1
, struct symbolic_number
*n1
,
443 gimple
*source_stmt2
, struct symbolic_number
*n2
,
444 struct symbolic_number
*n
, enum tree_code code
)
449 struct symbolic_number
*n_start
;
451 tree rhs1
= gimple_assign_rhs1 (source_stmt1
);
452 if (TREE_CODE (rhs1
) == BIT_FIELD_REF
453 && TREE_CODE (TREE_OPERAND (rhs1
, 0)) == SSA_NAME
)
454 rhs1
= TREE_OPERAND (rhs1
, 0);
455 tree rhs2
= gimple_assign_rhs1 (source_stmt2
);
456 if (TREE_CODE (rhs2
) == BIT_FIELD_REF
457 && TREE_CODE (TREE_OPERAND (rhs2
, 0)) == SSA_NAME
)
458 rhs2
= TREE_OPERAND (rhs2
, 0);
  /* Sources are different, cancel bswap if they are not memory locations with
     the same base (array, structure, ...).  */
465 HOST_WIDE_INT start1
, start2
, start_sub
, end_sub
, end1
, end2
, end
;
466 struct symbolic_number
*toinc_n_ptr
, *n_end
;
467 basic_block bb1
, bb2
;
469 if (!n1
->base_addr
|| !n2
->base_addr
470 || !operand_equal_p (n1
->base_addr
, n2
->base_addr
, 0))
473 if (!n1
->offset
!= !n2
->offset
474 || (n1
->offset
&& !operand_equal_p (n1
->offset
, n2
->offset
, 0)))
478 if (!(n2
->bytepos
- n1
->bytepos
).is_constant (&start2
))
484 start_sub
= start2
- start1
;
489 start_sub
= start1
- start2
;
492 bb1
= gimple_bb (source_stmt1
);
493 bb2
= gimple_bb (source_stmt2
);
494 if (dominated_by_p (CDI_DOMINATORS
, bb1
, bb2
))
495 source_stmt
= source_stmt1
;
497 source_stmt
= source_stmt2
;
499 /* Find the highest address at which a load is performed and
500 compute related info. */
501 end1
= start1
+ (n1
->range
- 1);
502 end2
= start2
+ (n2
->range
- 1);
506 end_sub
= end2
- end1
;
511 end_sub
= end1
- end2
;
513 n_end
= (end2
> end1
) ? n2
: n1
;
515 /* Find symbolic number whose lsb is the most significant. */
516 if (BYTES_BIG_ENDIAN
)
517 toinc_n_ptr
= (n_end
== n1
) ? n2
: n1
;
519 toinc_n_ptr
= (n_start
== n1
) ? n2
: n1
;
521 n
->range
= end
- MIN (start1
, start2
) + 1;
523 /* Check that the range of memory covered can be represented by
524 a symbolic number. */
525 if (n
->range
> 64 / BITS_PER_MARKER
)
528 /* Reinterpret byte marks in symbolic number holding the value of
529 bigger weight according to target endianness. */
530 inc
= BYTES_BIG_ENDIAN
? end_sub
: start_sub
;
531 size
= TYPE_PRECISION (n1
->type
) / BITS_PER_UNIT
;
532 for (i
= 0; i
< size
; i
++, inc
<<= BITS_PER_MARKER
)
535 = (toinc_n_ptr
->n
>> (i
* BITS_PER_MARKER
)) & MARKER_MASK
;
536 if (marker
&& marker
!= MARKER_BYTE_UNKNOWN
)
537 toinc_n_ptr
->n
+= inc
;
542 n
->range
= n1
->range
;
544 source_stmt
= source_stmt1
;
548 || alias_ptr_types_compatible_p (n1
->alias_set
, n2
->alias_set
))
549 n
->alias_set
= n1
->alias_set
;
551 n
->alias_set
= ptr_type_node
;
552 n
->vuse
= n_start
->vuse
;
553 n
->base_addr
= n_start
->base_addr
;
554 n
->offset
= n_start
->offset
;
555 n
->src
= n_start
->src
;
556 n
->bytepos
= n_start
->bytepos
;
557 n
->type
= n_start
->type
;
558 size
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
559 uint64_t res_n
= n1
->n
| n2
->n
;
561 for (i
= 0, mask
= MARKER_MASK
; i
< size
; i
++, mask
<<= BITS_PER_MARKER
)
563 uint64_t masked1
, masked2
;
565 masked1
= n1
->n
& mask
;
566 masked2
= n2
->n
& mask
;
567 /* If at least one byte is 0, all of 0 | x == 0 ^ x == 0 + x == x. */
568 if (masked1
&& masked2
)
570 /* + can carry into upper bits, just punt. */
571 if (code
== PLUS_EXPR
)
573 /* x | x is still x. */
574 if (code
== BIT_IOR_EXPR
&& masked1
== masked2
)
576 if (code
== BIT_XOR_EXPR
)
578 /* x ^ x is 0, but MARKER_BYTE_UNKNOWN stands for
579 unknown values and unknown ^ unknown is unknown. */
580 if (masked1
== masked2
581 && masked1
!= ((uint64_t) MARKER_BYTE_UNKNOWN
582 << i
* BITS_PER_MARKER
))
588 /* Otherwise set the byte to unknown, it might still be
594 n
->n_ops
= n1
->n_ops
+ n2
->n_ops
;
/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
   the operation given by the rhs of STMT on the result.  If the operation
   could successfully be executed the function returns a gimple stmt whose
   rhs's first tree is the expression of the source operand and NULL
   otherwise.  */
606 find_bswap_or_nop_1 (gimple
*stmt
, struct symbolic_number
*n
, int limit
)
609 tree rhs1
, rhs2
= NULL
;
610 gimple
*rhs1_stmt
, *rhs2_stmt
, *source_stmt1
;
611 enum gimple_rhs_class rhs_class
;
614 || !is_gimple_assign (stmt
)
615 || stmt_can_throw_internal (cfun
, stmt
))
618 rhs1
= gimple_assign_rhs1 (stmt
);
620 if (find_bswap_or_nop_load (stmt
, rhs1
, n
))
623 /* Handle BIT_FIELD_REF. */
624 if (TREE_CODE (rhs1
) == BIT_FIELD_REF
625 && TREE_CODE (TREE_OPERAND (rhs1
, 0)) == SSA_NAME
)
627 if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1
, 1))
628 || !tree_fits_uhwi_p (TREE_OPERAND (rhs1
, 2)))
631 unsigned HOST_WIDE_INT bitsize
= tree_to_uhwi (TREE_OPERAND (rhs1
, 1));
632 unsigned HOST_WIDE_INT bitpos
= tree_to_uhwi (TREE_OPERAND (rhs1
, 2));
633 if (bitpos
% BITS_PER_UNIT
== 0
634 && bitsize
% BITS_PER_UNIT
== 0
635 && init_symbolic_number (n
, TREE_OPERAND (rhs1
, 0)))
637 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
638 if (BYTES_BIG_ENDIAN
)
639 bitpos
= TYPE_PRECISION (n
->type
) - bitpos
- bitsize
;
642 if (!do_shift_rotate (RSHIFT_EXPR
, n
, bitpos
))
647 uint64_t tmp
= (1 << BITS_PER_UNIT
) - 1;
648 for (unsigned i
= 0; i
< bitsize
/ BITS_PER_UNIT
;
649 i
++, tmp
<<= BITS_PER_UNIT
)
650 mask
|= (uint64_t) MARKER_MASK
<< (i
* BITS_PER_MARKER
);
654 n
->type
= TREE_TYPE (rhs1
);
655 if (!verify_symbolic_number_p (n
, stmt
))
659 n
->range
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
667 if (TREE_CODE (rhs1
) != SSA_NAME
)
670 code
= gimple_assign_rhs_code (stmt
);
671 rhs_class
= gimple_assign_rhs_class (stmt
);
672 rhs1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
674 if (rhs_class
== GIMPLE_BINARY_RHS
)
675 rhs2
= gimple_assign_rhs2 (stmt
);
677 /* Handle unary rhs and binary rhs with integer constants as second
680 if (rhs_class
== GIMPLE_UNARY_RHS
681 || (rhs_class
== GIMPLE_BINARY_RHS
682 && TREE_CODE (rhs2
) == INTEGER_CST
))
684 if (code
!= BIT_AND_EXPR
685 && code
!= LSHIFT_EXPR
686 && code
!= RSHIFT_EXPR
687 && code
!= LROTATE_EXPR
688 && code
!= RROTATE_EXPR
689 && !CONVERT_EXPR_CODE_P (code
))
692 source_stmt1
= find_bswap_or_nop_1 (rhs1_stmt
, n
, limit
- 1);
694 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
695 we have to initialize the symbolic number. */
698 if (gimple_assign_load_p (stmt
)
699 || !init_symbolic_number (n
, rhs1
))
708 int i
, size
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
709 uint64_t val
= int_cst_value (rhs2
), mask
= 0;
710 uint64_t tmp
= (1 << BITS_PER_UNIT
) - 1;
712 /* Only constants masking full bytes are allowed. */
713 for (i
= 0; i
< size
; i
++, tmp
<<= BITS_PER_UNIT
)
714 if ((val
& tmp
) != 0 && (val
& tmp
) != tmp
)
717 mask
|= (uint64_t) MARKER_MASK
<< (i
* BITS_PER_MARKER
);
726 if (!do_shift_rotate (code
, n
, (int) TREE_INT_CST_LOW (rhs2
)))
731 int i
, type_size
, old_type_size
;
734 type
= TREE_TYPE (gimple_assign_lhs (stmt
));
735 type_size
= TYPE_PRECISION (type
);
736 if (type_size
% BITS_PER_UNIT
!= 0)
738 type_size
/= BITS_PER_UNIT
;
739 if (type_size
> 64 / BITS_PER_MARKER
)
742 /* Sign extension: result is dependent on the value. */
743 old_type_size
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
744 if (!TYPE_UNSIGNED (n
->type
) && type_size
> old_type_size
745 && HEAD_MARKER (n
->n
, old_type_size
))
746 for (i
= 0; i
< type_size
- old_type_size
; i
++)
747 n
->n
|= (uint64_t) MARKER_BYTE_UNKNOWN
748 << ((type_size
- 1 - i
) * BITS_PER_MARKER
);
750 if (type_size
< 64 / BITS_PER_MARKER
)
752 /* If STMT casts to a smaller type mask out the bits not
753 belonging to the target type. */
754 n
->n
&= ((uint64_t) 1 << (type_size
* BITS_PER_MARKER
)) - 1;
758 n
->range
= type_size
;
764 return verify_symbolic_number_p (n
, stmt
) ? source_stmt1
: NULL
;
767 /* Handle binary rhs. */
769 if (rhs_class
== GIMPLE_BINARY_RHS
)
771 struct symbolic_number n1
, n2
;
772 gimple
*source_stmt
, *source_stmt2
;
774 if (!rhs2
|| TREE_CODE (rhs2
) != SSA_NAME
)
777 rhs2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
784 source_stmt1
= find_bswap_or_nop_1 (rhs1_stmt
, &n1
, limit
- 1);
789 source_stmt2
= find_bswap_or_nop_1 (rhs2_stmt
, &n2
, limit
- 1);
794 if (TYPE_PRECISION (n1
.type
) != TYPE_PRECISION (n2
.type
))
797 if (n1
.vuse
!= n2
.vuse
)
801 = perform_symbolic_merge (source_stmt1
, &n1
, source_stmt2
, &n2
, n
,
807 if (!verify_symbolic_number_p (n
, stmt
))
819 /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
820 *CMPXCHG, *CMPNOP and adjust *N. */
823 find_bswap_or_nop_finalize (struct symbolic_number
*n
, uint64_t *cmpxchg
,
824 uint64_t *cmpnop
, bool *cast64_to_32
)
829 /* The number which the find_bswap_or_nop_1 result should match in order
830 to have a full byte swap. The number is shifted to the right
831 according to the size of the symbolic number before using it. */
834 *cast64_to_32
= false;
836 /* Find real size of result (highest non-zero byte). */
838 for (tmpn
= n
->n
, rsize
= 0; tmpn
; tmpn
>>= BITS_PER_MARKER
, rsize
++);
842 /* Zero out the bits corresponding to untouched bytes in original gimple
844 if (n
->range
< (int) sizeof (int64_t))
846 mask
= ((uint64_t) 1 << (n
->range
* BITS_PER_MARKER
)) - 1;
847 if (n
->base_addr
== NULL
849 && int_size_in_bytes (TREE_TYPE (n
->src
)) == 8)
851 /* If all bytes in n->n are either 0 or in [5..8] range, this
852 might be a candidate for (unsigned) __builtin_bswap64 (src).
853 It is not worth it for (unsigned short) __builtin_bswap64 (src)
854 or (unsigned short) __builtin_bswap32 (src). */
855 *cast64_to_32
= true;
856 for (tmpn
= n
->n
; tmpn
; tmpn
>>= BITS_PER_MARKER
)
857 if ((tmpn
& MARKER_MASK
)
858 && ((tmpn
& MARKER_MASK
) <= 4 || (tmpn
& MARKER_MASK
) > 8))
860 *cast64_to_32
= false;
867 *cmpxchg
>>= (64 / BITS_PER_MARKER
- n
->range
) * BITS_PER_MARKER
;
871 /* Zero out the bits corresponding to unused bytes in the result of the
872 gimple expression. */
873 if (rsize
< n
->range
)
875 if (BYTES_BIG_ENDIAN
)
877 mask
= ((uint64_t) 1 << (rsize
* BITS_PER_MARKER
)) - 1;
879 if (n
->range
- rsize
== sizeof (int64_t))
882 *cmpnop
>>= (n
->range
- rsize
) * BITS_PER_MARKER
;
886 mask
= ((uint64_t) 1 << (rsize
* BITS_PER_MARKER
)) - 1;
887 if (n
->range
- rsize
== sizeof (int64_t))
890 *cmpxchg
>>= (n
->range
- rsize
) * BITS_PER_MARKER
;
898 n
->range
*= BITS_PER_UNIT
;
/* Helper function for find_bswap_or_nop.  Return true if N is a swap or nop
   with MASK.  */
904 is_bswap_or_nop_p (uint64_t n
, uint64_t cmpxchg
,
905 uint64_t cmpnop
, uint64_t* mask
,
908 *mask
= ~(uint64_t) 0;
911 else if (n
== cmpxchg
)
916 for (uint64_t msk
= MARKER_MASK
; msk
; msk
<<= BITS_PER_MARKER
)
919 else if ((n
& msk
) == (cmpxchg
& msk
))
932 /* Check if STMT completes a bswap implementation or a read in a given
933 endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
934 accordingly. It also sets N to represent the kind of operations
935 performed: size of the resulting expression and whether it works on
936 a memory source, and if so alias-set and vuse. At last, the
937 function returns a stmt whose rhs's first tree is the source
941 find_bswap_or_nop (gimple
*stmt
, struct symbolic_number
*n
, bool *bswap
,
942 bool *cast64_to_32
, uint64_t *mask
, uint64_t* l_rotate
)
944 tree type_size
= TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt
)));
945 if (!tree_fits_uhwi_p (type_size
))
948 /* The last parameter determines the depth search limit. It usually
949 correlates directly to the number n of bytes to be touched. We
950 increase that number by 2 * (log2(n) + 1) here in order to also
951 cover signed -> unsigned conversions of the src operand as can be seen
952 in libgcc, and for initial shift/and operation of the src operand. */
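  /* For instance, for a 4-byte result the limit is 4 + 2 * (1 + 2) = 10
     statements, and for an 8-byte result it is 8 + 2 * (1 + 3) = 16.  */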
953 int limit
= tree_to_uhwi (type_size
);
954 limit
+= 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT
) limit
));
955 gimple
*ins_stmt
= find_bswap_or_nop_1 (stmt
, n
, limit
);
959 if (gimple_assign_rhs_code (stmt
) != CONSTRUCTOR
960 || BYTES_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
962 unsigned HOST_WIDE_INT sz
= tree_to_uhwi (type_size
) * BITS_PER_UNIT
;
963 if (sz
!= 16 && sz
!= 32 && sz
!= 64)
965 tree rhs
= gimple_assign_rhs1 (stmt
);
966 if (CONSTRUCTOR_NELTS (rhs
) == 0)
968 tree eltype
= TREE_TYPE (TREE_TYPE (rhs
));
969 unsigned HOST_WIDE_INT eltsz
970 = int_size_in_bytes (eltype
) * BITS_PER_UNIT
;
971 if (TYPE_PRECISION (eltype
) != eltsz
)
973 constructor_elt
*elt
;
975 tree type
= build_nonstandard_integer_type (sz
, 1);
976 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs
), i
, elt
)
978 if (TREE_CODE (elt
->value
) != SSA_NAME
979 || !INTEGRAL_TYPE_P (TREE_TYPE (elt
->value
)))
981 struct symbolic_number n1
;
983 = find_bswap_or_nop_1 (SSA_NAME_DEF_STMT (elt
->value
), &n1
,
991 n1
.range
= sz
/ BITS_PER_UNIT
;
995 ins_stmt
= source_stmt
;
1000 if (n
->vuse
!= n1
.vuse
)
1003 struct symbolic_number n0
= *n
;
1005 if (!BYTES_BIG_ENDIAN
)
1007 if (!do_shift_rotate (LSHIFT_EXPR
, &n1
, i
* eltsz
))
1010 else if (!do_shift_rotate (LSHIFT_EXPR
, &n0
, eltsz
))
1013 = perform_symbolic_merge (ins_stmt
, &n0
, source_stmt
, &n1
, n
,
1022 uint64_t cmpxchg
, cmpnop
;
1023 uint64_t orig_range
= n
->range
* BITS_PER_UNIT
;
1024 find_bswap_or_nop_finalize (n
, &cmpxchg
, &cmpnop
, cast64_to_32
);
  /* A complete byte swap should make the symbolic number start with the
     largest digit in the highest order byte.  An unchanged symbolic number
     indicates a read with the same endianness as the target architecture.  */
1030 uint64_t tmp_n
= n
->n
;
1031 if (!is_bswap_or_nop_p (tmp_n
, cmpxchg
, cmpnop
, mask
, bswap
))
      /* Try bswap + lrotate.  */
      /* TODO: handle cast64_to_32 and big/little_endian memory
	 source when rsize < range.  */
1036 if (n
->range
== orig_range
	  /* There are cases like 0x300000200 for a uint32->uint64 cast;
	     don't handle this.  */
1039 && n
->range
== TYPE_PRECISION (n
->type
)
1040 && ((orig_range
== 32
1041 && optab_handler (rotl_optab
, SImode
) != CODE_FOR_nothing
)
1042 || (orig_range
== 64
1043 && optab_handler (rotl_optab
, DImode
) != CODE_FOR_nothing
))
1044 && (tmp_n
& MARKER_MASK
) < orig_range
/ BITS_PER_UNIT
)
1046 uint64_t range
= (orig_range
/ BITS_PER_UNIT
) * BITS_PER_MARKER
;
1047 uint64_t count
= (tmp_n
& MARKER_MASK
) * BITS_PER_MARKER
;
	  /* I.e. handle 0x203040506070800 when the lower byte is zero.  */
1051 for (uint64_t i
= 1; i
!= range
/ BITS_PER_MARKER
; i
++)
1053 count
= (tmp_n
>> i
* BITS_PER_MARKER
) & MARKER_MASK
;
	      /* Count should be meaningful, not 0xff.  */
1057 if (count
<= range
/ BITS_PER_MARKER
)
1059 count
= (count
+ i
) * BITS_PER_MARKER
% range
;
1067 tmp_n
= tmp_n
>> count
| tmp_n
<< (range
- count
);
1068 if (orig_range
== 32)
1069 tmp_n
&= (1ULL << 32) - 1;
1070 if (!is_bswap_or_nop_p (tmp_n
, cmpxchg
, cmpnop
, mask
, bswap
))
1072 *l_rotate
= count
/ BITS_PER_MARKER
* BITS_PER_UNIT
;
1073 gcc_assert (*bswap
);
1079 /* Useless bit manipulation performed by code. */
1080 if (!n
->base_addr
&& n
->n
== cmpnop
&& n
->n_ops
== 1)
1086 const pass_data pass_data_optimize_bswap
=
1088 GIMPLE_PASS
, /* type */
1090 OPTGROUP_NONE
, /* optinfo_flags */
1091 TV_NONE
, /* tv_id */
1092 PROP_ssa
, /* properties_required */
1093 0, /* properties_provided */
1094 0, /* properties_destroyed */
1095 0, /* todo_flags_start */
1096 0, /* todo_flags_finish */
1099 class pass_optimize_bswap
: public gimple_opt_pass
1102 pass_optimize_bswap (gcc::context
*ctxt
)
1103 : gimple_opt_pass (pass_data_optimize_bswap
, ctxt
)
1106 /* opt_pass methods: */
1107 bool gate (function
*) final override
1109 return flag_expensive_optimizations
&& optimize
&& BITS_PER_UNIT
== 8;
1112 unsigned int execute (function
*) final override
;
1114 }; // class pass_optimize_bswap
1116 /* Helper function for bswap_replace. Build VIEW_CONVERT_EXPR from
1117 VAL to TYPE. If VAL has different type size, emit a NOP_EXPR cast
1121 bswap_view_convert (gimple_stmt_iterator
*gsi
, tree type
, tree val
,
1124 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (val
))
1125 || POINTER_TYPE_P (TREE_TYPE (val
)));
1126 if (TYPE_SIZE (type
) != TYPE_SIZE (TREE_TYPE (val
)))
1128 HOST_WIDE_INT prec
= TREE_INT_CST_LOW (TYPE_SIZE (type
));
1129 if (POINTER_TYPE_P (TREE_TYPE (val
)))
1132 = gimple_build_assign (make_ssa_name (pointer_sized_int_node
),
1135 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
1137 gsi_insert_after (gsi
, g
, GSI_NEW_STMT
);
1138 val
= gimple_assign_lhs (g
);
1140 tree itype
= build_nonstandard_integer_type (prec
, 1);
1141 gimple
*g
= gimple_build_assign (make_ssa_name (itype
), NOP_EXPR
, val
);
1143 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
1145 gsi_insert_after (gsi
, g
, GSI_NEW_STMT
);
1146 val
= gimple_assign_lhs (g
);
1148 return build1 (VIEW_CONVERT_EXPR
, type
, val
);
/* Perform the bswap optimization: replace the expression computed in the rhs
   of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
   bswap, load or load + bswap expression.
   Which of these alternatives replaces the rhs is given by N->base_addr (non
   null if a load is needed) and BSWAP.  The type, VUSE and alias-set of the
   load to perform are also given in N while the builtin bswap invocation is
   given in FNDECL.  Finally, if a load is involved, INS_STMT refers to one of
   the load statements involved to construct the rhs in gsi_stmt (GSI) and
   N->range gives the size of the rhs expression for maintaining some
   statistics.

   Note that if the replacement involves a load and if gsi_stmt (GSI) is
   non-NULL, that stmt is moved just after INS_STMT to do the load with the
   same VUSE which can lead to gsi_stmt (GSI) changing basic block.  */
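/* Illustration: for a hand-written 32-bit byte swap of a value read from
   memory, e.g.

     x = (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];

   with p an unsigned char pointer on a little-endian target, this routine
   replaces the whole computation by a single 32-bit load (with the alignment
   the object provides) followed by a call to __builtin_bswap32; the 16-bit
   variant is emitted as a load followed by a rotate by 8 bits instead.  */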
1167 bswap_replace (gimple_stmt_iterator gsi
, gimple
*ins_stmt
, tree fndecl
,
1168 tree bswap_type
, tree load_type
, struct symbolic_number
*n
,
1169 bool bswap
, uint64_t mask
, uint64_t l_rotate
)
1171 tree src
, tmp
, tgt
= NULL_TREE
;
1172 gimple
*bswap_stmt
, *mask_stmt
= NULL
, *rotl_stmt
= NULL
;
1173 tree_code conv_code
= NOP_EXPR
;
1175 gimple
*cur_stmt
= gsi_stmt (gsi
);
1179 tgt
= gimple_assign_lhs (cur_stmt
);
1180 if (gimple_assign_rhs_code (cur_stmt
) == CONSTRUCTOR
1182 && VECTOR_TYPE_P (TREE_TYPE (tgt
)))
1183 conv_code
= VIEW_CONVERT_EXPR
;
1186 /* Need to load the value from memory first. */
1189 gimple_stmt_iterator gsi_ins
= gsi
;
1191 gsi_ins
= gsi_for_stmt (ins_stmt
);
1192 tree addr_expr
, addr_tmp
, val_expr
, val_tmp
;
1193 tree load_offset_ptr
, aligned_load_type
;
1195 unsigned align
= get_object_alignment (src
);
1196 poly_int64 load_offset
= 0;
1200 basic_block ins_bb
= gimple_bb (ins_stmt
);
1201 basic_block cur_bb
= gimple_bb (cur_stmt
);
1202 if (!dominated_by_p (CDI_DOMINATORS
, cur_bb
, ins_bb
))
	  /* Move cur_stmt just before one of the loads of the original
	     sequence to ensure it has the same VUSE.  See PR61517 for what
	     could go wrong.  */
1208 if (gimple_bb (cur_stmt
) != gimple_bb (ins_stmt
))
1209 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt
));
1210 gsi_move_before (&gsi
, &gsi_ins
);
1211 gsi
= gsi_for_stmt (cur_stmt
);
1216 /* Compute address to load from and cast according to the size
1218 addr_expr
= build_fold_addr_expr (src
);
1219 if (is_gimple_mem_ref_addr (addr_expr
))
1220 addr_tmp
= unshare_expr (addr_expr
);
1223 addr_tmp
= unshare_expr (n
->base_addr
);
1224 if (!is_gimple_mem_ref_addr (addr_tmp
))
1225 addr_tmp
= force_gimple_operand_gsi_1 (&gsi
, addr_tmp
,
1226 is_gimple_mem_ref_addr
,
1229 load_offset
= n
->bytepos
;
1233 = force_gimple_operand_gsi (&gsi
, unshare_expr (n
->offset
),
1234 true, NULL_TREE
, true,
1237 = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp
)),
1238 POINTER_PLUS_EXPR
, addr_tmp
, off
);
1239 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1240 addr_tmp
= gimple_assign_lhs (stmt
);
1244 /* Perform the load. */
1245 aligned_load_type
= load_type
;
1246 if (align
< TYPE_ALIGN (load_type
))
1247 aligned_load_type
= build_aligned_type (load_type
, align
);
1248 load_offset_ptr
= build_int_cst (n
->alias_set
, load_offset
);
1249 val_expr
= fold_build2 (MEM_REF
, aligned_load_type
, addr_tmp
,
1255 nop_stats
.found_16bit
++;
1256 else if (n
->range
== 32)
1257 nop_stats
.found_32bit
++;
1260 gcc_assert (n
->range
== 64);
1261 nop_stats
.found_64bit
++;
1264 /* Convert the result of load if necessary. */
1265 if (tgt
&& !useless_type_conversion_p (TREE_TYPE (tgt
), load_type
))
1267 val_tmp
= make_temp_ssa_name (aligned_load_type
, NULL
,
1269 load_stmt
= gimple_build_assign (val_tmp
, val_expr
);
1270 gimple_set_vuse (load_stmt
, n
->vuse
);
1271 gsi_insert_before (&gsi
, load_stmt
, GSI_SAME_STMT
);
1272 if (conv_code
== VIEW_CONVERT_EXPR
)
1273 val_tmp
= bswap_view_convert (&gsi
, TREE_TYPE (tgt
), val_tmp
,
1275 gimple_assign_set_rhs_with_ops (&gsi
, conv_code
, val_tmp
);
1276 update_stmt (cur_stmt
);
1280 gimple_assign_set_rhs_with_ops (&gsi
, MEM_REF
, val_expr
);
1281 gimple_set_vuse (cur_stmt
, n
->vuse
);
1282 update_stmt (cur_stmt
);
1286 tgt
= make_ssa_name (load_type
);
1287 cur_stmt
= gimple_build_assign (tgt
, MEM_REF
, val_expr
);
1288 gimple_set_vuse (cur_stmt
, n
->vuse
);
1289 gsi_insert_before (&gsi
, cur_stmt
, GSI_SAME_STMT
);
1295 "%d bit load in target endianness found at: ",
1297 print_gimple_stmt (dump_file
, cur_stmt
, 0);
1303 val_tmp
= make_temp_ssa_name (aligned_load_type
, NULL
, "load_dst");
1304 load_stmt
= gimple_build_assign (val_tmp
, val_expr
);
1305 gimple_set_vuse (load_stmt
, n
->vuse
);
1306 gsi_insert_before (&gsi
, load_stmt
, GSI_SAME_STMT
);
1313 if (tgt
&& !useless_type_conversion_p (TREE_TYPE (tgt
), TREE_TYPE (src
)))
1315 if (!is_gimple_val (src
))
1317 if (conv_code
== VIEW_CONVERT_EXPR
)
1318 src
= bswap_view_convert (&gsi
, TREE_TYPE (tgt
), src
, true);
1319 g
= gimple_build_assign (tgt
, conv_code
, src
);
1322 g
= gimple_build_assign (tgt
, src
);
1326 nop_stats
.found_16bit
++;
1327 else if (n
->range
== 32)
1328 nop_stats
.found_32bit
++;
1331 gcc_assert (n
->range
== 64);
1332 nop_stats
.found_64bit
++;
1337 "%d bit reshuffle in target endianness found at: ",
1340 print_gimple_stmt (dump_file
, cur_stmt
, 0);
1343 print_generic_expr (dump_file
, tgt
, TDF_NONE
);
1344 fprintf (dump_file
, "\n");
1348 gsi_replace (&gsi
, g
, true);
1351 else if (TREE_CODE (src
) == BIT_FIELD_REF
)
1352 src
= TREE_OPERAND (src
, 0);
1355 bswap_stats
.found_16bit
++;
1356 else if (n
->range
== 32)
1357 bswap_stats
.found_32bit
++;
1360 gcc_assert (n
->range
== 64);
1361 bswap_stats
.found_64bit
++;
1366 /* Convert the src expression if necessary. */
1367 if (!useless_type_conversion_p (TREE_TYPE (tmp
), bswap_type
))
1369 gimple
*convert_stmt
;
1371 tmp
= make_temp_ssa_name (bswap_type
, NULL
, "bswapsrc");
1372 convert_stmt
= gimple_build_assign (tmp
, NOP_EXPR
, src
);
1373 gsi_insert_before (&gsi
, convert_stmt
, GSI_SAME_STMT
);
1376 /* Canonical form for 16 bit bswap is a rotate expression. Only 16bit values
1377 are considered as rotation of 2N bit values by N bits is generally not
1378 equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which
1379 gives 0x03040102 while a bswap for that value is 0x04030201. */
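  /* For instance, a 16-bit swap written as (x >> 8) | (x << 8) on a 16-bit
     value is emitted here as x r<< 8 rather than as a call to
     __builtin_bswap16.  */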
1380 if (bswap
&& n
->range
== 16)
1382 tree count
= build_int_cst (NULL
, BITS_PER_UNIT
);
1383 src
= fold_build2 (LROTATE_EXPR
, bswap_type
, tmp
, count
);
1384 bswap_stmt
= gimple_build_assign (NULL
, src
);
1387 bswap_stmt
= gimple_build_call (fndecl
, 1, tmp
);
1389 if (tgt
== NULL_TREE
)
1390 tgt
= make_ssa_name (bswap_type
);
1393 if (mask
!= ~(uint64_t) 0)
1395 tree m
= build_int_cst (bswap_type
, mask
);
1396 tmp
= make_temp_ssa_name (bswap_type
, NULL
, "bswapdst");
1397 gimple_set_lhs (bswap_stmt
, tmp
);
1398 mask_stmt
= gimple_build_assign (tgt
, BIT_AND_EXPR
, tmp
, m
);
1404 tree m
= build_int_cst (bswap_type
, l_rotate
);
1405 tmp
= make_temp_ssa_name (bswap_type
, NULL
,
1406 mask_stmt
? "bswapmaskdst" : "bswapdst");
1407 gimple_set_lhs (mask_stmt
? mask_stmt
: bswap_stmt
, tmp
);
1408 rotl_stmt
= gimple_build_assign (tgt
, LROTATE_EXPR
, tmp
, m
);
1412 /* Convert the result if necessary. */
1413 if (!useless_type_conversion_p (TREE_TYPE (tgt
), bswap_type
))
1415 tmp
= make_temp_ssa_name (bswap_type
, NULL
, "bswapdst");
1417 gimple_stmt_iterator gsi2
= gsi
;
1418 if (conv_code
== VIEW_CONVERT_EXPR
)
1419 atmp
= bswap_view_convert (&gsi2
, TREE_TYPE (tgt
), tmp
, false);
1420 gimple
*convert_stmt
= gimple_build_assign (tgt
, conv_code
, atmp
);
1421 gsi_insert_after (&gsi2
, convert_stmt
, GSI_SAME_STMT
);
1424 gimple_set_lhs (rotl_stmt
? rotl_stmt
1425 : mask_stmt
? mask_stmt
: bswap_stmt
, tmp
);
1429 fprintf (dump_file
, "%d bit bswap implementation found at: ",
1432 print_gimple_stmt (dump_file
, cur_stmt
, 0);
1435 print_generic_expr (dump_file
, tgt
, TDF_NONE
);
1436 fprintf (dump_file
, "\n");
1443 gsi_insert_after (&gsi
, rotl_stmt
, GSI_SAME_STMT
);
1445 gsi_insert_after (&gsi
, mask_stmt
, GSI_SAME_STMT
);
1446 gsi_insert_after (&gsi
, bswap_stmt
, GSI_SAME_STMT
);
1447 gsi_remove (&gsi
, true);
1451 gsi_insert_before (&gsi
, bswap_stmt
, GSI_SAME_STMT
);
1453 gsi_insert_before (&gsi
, mask_stmt
, GSI_SAME_STMT
);
1455 gsi_insert_after (&gsi
, rotl_stmt
, GSI_SAME_STMT
);
1460 /* Try to optimize an assignment CUR_STMT with CONSTRUCTOR on the rhs
1461 using bswap optimizations. CDI_DOMINATORS need to be
1462 computed on entry. Return true if it has been optimized and
1463 TODO_update_ssa is needed. */
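/* Illustration (the vector typedef name is made up): on a little-endian
   target a constructor such as

     v = (v4qi) { b[3], b[2], b[1], b[0] };

   where b is an unsigned char array, can be rewritten as a single 32-bit
   load of b, a call to __builtin_bswap32 and a VIEW_CONVERT_EXPR of the
   result to the vector type.  */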
1466 maybe_optimize_vector_constructor (gimple
*cur_stmt
)
1468 tree fndecl
= NULL_TREE
, bswap_type
= NULL_TREE
, load_type
;
1469 struct symbolic_number n
;
1472 gcc_assert (is_gimple_assign (cur_stmt
)
1473 && gimple_assign_rhs_code (cur_stmt
) == CONSTRUCTOR
);
1475 tree rhs
= gimple_assign_rhs1 (cur_stmt
);
1476 if (!VECTOR_TYPE_P (TREE_TYPE (rhs
))
1477 || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs
)))
1478 || gimple_assign_lhs (cur_stmt
) == NULL_TREE
)
1481 HOST_WIDE_INT sz
= int_size_in_bytes (TREE_TYPE (rhs
)) * BITS_PER_UNIT
;
1485 load_type
= bswap_type
= uint16_type_node
;
1488 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32
)
1489 && optab_handler (bswap_optab
, SImode
) != CODE_FOR_nothing
)
1491 load_type
= uint32_type_node
;
1492 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP32
);
1493 bswap_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
1499 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64
)
1500 && (optab_handler (bswap_optab
, DImode
) != CODE_FOR_nothing
1501 || (word_mode
== SImode
1502 && builtin_decl_explicit_p (BUILT_IN_BSWAP32
)
1503 && optab_handler (bswap_optab
, SImode
) != CODE_FOR_nothing
)))
1505 load_type
= uint64_type_node
;
1506 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP64
);
1507 bswap_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
1517 uint64_t mask
, l_rotate
;
1518 gimple
*ins_stmt
= find_bswap_or_nop (cur_stmt
, &n
, &bswap
,
1519 &cast64_to_32
, &mask
, &l_rotate
);
1521 || n
.range
!= (unsigned HOST_WIDE_INT
) sz
1523 || mask
!= ~(uint64_t) 0)
1526 if (bswap
&& !fndecl
&& n
.range
!= 16)
1529 memset (&nop_stats
, 0, sizeof (nop_stats
));
1530 memset (&bswap_stats
, 0, sizeof (bswap_stats
));
1531 return bswap_replace (gsi_for_stmt (cur_stmt
), ins_stmt
, fndecl
,
1532 bswap_type
, load_type
, &n
, bswap
, mask
,
1533 l_rotate
) != NULL_TREE
;
/* Find manual byte swap implementations as well as loads in a given
   endianness.  Byte swaps are turned into a bswap builtin invocation
   while endian loads are converted to a bswap builtin invocation or
   a simple load according to the target endianness.  */
1542 pass_optimize_bswap::execute (function
*fun
)
1545 bool bswap32_p
, bswap64_p
;
1546 bool changed
= false;
1547 tree bswap32_type
= NULL_TREE
, bswap64_type
= NULL_TREE
;
1549 bswap32_p
= (builtin_decl_explicit_p (BUILT_IN_BSWAP32
)
1550 && optab_handler (bswap_optab
, SImode
) != CODE_FOR_nothing
);
1551 bswap64_p
= (builtin_decl_explicit_p (BUILT_IN_BSWAP64
)
1552 && (optab_handler (bswap_optab
, DImode
) != CODE_FOR_nothing
1553 || (bswap32_p
&& word_mode
== SImode
)));
1555 /* Determine the argument type of the builtins. The code later on
1556 assumes that the return and argument type are the same. */
1559 tree fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP32
);
1560 bswap32_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
1565 tree fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP64
);
1566 bswap64_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
1569 memset (&nop_stats
, 0, sizeof (nop_stats
));
1570 memset (&bswap_stats
, 0, sizeof (bswap_stats
));
1571 calculate_dominance_info (CDI_DOMINATORS
);
1573 FOR_EACH_BB_FN (bb
, fun
)
1575 gimple_stmt_iterator gsi
;
1577 /* We do a reverse scan for bswap patterns to make sure we get the
1578 widest match. As bswap pattern matching doesn't handle previously
1579 inserted smaller bswap replacements as sub-patterns, the wider
1580 variant wouldn't be detected. */
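	 /* For example, a hand-written 64-bit byte swap that is built out of
	    two hand-written 32-bit byte swaps of the two halves would, with a
	    forward scan, first have each half rewritten into a
	    __builtin_bswap32 call, after which the wider 64-bit pattern could
	    no longer be matched.  */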
1581 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
);)
1583 gimple
*ins_stmt
, *cur_stmt
= gsi_stmt (gsi
);
1584 tree fndecl
= NULL_TREE
, bswap_type
= NULL_TREE
, load_type
;
1585 enum tree_code code
;
1586 struct symbolic_number n
;
1587 bool bswap
, cast64_to_32
;
1588 uint64_t mask
, l_rotate
;
	  /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
	     might be moved to a different basic block by bswap_replace and gsi
	     must not point to it if that's the case.  Moving the gsi_prev
	     there makes sure that gsi points to the statement previous to
	     cur_stmt while still making sure that all statements are
	     considered in this basic block.  */
1598 if (!is_gimple_assign (cur_stmt
))
1601 code
= gimple_assign_rhs_code (cur_stmt
);
1606 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt
))
1607 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt
))
1617 tree rhs
= gimple_assign_rhs1 (cur_stmt
);
1618 if (VECTOR_TYPE_P (TREE_TYPE (rhs
))
1619 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs
))))
1627 ins_stmt
= find_bswap_or_nop (cur_stmt
, &n
, &bswap
,
1628 &cast64_to_32
, &mask
, &l_rotate
);
1636 /* Already in canonical form, nothing to do. */
1637 if (code
== LROTATE_EXPR
|| code
== RROTATE_EXPR
)
1639 load_type
= bswap_type
= uint16_type_node
;
1642 load_type
= uint32_type_node
;
1645 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP32
);
1646 bswap_type
= bswap32_type
;
1650 load_type
= uint64_type_node
;
1653 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP64
);
1654 bswap_type
= bswap64_type
;
1661 if (bswap
&& !fndecl
&& n
.range
!= 16)
1664 if (bswap_replace (gsi_for_stmt (cur_stmt
), ins_stmt
, fndecl
,
1665 bswap_type
, load_type
, &n
, bswap
, mask
,
1671 statistics_counter_event (fun
, "16-bit nop implementations found",
1672 nop_stats
.found_16bit
);
1673 statistics_counter_event (fun
, "32-bit nop implementations found",
1674 nop_stats
.found_32bit
);
1675 statistics_counter_event (fun
, "64-bit nop implementations found",
1676 nop_stats
.found_64bit
);
1677 statistics_counter_event (fun
, "16-bit bswap implementations found",
1678 bswap_stats
.found_16bit
);
1679 statistics_counter_event (fun
, "32-bit bswap implementations found",
1680 bswap_stats
.found_32bit
);
1681 statistics_counter_event (fun
, "64-bit bswap implementations found",
1682 bswap_stats
.found_64bit
);
1684 return (changed
? TODO_update_ssa
: 0);
1690 make_pass_optimize_bswap (gcc::context
*ctxt
)
1692 return new pass_optimize_bswap (ctxt
);
1697 /* Struct recording one operand for the store, which is either a constant,
1698 then VAL represents the constant and all the other fields are zero, or
1699 a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1700 and the other fields also reflect the memory load, or an SSA name, then
1701 VAL represents the SSA name and all the other fields are zero. */
1703 class store_operand_info
1708 poly_uint64 bitsize
;
1710 poly_uint64 bitregion_start
;
1711 poly_uint64 bitregion_end
;
1714 store_operand_info ();
1717 store_operand_info::store_operand_info ()
1718 : val (NULL_TREE
), base_addr (NULL_TREE
), bitsize (0), bitpos (0),
1719 bitregion_start (0), bitregion_end (0), stmt (NULL
), bit_not_p (false)
1723 /* Struct recording the information about a single store of an immediate
1724 to memory. These are created in the first phase and coalesced into
1725 merged_store_group objects in the second phase. */
1727 class store_immediate_info
1730 unsigned HOST_WIDE_INT bitsize
;
1731 unsigned HOST_WIDE_INT bitpos
;
1732 unsigned HOST_WIDE_INT bitregion_start
;
1733 /* This is one past the last bit of the bit region. */
1734 unsigned HOST_WIDE_INT bitregion_end
;
1737 /* INTEGER_CST for constant store, STRING_CST for string store,
1738 MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
1739 BIT_INSERT_EXPR for bit insertion.
1740 LROTATE_EXPR if it can be only bswap optimized and
1741 ops are not really meaningful.
1742 NOP_EXPR if bswap optimization detected identity, ops
1743 are not meaningful. */
1744 enum tree_code rhs_code
;
1745 /* Two fields for bswap optimization purposes. */
1746 struct symbolic_number n
;
1748 /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
1750 /* True if ops have been swapped and thus ops[1] represents
1751 rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1753 /* The index number of the landing pad, or 0 if there is none. */
1755 /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1756 just the first one. */
1757 store_operand_info ops
[2];
1758 store_immediate_info (unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
,
1759 unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
,
1760 gimple
*, unsigned int, enum tree_code
,
1761 struct symbolic_number
&, gimple
*, bool, int,
1762 const store_operand_info
&,
1763 const store_operand_info
&);
1766 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs
,
1767 unsigned HOST_WIDE_INT bp
,
1768 unsigned HOST_WIDE_INT brs
,
1769 unsigned HOST_WIDE_INT bre
,
1772 enum tree_code rhscode
,
1773 struct symbolic_number
&nr
,
1777 const store_operand_info
&op0r
,
1778 const store_operand_info
&op1r
)
1779 : bitsize (bs
), bitpos (bp
), bitregion_start (brs
), bitregion_end (bre
),
1780 stmt (st
), order (ord
), rhs_code (rhscode
), n (nr
),
1781 ins_stmt (ins_stmtp
), bit_not_p (bitnotp
), ops_swapped_p (false),
1782 lp_nr (nr2
), ops
{ op0r
, op1r
}
1786 /* Struct representing a group of stores to contiguous memory locations.
1787 These are produced by the second phase (coalescing) and consumed in the
1788 third phase that outputs the widened stores. */
1790 class merged_store_group
1793 unsigned HOST_WIDE_INT start
;
1794 unsigned HOST_WIDE_INT width
;
1795 unsigned HOST_WIDE_INT bitregion_start
;
1796 unsigned HOST_WIDE_INT bitregion_end
;
1797 /* The size of the allocated memory for val and mask. */
1798 unsigned HOST_WIDE_INT buf_size
;
1799 unsigned HOST_WIDE_INT align_base
;
1800 poly_uint64 load_align_base
[2];
1803 unsigned int load_align
[2];
1804 unsigned int first_order
;
1805 unsigned int last_order
;
1807 bool string_concatenation
;
1808 bool only_constants
;
1810 unsigned int first_nonmergeable_order
;
1813 auto_vec
<store_immediate_info
*> stores
;
1814 /* We record the first and last original statements in the sequence because
1815 we'll need their vuse/vdef and replacement position. It's easier to keep
1816 track of them separately as 'stores' is reordered by apply_stores. */
1820 unsigned char *mask
;
1822 merged_store_group (store_immediate_info
*);
1823 ~merged_store_group ();
1824 bool can_be_merged_into (store_immediate_info
*);
1825 void merge_into (store_immediate_info
*);
1826 void merge_overlapping (store_immediate_info
*);
1827 bool apply_stores ();
1829 void do_merge (store_immediate_info
*);
1832 /* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1835 dump_char_array (FILE *fd
, unsigned char *ptr
, unsigned int len
)
1840 for (unsigned int i
= 0; i
< len
; i
++)
1841 fprintf (fd
, "%02x ", ptr
[i
]);
1845 /* Clear out LEN bits starting from bit START in the byte array
1846 PTR. This clears the bits to the *right* from START.
1847 START must be within [0, BITS_PER_UNIT) and counts starting from
1848 the least significant bit. */
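/* For example, clear_bit_region_be (ptr, 5, 3) on a byte containing 0xff
   clears bits 5, 4 and 3 and leaves 0xc7.  */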
1851 clear_bit_region_be (unsigned char *ptr
, unsigned int start
,
1856 /* Clear len bits to the right of start. */
1857 else if (len
<= start
+ 1)
1859 unsigned char mask
= (~(~0U << len
));
1860 mask
= mask
<< (start
+ 1U - len
);
1863 else if (start
!= BITS_PER_UNIT
- 1)
1865 clear_bit_region_be (ptr
, start
, (start
% BITS_PER_UNIT
) + 1);
1866 clear_bit_region_be (ptr
+ 1, BITS_PER_UNIT
- 1,
1867 len
- (start
% BITS_PER_UNIT
) - 1);
1869 else if (start
== BITS_PER_UNIT
- 1
1870 && len
> BITS_PER_UNIT
)
1872 unsigned int nbytes
= len
/ BITS_PER_UNIT
;
1873 memset (ptr
, 0, nbytes
);
1874 if (len
% BITS_PER_UNIT
!= 0)
1875 clear_bit_region_be (ptr
+ nbytes
, BITS_PER_UNIT
- 1,
1876 len
% BITS_PER_UNIT
);
/* In the byte array PTR clear the bit region starting at bit
   START that is LEN bits wide.
1884 For regions spanning multiple bytes do this recursively until we reach
1885 zero LEN or a region contained within a single byte. */
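/* For example, clear_bit_region (ptr, 6, 5) clears the two most significant
   bits of ptr[0] and the three least significant bits of ptr[1].  */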
1888 clear_bit_region (unsigned char *ptr
, unsigned int start
,
1891 /* Degenerate base case. */
1894 else if (start
>= BITS_PER_UNIT
)
1895 clear_bit_region (ptr
+ 1, start
- BITS_PER_UNIT
, len
);
1896 /* Second base case. */
1897 else if ((start
+ len
) <= BITS_PER_UNIT
)
1899 unsigned char mask
= (~0U) << (unsigned char) (BITS_PER_UNIT
- len
);
1900 mask
>>= BITS_PER_UNIT
- (start
+ len
);
1906 /* Clear most significant bits in a byte and proceed with the next byte. */
1907 else if (start
!= 0)
1909 clear_bit_region (ptr
, start
, BITS_PER_UNIT
- start
);
1910 clear_bit_region (ptr
+ 1, 0, len
- (BITS_PER_UNIT
- start
));
1912 /* Whole bytes need to be cleared. */
1913 else if (start
== 0 && len
> BITS_PER_UNIT
)
1915 unsigned int nbytes
= len
/ BITS_PER_UNIT
;
1916 /* We could recurse on each byte but we clear whole bytes, so a simple
1918 memset (ptr
, '\0', nbytes
);
1919 /* Clear the remaining sub-byte region if there is one. */
1920 if (len
% BITS_PER_UNIT
!= 0)
1921 clear_bit_region (ptr
+ nbytes
, 0, len
% BITS_PER_UNIT
);
1927 /* Write BITLEN bits of EXPR to the byte array PTR at
1928 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1929 Return true if the operation succeeded. */
1932 encode_tree_to_bitpos (tree expr
, unsigned char *ptr
, int bitlen
, int bitpos
,
1933 unsigned int total_bytes
)
1935 unsigned int first_byte
= bitpos
/ BITS_PER_UNIT
;
1936 bool sub_byte_op_p
= ((bitlen
% BITS_PER_UNIT
)
1937 || (bitpos
% BITS_PER_UNIT
)
1938 || !int_mode_for_size (bitlen
, 0).exists ());
1940 = (TREE_CODE (expr
) == CONSTRUCTOR
1941 && CONSTRUCTOR_NELTS (expr
) == 0
1942 && TYPE_SIZE_UNIT (TREE_TYPE (expr
))
1943 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr
))));
1947 if (first_byte
>= total_bytes
)
1949 total_bytes
-= first_byte
;
1952 unsigned HOST_WIDE_INT rhs_bytes
1953 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr
)));
1954 if (rhs_bytes
> total_bytes
)
1956 memset (ptr
+ first_byte
, '\0', rhs_bytes
);
1959 return native_encode_expr (expr
, ptr
+ first_byte
, total_bytes
) != 0;
1963 We are writing a non byte-sized quantity or at a position that is not
1965 |--------|--------|--------| ptr + first_byte
1967 xxx xxxxxxxx xxx< bp>
1970 First native_encode_expr EXPR into a temporary buffer and shift each
1971 byte in the buffer by 'bp' (carrying the bits over as necessary).
1972 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1973 <------bitlen---->< bp>
1974 Then we clear the destination bits:
1975 |---00000|00000000|000-----| ptr + first_byte
1976 <-------bitlen--->< bp>
1978 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1979 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1982 We are writing a non byte-sized quantity or at a position that is not
1984 ptr + first_byte |--------|--------|--------|
1986 <bp >xxx xxxxxxxx xxx
1989 First native_encode_expr EXPR into a temporary buffer and shift each
1990 byte in the buffer to the right by (carrying the bits over as necessary).
1991 We shift by as much as needed to align the most significant bit of EXPR
1993 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1994 <---bitlen----> <bp ><-----bitlen----->
1995 Then we clear the destination bits:
1996 ptr + first_byte |-----000||00000000||00000---|
1997 <bp ><-------bitlen----->
1999 Finally we ORR the bytes of the shifted EXPR into the cleared region:
2000 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
2001 The awkwardness comes from the fact that bitpos is counted from the
2002 most significant bit of a byte. */
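/* As a concrete little-endian instance of the scheme above: encoding a
   16-bit value at bit position 12 means first_byte == 1 and a shift amount
   of 4, so the two natively encoded bytes are shifted left by 4 bits (now
   spanning three bytes of the temporary buffer), the 16 destination bits
   starting at bit 4 of ptr[1] are cleared, and the shifted bytes are then
   ORed in.  */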
  /* We must be dealing with fixed-size data at this point, since the
     total size is also fixed.  */
  unsigned int byte_size;
  if (empty_ctor_p)
    {
      unsigned HOST_WIDE_INT rhs_bytes
	= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
      if (rhs_bytes > total_bytes)
	return false;
      byte_size = rhs_bytes;
    }
  else
    {
      fixed_size_mode mode
	= as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
      byte_size
	= mode == BLKmode
	  ? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)))
	  : GET_MODE_SIZE (mode);
    }
  /* Allocate an extra byte so that we have space to shift into.  */
  byte_size++;
  unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
  memset (tmpbuf, '\0', byte_size);
  /* The store detection code should only have allowed constants that are
     accepted by native_encode_expr or empty ctors.  */
  if (!empty_ctor_p
      && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
    gcc_unreachable ();

  /* The native_encode_expr machinery uses TYPE_MODE to determine how many
     bytes to write.  This means it can write more than
     ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
     write 8 bytes for a bitlen of 40).  Skip the bytes that are not within
     bitlen and zero out the bits that are not relevant as well (that may
     contain a sign bit due to sign-extension).  */
  unsigned int padding
    = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
  /* On big-endian the padding is at the 'front' so just skip the initial
     bytes.  */
  if (BYTES_BIG_ENDIAN)
    tmpbuf += padding;
  else
    byte_size -= padding;

  if (bitlen % BITS_PER_UNIT != 0)
    {
      if (BYTES_BIG_ENDIAN)
	clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
			     BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
      else
	clear_bit_region (tmpbuf, bitlen,
			  byte_size * BITS_PER_UNIT - bitlen);
    }
  /* Left shifting relies on the last byte being clear if bitlen is
     a multiple of BITS_PER_UNIT, which might not be clear if
     there are padding bytes.  */
  else if (!BYTES_BIG_ENDIAN)
    tmpbuf[byte_size - 1] = '\0';

  /* Clear the bit region in PTR where the bits from TMPBUF will be
     inserted into.  */
  if (BYTES_BIG_ENDIAN)
    clear_bit_region_be (ptr + first_byte,
			 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
  else
    clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);

  int shift_amnt;
  int bitlen_mod = bitlen % BITS_PER_UNIT;
  int bitpos_mod = bitpos % BITS_PER_UNIT;

  bool skip_byte = false;
  if (BYTES_BIG_ENDIAN)
    {
      /* BITPOS and BITLEN are exactly aligned and no shifting
	 is necessary.  */
      if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
	  || (bitpos_mod == 0 && bitlen_mod == 0))
	shift_amnt = 0;
      /* |. . . . . . . .|
	  <bp >   <blen >.
	 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
	 of the value until it aligns with 'bp' in the next byte over.  */
      else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
	{
	  shift_amnt = bitlen_mod + bitpos_mod;
	  skip_byte = bitlen_mod != 0;
	}
      /* |. . . . . . . .|
	  <----bp--->
	   <---blen---->.
	 Shift the value right within the same byte so it aligns with 'bp'.  */
      else
	shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
    }
  else
    shift_amnt = bitpos % BITS_PER_UNIT;

  /* Create the shifted version of EXPR.  */
  if (!BYTES_BIG_ENDIAN)
    {
      shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
      if (shift_amnt == 0)
	byte_size--;
    }
  else
    {
      gcc_assert (BYTES_BIG_ENDIAN);
      shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
      /* If shifting right forced us to move into the next byte skip the now
	 empty byte.  */
      if (skip_byte)
	{
	  tmpbuf++;
	  byte_size--;
	}
    }

  /* Insert the bits from TMPBUF.  */
  for (unsigned int i = 0; i < byte_size; i++)
    ptr[first_byte + i] |= tmpbuf[i];

  return true;
}
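/* Illustrative sketch (ours, not part of the pass): one way to picture what
   the little-endian path above computes is the following plain C, which
   inserts the BITLEN low-order bits of an integer VAL into a byte buffer one
   bit at a time instead of one byte at a time.  The helper name, the integer
   VAL view of the encoded constant and the fixed 8-bit byte are assumptions
   made only for this sketch.

     static void
     insert_bits_le (unsigned char *dst, unsigned long long val,
		     unsigned int bitpos, unsigned int bitlen)
     {
       for (unsigned int i = 0; i < bitlen; i++)
	 {
	   unsigned int pos = bitpos + i;
	   unsigned char bit = (val >> i) & 1;
	   dst[pos / 8] &= (unsigned char) ~(1u << (pos % 8)); // clear dst bit
	   dst[pos / 8] |= (unsigned char) (bit << (pos % 8)); // OR in new bit
	 }
     }

   The pass does the same work a whole byte at a time, which is why it needs
   the extra scratch byte and the explicit shift/clear helpers above.  */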
/* Sorting function for store_immediate_info objects.
   Sorts them by bitposition.  */

static int
sort_by_bitpos (const void *x, const void *y)
{
  store_immediate_info *const *tmp = (store_immediate_info * const *) x;
  store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;

  if ((*tmp)->bitpos < (*tmp2)->bitpos)
    return -1;
  else if ((*tmp)->bitpos > (*tmp2)->bitpos)
    return 1;
  else
    /* If they are the same let's use the order which is guaranteed to
       be different.  */
    return (*tmp)->order - (*tmp2)->order;
}

/* Sorting function for store_immediate_info objects.
   Sorts them by the order field.  */

static int
sort_by_order (const void *x, const void *y)
{
  store_immediate_info *const *tmp = (store_immediate_info * const *) x;
  store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;

  if ((*tmp)->order < (*tmp2)->order)
    return -1;
  else if ((*tmp)->order > (*tmp2)->order)
    return 1;

  gcc_unreachable ();
}
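/* Worked example (ours, not from the original sources): three recorded
   stores with (bitpos, order) pairs (64, 2), (0, 0) and (64, 1) come out of
   sort_by_bitpos as (0, 0), (64, 1), (64, 2).  Equal bit positions fall back
   to the statement order, which is unique within the scanned block, so the
   qsort result does not depend on the qsort implementation and the pass
   behaves reproducibly across hosts.  */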
/* Initialize a merged_store_group object from a store_immediate_info
   object.  */

merged_store_group::merged_store_group (store_immediate_info *info)
{
  start = info->bitpos;
  width = info->bitsize;
  bitregion_start = info->bitregion_start;
  bitregion_end = info->bitregion_end;
  /* VAL has memory allocated for it in apply_stores once the group
     width has been finalized.  */
  val = NULL;
  mask = NULL;
  bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
  string_concatenation = info->rhs_code == STRING_CST;
  only_constants = info->rhs_code == INTEGER_CST;
  consecutive = true;
  first_nonmergeable_order = ~0U;
  lp_nr = info->lp_nr;
  unsigned HOST_WIDE_INT align_bitpos = 0;
  get_object_alignment_1 (gimple_assign_lhs (info->stmt),
			  &align, &align_bitpos);
  align_base = start - align_bitpos;
  for (int i = 0; i < 2; ++i)
    {
      store_operand_info &op = info->ops[i];
      if (op.base_addr == NULL_TREE)
	{
	  load_align[i] = 0;
	  load_align_base[i] = 0;
	}
      else
	{
	  get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
	  load_align_base[i] = op.bitpos - align_bitpos;
	}
    }
  stores.create (1);
  stores.safe_push (info);
  last_stmt = info->stmt;
  last_order = info->order;
  first_stmt = last_stmt;
  first_order = last_order;
  buf_size = 0;
}

merged_store_group::~merged_store_group ()
{
  if (val)
    XDELETEVEC (val);
}
/* Return true if the store described by INFO can be merged into the group.  */

bool
merged_store_group::can_be_merged_into (store_immediate_info *info)
{
  /* Do not merge bswap patterns.  */
  if (info->rhs_code == LROTATE_EXPR)
    return false;

  if (info->lp_nr != lp_nr)
    return false;

  /* The canonical case.  */
  if (info->rhs_code == stores[0]->rhs_code)
    return true;

  /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST.  */
  if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
    return !string_concatenation;

  if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
    return !string_concatenation;

  /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
     only for small regions since this can generate a lot of instructions.  */
  if (info->rhs_code == MEM_REF
      && (stores[0]->rhs_code == INTEGER_CST
	  || stores[0]->rhs_code == BIT_INSERT_EXPR)
      && info->bitregion_start == stores[0]->bitregion_start
      && info->bitregion_end == stores[0]->bitregion_end
      && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
    return !string_concatenation;

  if (stores[0]->rhs_code == MEM_REF
      && (info->rhs_code == INTEGER_CST
	  || info->rhs_code == BIT_INSERT_EXPR)
      && info->bitregion_start == stores[0]->bitregion_start
      && info->bitregion_end == stores[0]->bitregion_end
      && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
    return !string_concatenation;

  /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR.  */
  if (info->rhs_code == STRING_CST
      && stores[0]->rhs_code == INTEGER_CST
      && stores[0]->bitsize == CHAR_BIT)
    return !bit_insertion;

  if (stores[0]->rhs_code == STRING_CST
      && info->rhs_code == INTEGER_CST
      && info->bitsize == CHAR_BIT)
    return !bit_insertion;

  return false;
}
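/* Quick reference (ours, merely summarizing the checks above), with the
   group's first store on one side and the incoming INFO on the other:

     same rhs_code <-> same rhs_code		always (canonical case)
     INTEGER_CST <-> BIT_INSERT_EXPR		unless string concatenation is
						already in use
     INTEGER_CST/BIT_INSERT_EXPR <-> MEM_REF	only for identical bit regions
						no wider than MAX_FIXED_MODE_SIZE
     byte-sized INTEGER_CST <-> STRING_CST	unless bit insertion is already
						in use

   LROTATE_EXPR (bswap) stores and landing-pad mismatches never merge.  */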
/* Helper method for merge_into and merge_overlapping to do
   the common part.  */

void
merged_store_group::do_merge (store_immediate_info *info)
{
  bitregion_start = MIN (bitregion_start, info->bitregion_start);
  bitregion_end = MAX (bitregion_end, info->bitregion_end);

  unsigned int this_align;
  unsigned HOST_WIDE_INT align_bitpos = 0;
  get_object_alignment_1 (gimple_assign_lhs (info->stmt),
			  &this_align, &align_bitpos);
  if (this_align > align)
    {
      align = this_align;
      align_base = info->bitpos - align_bitpos;
    }
  for (int i = 0; i < 2; ++i)
    {
      store_operand_info &op = info->ops[i];
      if (!op.base_addr)
	continue;

      get_object_alignment_1 (op.val, &this_align, &align_bitpos);
      if (this_align > load_align[i])
	{
	  load_align[i] = this_align;
	  load_align_base[i] = op.bitpos - align_bitpos;
	}
    }

  gimple *stmt = info->stmt;
  stores.safe_push (info);
  if (info->order > last_order)
    {
      last_order = info->order;
      last_stmt = stmt;
    }
  else if (info->order < first_order)
    {
      first_order = info->order;
      first_stmt = stmt;
    }

  if (info->bitpos != start + width)
    consecutive = false;

  /* We need to use extraction if there is any bit-field.  */
  if (info->rhs_code == BIT_INSERT_EXPR)
    {
      bit_insertion = true;
      gcc_assert (!string_concatenation);
    }

  /* We want to use concatenation if there is any string.  */
  if (info->rhs_code == STRING_CST)
    {
      string_concatenation = true;
      gcc_assert (!bit_insertion);
    }

  /* But we cannot use it if we don't have consecutive stores.  */
  if (!consecutive)
    string_concatenation = false;

  if (info->rhs_code != INTEGER_CST)
    only_constants = false;
}
/* Merge a store recorded by INFO into this merged store.
   The store is not overlapping with the existing recorded
   stores.  */

void
merged_store_group::merge_into (store_immediate_info *info)
{
  do_merge (info);

  /* Make sure we're inserting in the position we think we're inserting.  */
  gcc_assert (info->bitpos >= start + width
	      && info->bitregion_start <= bitregion_end);

  width = info->bitpos + info->bitsize - start;
}

/* Merge a store described by INFO into this merged store.
   INFO overlaps in some way with the current store (i.e. it's not contiguous
   which is handled by merged_store_group::merge_into).  */

void
merged_store_group::merge_overlapping (store_immediate_info *info)
{
  do_merge (info);

  /* If the store extends the size of the group, extend the width.  */
  if (info->bitpos + info->bitsize > start + width)
    width = info->bitpos + info->bitsize - start;
}
/* Go through all the recorded stores in this group in program order and
   apply their values to the VAL byte array to create the final merged
   value.  Return true if the operation succeeded.  */

bool
merged_store_group::apply_stores ()
{
  store_immediate_info *info;
  unsigned int i;

  /* Make sure we have more than one store in the group, otherwise we cannot
     merge anything.  */
  if (bitregion_start % BITS_PER_UNIT != 0
      || bitregion_end % BITS_PER_UNIT != 0
      || stores.length () == 1)
    return false;

  buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;

  /* Really do string concatenation for large strings only.  */
  if (buf_size <= MOVE_MAX)
    string_concatenation = false;

  /* String concatenation only works for byte aligned start and end.  */
  if (start % BITS_PER_UNIT != 0 || width % BITS_PER_UNIT != 0)
    string_concatenation = false;

  /* Create a power-of-2-sized buffer for native_encode_expr.  */
  if (!string_concatenation)
    buf_size = 1 << ceil_log2 (buf_size);

  val = XNEWVEC (unsigned char, 2 * buf_size);
  mask = val + buf_size;
  memset (val, 0, buf_size);
  memset (mask, ~0U, buf_size);

  stores.qsort (sort_by_order);

  FOR_EACH_VEC_ELT (stores, i, info)
    {
      unsigned int pos_in_buffer = info->bitpos - bitregion_start;
      tree cst;
      if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
	cst = info->ops[0].val;
      else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
	cst = info->ops[1].val;
      else
	cst = NULL_TREE;
      bool ret = true;
      if (cst && info->rhs_code != BIT_INSERT_EXPR)
	ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
				     buf_size);
      unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
      if (BYTES_BIG_ENDIAN)
	clear_bit_region_be (m, (BITS_PER_UNIT - 1
				 - (pos_in_buffer % BITS_PER_UNIT)),
			     info->bitsize);
      else
	clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
      if (cst && dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (ret)
	    {
	      fputs ("After writing ", dump_file);
	      print_generic_expr (dump_file, cst, TDF_NONE);
	      fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
		       " at position %d\n", info->bitsize, pos_in_buffer);
	      fputs ("  the merged value contains ", dump_file);
	      dump_char_array (dump_file, val, buf_size);
	      fputs ("  the merged mask contains  ", dump_file);
	      dump_char_array (dump_file, mask, buf_size);
	      if (bit_insertion)
		fputs ("  bit insertion is required\n", dump_file);
	      if (string_concatenation)
		fputs ("  string concatenation is required\n", dump_file);
	    }
	  else
	    fprintf (dump_file, "Failed to merge stores\n");
	}
      if (!ret)
	return false;
    }
  stores.qsort (sort_by_bitpos);
  return true;
}
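/* Worked example (ours, not from the original sources): for a group holding
   the two constant stores

     MEM[p]      = 0x11;	(bitsize 8,  bitpos 0)
     MEM[p + 1B] = 0x2233;	(bitsize 16, bitpos 8)

   the bit region spans 3 bytes, which apply_stores rounds up to a 4-byte
   power-of-two buffer.  On a little-endian target the result is

     val  = { 0x11, 0x33, 0x22, 0x00 }
     mask = { 0x00, 0x00, 0x00, 0xff }

   i.e. VAL holds the merged constant image and MASK keeps ~0 exactly in the
   bytes no recorded store touches, which split_group later treats as
   padding.  */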
/* Structure describing the store chain.  */

class imm_store_chain_info
{
public:
  /* Doubly-linked list that imposes an order on chain processing.
     PNXP (prev's next pointer) points to the head of a list, or to
     the next field in the previous chain in the list.
     See pass_store_merging::m_stores_head for more rationale.  */
  imm_store_chain_info *next, **pnxp;
  tree base_addr;
  auto_vec<store_immediate_info *> m_store_info;
  auto_vec<merged_store_group *> m_merged_store_groups;

  imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
  : next (inspt), pnxp (&inspt), base_addr (b_a)
  {
    inspt = this;
    if (next)
      {
	gcc_checking_assert (pnxp == next->pnxp);
	next->pnxp = &next;
      }
  }
  ~imm_store_chain_info ()
  {
    *pnxp = next;
    if (next)
      {
	gcc_checking_assert (&next == next->pnxp);
	next->pnxp = pnxp;
      }
  }
  bool terminate_and_process_chain ();
  bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
			   unsigned int);
  bool coalesce_immediate_stores ();
  bool output_merged_store (merged_store_group *);
  bool output_merged_stores ();
};

const pass_data pass_data_tree_store_merging = {
  GIMPLE_PASS,     /* type */
  "store-merging", /* name */
  OPTGROUP_NONE,   /* optinfo_flags */
  TV_GIMPLE_STORE_MERGING,	 /* tv_id */
  PROP_ssa,	/* properties_required */
  0,		/* properties_provided */
  0,		/* properties_destroyed */
  0,		/* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
class pass_store_merging : public gimple_opt_pass
{
public:
  pass_store_merging (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head (),
      m_n_chains (0), m_n_stores (0)
  {
  }

  /* Pass not supported for PDP-endian, nor for insane hosts or
     target character sizes where native_{encode,interpret}_expr
     doesn't work properly.  */
  bool
  gate (function *) final override
  {
    return flag_store_merging
	   && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
	   && CHAR_BIT == 8
	   && BITS_PER_UNIT == 8;
  }

  unsigned int execute (function *) final override;

private:
  hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;

  /* Form a doubly-linked stack of the elements of m_stores, so that
     we can iterate over them in a predictable way.  Using this order
     avoids extraneous differences in the compiler output just because
     of tree pointer variations (e.g. different chains end up in
     different positions of m_stores, so they are handled in different
     orders, so they allocate or release SSA names in different
     orders, and when they get reused, subsequent passes end up
     getting different SSA names, which may ultimately change
     decisions when going out of SSA).  */
  imm_store_chain_info *m_stores_head;

  /* The number of store chains currently tracked.  */
  unsigned m_n_chains;
  /* The number of stores currently tracked.  */
  unsigned m_n_stores;

  bool process_store (gimple *);
  bool terminate_and_process_chain (imm_store_chain_info *);
  bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
  bool terminate_and_process_all_chains ();
}; // class pass_store_merging
/* Terminate and process all recorded chains.  Return true if any changes
   were made.  */

bool
pass_store_merging::terminate_and_process_all_chains ()
{
  bool ret = false;
  while (m_stores_head)
    ret |= terminate_and_process_chain (m_stores_head);
  gcc_assert (m_stores.is_empty ());
  return ret;
}

/* Terminate all chains that are affected by the statement STMT.
   CHAIN_INFO is the chain we should ignore from the checks if
   non-NULL.  Return true if any changes were made.  */

bool
pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
						     **chain_info,
						   gimple *stmt)
{
  bool ret = false;

  /* If the statement doesn't touch memory it can't alias.  */
  if (!gimple_vuse (stmt))
    return false;

  tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
  ao_ref store_lhs_ref;
  ao_ref_init (&store_lhs_ref, store_lhs);
  for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
    {
      next = cur->next;

      /* We already checked all the stores in chain_info and terminated the
	 chain if necessary.  Skip it here.  */
      if (chain_info && *chain_info == cur)
	continue;

      store_immediate_info *info;
      unsigned int i;
      FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
	{
	  tree lhs = gimple_assign_lhs (info->stmt);
	  ao_ref lhs_ref;
	  ao_ref_init (&lhs_ref, lhs);
	  if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
	      || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
	      || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
						   &lhs_ref, false)))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "stmt causes chain termination:\n");
		  print_gimple_stmt (dump_file, stmt, 0);
		}
	      ret |= terminate_and_process_chain (cur);
	      break;
	    }
	}
    }

  return ret;
}

/* Helper function.  Terminate the recorded chain storing to base object
   BASE.  Return true if the merging and output was successful.  The m_stores
   entry is removed after the processing in any case.  */

bool
pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
{
  m_n_stores -= chain_info->m_store_info.length ();
  m_n_chains--;
  bool ret = chain_info->terminate_and_process_chain ();
  m_stores.remove (chain_info->base_addr);
  delete chain_info;
  return ret;
}
/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
   may clobber REF.  FIRST and LAST must have non-NULL vdef.  We want to
   be able to sink load of REF across stores between FIRST and LAST, up
   to right before LAST.  */

static bool
stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
{
  ao_ref r;
  ao_ref_init (&r, ref);
  unsigned int count = 0;
  tree vop = gimple_vdef (last);
  gimple *stmt;

  /* Return true conservatively if the basic blocks are different.  */
  if (gimple_bb (first) != gimple_bb (last))
    return true;

  do
    {
      stmt = SSA_NAME_DEF_STMT (vop);
      if (stmt_may_clobber_ref_p_1 (stmt, &r))
	return true;
      if (gimple_store_p (stmt)
	  && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
	return true;
      /* Avoid quadratic compile time by bounding the number of checks
	 we perform.  */
      if (++count > MAX_STORE_ALIAS_CHECKS)
	return true;
      vop = gimple_vuse (stmt);
    }
  while (stmt != first);

  return false;
}
/* Return true if INFO->ops[IDX] is mergeable with the
   corresponding loads already in MERGED_STORE group.
   BASE_ADDR is the base address of the whole store group.  */

static bool
compatible_load_p (merged_store_group *merged_store,
		   store_immediate_info *info,
		   tree base_addr, int idx)
{
  store_immediate_info *infof = merged_store->stores[0];
  if (!info->ops[idx].base_addr
      || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
		   info->bitpos - infof->bitpos)
      || !operand_equal_p (info->ops[idx].base_addr,
			   infof->ops[idx].base_addr, 0))
    return false;

  store_immediate_info *infol = merged_store->stores.last ();
  tree load_vuse = gimple_vuse (info->ops[idx].stmt);
  /* In this case all vuses should be the same, e.g.
     _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
     or
     _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
     and we can emit the coalesced load next to any of those loads.  */
  if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
      && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
    return true;

  /* Otherwise, at least for now require that the load has the same
     vuse as the store.  See following examples.  */
  if (gimple_vuse (info->stmt) != load_vuse)
    return false;

  if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
      || (infof != infol
	  && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
    return false;

  /* If the load is from the same location as the store, already
     the construction of the immediate chain info guarantees no intervening
     stores, so no further checks are needed.  Example:
     _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4;  */
  if (known_eq (info->ops[idx].bitpos, info->bitpos)
      && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
    return true;

  /* Otherwise, we need to punt if any of the loads can be clobbered by any
     of the stores in the group, or any other stores in between those.
     Previous calls to compatible_load_p ensured that for all the
     merged_store->stores IDX loads, no stmts starting with
     merged_store->first_stmt and ending right before merged_store->last_stmt
     clobbers those loads.  */
  gimple *first = merged_store->first_stmt;
  gimple *last = merged_store->last_stmt;
  /* The stores are sorted by increasing store bitpos, so if info->stmt store
     comes before the so far first load, we'll be changing
     merged_store->first_stmt.  In that case we need to give up if
     any of the earlier processed loads clobber with the stmts in the new
     range.  */
  if (info->order < merged_store->first_order)
    {
      for (store_immediate_info *infoc : merged_store->stores)
	if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
	  return false;
      first = info->stmt;
    }
  /* Similarly, we could change merged_store->last_stmt, so ensure
     in that case no stmts in the new range clobber any of the earlier
     processed loads.  */
  else if (info->order > merged_store->last_order)
    {
      for (store_immediate_info *infoc : merged_store->stores)
	if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
	  return false;
      last = info->stmt;
    }
  /* And finally, we'd be adding a new load to the set, ensure it isn't
     clobbered in the new range.  */
  if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
    return false;

  /* Otherwise, we are looking for:
     _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
     or
     _1 = s.a; t.a = _1; _2 = s.b; t.b = _2;  */
  return true;
}
/* Add all refs loaded to compute VAL to REFS vector.  */

static void
gather_bswap_load_refs (vec<tree> *refs, tree val)
{
  if (TREE_CODE (val) != SSA_NAME)
    return;

  gimple *stmt = SSA_NAME_DEF_STMT (val);
  if (!is_gimple_assign (stmt))
    return;

  if (gimple_assign_load_p (stmt))
    {
      refs->safe_push (gimple_assign_rhs1 (stmt));
      return;
    }

  switch (gimple_assign_rhs_class (stmt))
    {
    case GIMPLE_BINARY_RHS:
      gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
      /* FALLTHRU */
    case GIMPLE_UNARY_RHS:
      gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
      break;
    default:
      gcc_unreachable ();
    }
}
/* Check if there are any stores in M_STORE_INFO after index I
   (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
   a potential group ending with END that have their order
   smaller than LAST_ORDER.  ALL_INTEGER_CST_P is true if
   all the stores already merged and the one under consideration
   have rhs_code of INTEGER_CST.  Return true if there are no such stores.
   Consider:
     MEM[(long long int *)p_28] = 0;
     MEM[(long long int *)p_28 + 8B] = 0;
     MEM[(long long int *)p_28 + 16B] = 0;
     MEM[(long long int *)p_28 + 24B] = 0;
     MEM[(int *)p_28 + 8B] = _129;
     MEM[(int *)p_28].a = -1;
   We already have
     MEM[(long long int *)p_28] = 0;
     MEM[(int *)p_28].a = -1;
   stmts in the current group and need to consider if it is safe to
   add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
   There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
   store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
   into the group and merging of those 3 stores is successful, merged
   stmts will be emitted at the latest store from that group, i.e.
   LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
   The MEM[(int *)p_28 + 8B] = _129; store that originally follows
   the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
   so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
   into the group.  That way it will be its own store group and will
   not be touched.  If ALL_INTEGER_CST_P and there are overlapping
   INTEGER_CST stores, those are mergeable using merge_overlapping,
   so don't return false for those.

   Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
   (exclusive), whether they don't overlap the bitrange START to END
   and have order in between FIRST_ORDER and LAST_ORDER.  This is to
   prevent merging in cases like:
     MEM <char[12]> [&b + 8B] = {};
     MEM[(short *) &b] = 5;
     MEM <long long unsigned int> [&b + 2B] = _5;
     MEM[(char *)&b + 16B] = 88;
     MEM[(int *)&b + 20B] = 1;
   The = {} store comes in sort_by_bitpos before the = 88 store, and can't
   be merged with it, because the = _5 store overlaps these and is in between
   them in sort_by_order ordering.  If it was merged, the merged store would
   go after the = _5 store and thus change behavior.  */

static bool
check_no_overlap (const vec<store_immediate_info *> &m_store_info,
		  unsigned int i,
		  bool all_integer_cst_p, unsigned int first_order,
		  unsigned int last_order, unsigned HOST_WIDE_INT start,
		  unsigned HOST_WIDE_INT end, unsigned int first_earlier,
		  unsigned end_earlier)
{
  unsigned int len = m_store_info.length ();
  for (unsigned int j = first_earlier; j < end_earlier; j++)
    {
      store_immediate_info *info = m_store_info[j];
      if (info->order > first_order
	  && info->order < last_order
	  && info->bitpos + info->bitsize > start)
	return false;
    }
  for (++i; i < len; ++i)
    {
      store_immediate_info *info = m_store_info[i];
      if (info->bitpos >= end)
	break;
      if (info->order < last_order
	  && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
	return false;
    }
  return true;
}
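/* Illustrative sketch (ours): stripped of the rhs_code and ordering
   exceptions above, the core test is the usual half-open interval overlap
   check between the candidate group [START, END) and a recorded store.
   The helper name below is purely for illustration.

     static inline bool
     bit_ranges_overlap_p (unsigned HOST_WIDE_INT start,
			   unsigned HOST_WIDE_INT end,
			   unsigned HOST_WIDE_INT bitpos,
			   unsigned HOST_WIDE_INT bitsize)
     {
       return bitpos < end && bitpos + bitsize > start;
     }

   Roughly speaking, only overlapping stores whose order lies before
   LAST_ORDER (and, for the earlier-position ones, after FIRST_ORDER) matter,
   because only those would be reordered across the merged store when it is
   emitted at LAST_ORDER.  */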
2873 /* Return true if m_store_info[first] and at least one following store
2874 form a group which store try_size bitsize value which is byte swapped
2875 from a memory load or some value, or identity from some value.
2876 This uses the bswap pass APIs. */
2879 imm_store_chain_info::try_coalesce_bswap (merged_store_group
*merged_store
,
2881 unsigned int try_size
,
2882 unsigned int first_earlier
)
2884 unsigned int len
= m_store_info
.length (), last
= first
;
2885 unsigned HOST_WIDE_INT width
= m_store_info
[first
]->bitsize
;
2886 if (width
>= try_size
)
2888 for (unsigned int i
= first
+ 1; i
< len
; ++i
)
2890 if (m_store_info
[i
]->bitpos
!= m_store_info
[first
]->bitpos
+ width
2891 || m_store_info
[i
]->lp_nr
!= merged_store
->lp_nr
2892 || m_store_info
[i
]->ins_stmt
== NULL
)
2894 width
+= m_store_info
[i
]->bitsize
;
2895 if (width
>= try_size
)
2901 if (width
!= try_size
)
2904 bool allow_unaligned
2905 = !STRICT_ALIGNMENT
&& param_store_merging_allow_unaligned
;
2906 /* Punt if the combined store would not be aligned and we need alignment. */
2907 if (!allow_unaligned
)
2909 unsigned int align
= merged_store
->align
;
2910 unsigned HOST_WIDE_INT align_base
= merged_store
->align_base
;
2911 for (unsigned int i
= first
+ 1; i
<= last
; ++i
)
2913 unsigned int this_align
;
2914 unsigned HOST_WIDE_INT align_bitpos
= 0;
2915 get_object_alignment_1 (gimple_assign_lhs (m_store_info
[i
]->stmt
),
2916 &this_align
, &align_bitpos
);
2917 if (this_align
> align
)
2920 align_base
= m_store_info
[i
]->bitpos
- align_bitpos
;
2923 unsigned HOST_WIDE_INT align_bitpos
2924 = (m_store_info
[first
]->bitpos
- align_base
) & (align
- 1);
2926 align
= least_bit_hwi (align_bitpos
);
2927 if (align
< try_size
)
2934 case 16: type
= uint16_type_node
; break;
2935 case 32: type
= uint32_type_node
; break;
2936 case 64: type
= uint64_type_node
; break;
2937 default: gcc_unreachable ();
2939 struct symbolic_number n
;
2940 gimple
*ins_stmt
= NULL
;
2941 int vuse_store
= -1;
2942 unsigned int first_order
= merged_store
->first_order
;
2943 unsigned int last_order
= merged_store
->last_order
;
2944 gimple
*first_stmt
= merged_store
->first_stmt
;
2945 gimple
*last_stmt
= merged_store
->last_stmt
;
2946 unsigned HOST_WIDE_INT end
= merged_store
->start
+ merged_store
->width
;
2947 store_immediate_info
*infof
= m_store_info
[first
];
2949 for (unsigned int i
= first
; i
<= last
; ++i
)
2951 store_immediate_info
*info
= m_store_info
[i
];
2952 struct symbolic_number this_n
= info
->n
;
2954 if (!this_n
.base_addr
)
2955 this_n
.range
= try_size
/ BITS_PER_UNIT
;
2957 /* Update vuse in case it has changed by output_merged_stores. */
2958 this_n
.vuse
= gimple_vuse (info
->ins_stmt
);
2959 unsigned int bitpos
= info
->bitpos
- infof
->bitpos
;
2960 if (!do_shift_rotate (LSHIFT_EXPR
, &this_n
,
2962 ? try_size
- info
->bitsize
- bitpos
2965 if (this_n
.base_addr
&& vuse_store
)
2968 for (j
= first
; j
<= last
; ++j
)
2969 if (this_n
.vuse
== gimple_vuse (m_store_info
[j
]->stmt
))
2973 if (vuse_store
== 1)
2981 ins_stmt
= info
->ins_stmt
;
2985 if (n
.base_addr
&& n
.vuse
!= this_n
.vuse
)
2987 if (vuse_store
== 0)
2991 if (info
->order
> last_order
)
2993 last_order
= info
->order
;
2994 last_stmt
= info
->stmt
;
2996 else if (info
->order
< first_order
)
2998 first_order
= info
->order
;
2999 first_stmt
= info
->stmt
;
3001 end
= MAX (end
, info
->bitpos
+ info
->bitsize
);
3003 ins_stmt
= perform_symbolic_merge (ins_stmt
, &n
, info
->ins_stmt
,
3004 &this_n
, &n
, BIT_IOR_EXPR
);
3005 if (ins_stmt
== NULL
)
3010 uint64_t cmpxchg
, cmpnop
;
3012 find_bswap_or_nop_finalize (&n
, &cmpxchg
, &cmpnop
, &cast64_to_32
);
3014 /* A complete byte swap should make the symbolic number to start with
3015 the largest digit in the highest order byte. Unchanged symbolic
3016 number indicates a read with same endianness as target architecture. */
3017 if (n
.n
!= cmpnop
&& n
.n
!= cmpxchg
)
3024 if (n
.base_addr
== NULL_TREE
&& !is_gimple_val (n
.src
))
3027 if (!check_no_overlap (m_store_info
, last
, false, first_order
, last_order
,
3028 merged_store
->start
, end
, first_earlier
, first
))
3031 /* Don't handle memory copy this way if normal non-bswap processing
3032 would handle it too. */
3033 if (n
.n
== cmpnop
&& (unsigned) n
.n_ops
== last
- first
+ 1)
3036 for (i
= first
; i
<= last
; ++i
)
3037 if (m_store_info
[i
]->rhs_code
!= MEM_REF
)
3047 /* Will emit LROTATE_EXPR. */
3050 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32
)
3051 && optab_handler (bswap_optab
, SImode
) != CODE_FOR_nothing
)
3055 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64
)
3056 && (optab_handler (bswap_optab
, DImode
) != CODE_FOR_nothing
3057 || (word_mode
== SImode
3058 && builtin_decl_explicit_p (BUILT_IN_BSWAP32
)
3059 && optab_handler (bswap_optab
, SImode
) != CODE_FOR_nothing
)))
3066 if (!allow_unaligned
&& n
.base_addr
)
3068 unsigned int align
= get_object_alignment (n
.src
);
3069 if (align
< try_size
)
3073 /* If each load has vuse of the corresponding store, need to verify
3074 the loads can be sunk right before the last store. */
3075 if (vuse_store
== 1)
3077 auto_vec
<tree
, 64> refs
;
3078 for (unsigned int i
= first
; i
<= last
; ++i
)
3079 gather_bswap_load_refs (&refs
,
3080 gimple_assign_rhs1 (m_store_info
[i
]->stmt
));
3082 for (tree ref
: refs
)
3083 if (stmts_may_clobber_ref_p (first_stmt
, last_stmt
, ref
))
3089 infof
->ins_stmt
= ins_stmt
;
3090 for (unsigned int i
= first
; i
<= last
; ++i
)
3092 m_store_info
[i
]->rhs_code
= n
.n
== cmpxchg
? LROTATE_EXPR
: NOP_EXPR
;
3093 m_store_info
[i
]->ops
[0].base_addr
= NULL_TREE
;
3094 m_store_info
[i
]->ops
[1].base_addr
= NULL_TREE
;
3096 merged_store
->merge_into (m_store_info
[i
]);
3102 /* Go through the candidate stores recorded in m_store_info and merge them
3103 into merged_store_group objects recorded into m_merged_store_groups
3104 representing the widened stores. Return true if coalescing was successful
3105 and the number of widened stores is fewer than the original number
3109 imm_store_chain_info::coalesce_immediate_stores ()
3111 /* Anything less can't be processed. */
3112 if (m_store_info
.length () < 2)
3115 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3116 fprintf (dump_file
, "Attempting to coalesce %u stores in chain\n",
3117 m_store_info
.length ());
3119 store_immediate_info
*info
;
3120 unsigned int i
, ignore
= 0;
3121 unsigned int first_earlier
= 0;
3122 unsigned int end_earlier
= 0;
3124 /* Order the stores by the bitposition they write to. */
3125 m_store_info
.qsort (sort_by_bitpos
);
3127 info
= m_store_info
[0];
3128 merged_store_group
*merged_store
= new merged_store_group (info
);
3129 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3130 fputs ("New store group\n", dump_file
);
3132 FOR_EACH_VEC_ELT (m_store_info
, i
, info
)
3134 unsigned HOST_WIDE_INT new_bitregion_start
, new_bitregion_end
;
3139 while (first_earlier
< end_earlier
3140 && (m_store_info
[first_earlier
]->bitpos
3141 + m_store_info
[first_earlier
]->bitsize
3142 <= merged_store
->start
))
3145 /* First try to handle group of stores like:
3150 using the bswap framework. */
3151 if (info
->bitpos
== merged_store
->start
+ merged_store
->width
3152 && merged_store
->stores
.length () == 1
3153 && merged_store
->stores
[0]->ins_stmt
!= NULL
3154 && info
->lp_nr
== merged_store
->lp_nr
3155 && info
->ins_stmt
!= NULL
)
3157 unsigned int try_size
;
3158 for (try_size
= 64; try_size
>= 16; try_size
>>= 1)
3159 if (try_coalesce_bswap (merged_store
, i
- 1, try_size
,
3165 ignore
= i
+ merged_store
->stores
.length () - 1;
3166 m_merged_store_groups
.safe_push (merged_store
);
3167 if (ignore
< m_store_info
.length ())
3169 merged_store
= new merged_store_group (m_store_info
[ignore
]);
3170 end_earlier
= ignore
;
3173 merged_store
= NULL
;
3179 = MIN (merged_store
->bitregion_start
, info
->bitregion_start
);
3181 = MAX (merged_store
->bitregion_end
, info
->bitregion_end
);
3183 if (info
->order
>= merged_store
->first_nonmergeable_order
3184 || (((new_bitregion_end
- new_bitregion_start
+ 1) / BITS_PER_UNIT
)
3185 > (unsigned) param_store_merging_max_size
))
3190 Overlapping stores. */
3191 else if (IN_RANGE (info
->bitpos
, merged_store
->start
,
3192 merged_store
->start
+ merged_store
->width
- 1)
3193 /* |---store 1---||---store 2---|
3194 Handle also the consecutive INTEGER_CST stores case here,
3195 as we have here the code to deal with overlaps. */
3196 || (info
->bitregion_start
<= merged_store
->bitregion_end
3197 && info
->rhs_code
== INTEGER_CST
3198 && merged_store
->only_constants
3199 && merged_store
->can_be_merged_into (info
)))
3201 /* Only allow overlapping stores of constants. */
3202 if (info
->rhs_code
== INTEGER_CST
3203 && merged_store
->only_constants
3204 && info
->lp_nr
== merged_store
->lp_nr
)
3206 unsigned int first_order
3207 = MIN (merged_store
->first_order
, info
->order
);
3208 unsigned int last_order
3209 = MAX (merged_store
->last_order
, info
->order
);
3210 unsigned HOST_WIDE_INT end
3211 = MAX (merged_store
->start
+ merged_store
->width
,
3212 info
->bitpos
+ info
->bitsize
);
3213 if (check_no_overlap (m_store_info
, i
, true, first_order
,
3214 last_order
, merged_store
->start
, end
,
3215 first_earlier
, end_earlier
))
3217 /* check_no_overlap call above made sure there are no
3218 overlapping stores with non-INTEGER_CST rhs_code
3219 in between the first and last of the stores we've
3220 just merged. If there are any INTEGER_CST rhs_code
3221 stores in between, we need to merge_overlapping them
3222 even if in the sort_by_bitpos order there are other
3223 overlapping stores in between. Keep those stores as is.
3225 MEM[(int *)p_28] = 0;
3226 MEM[(char *)p_28 + 3B] = 1;
3227 MEM[(char *)p_28 + 1B] = 2;
3228 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
3229 We can't merge the zero store with the store of two and
3230 not merge anything else, because the store of one is
3231 in the original order in between those two, but in
3232 store_by_bitpos order it comes after the last store that
3233 we can't merge with them. We can merge the first 3 stores
3234 and keep the last store as is though. */
3235 unsigned int len
= m_store_info
.length ();
3236 unsigned int try_order
= last_order
;
3237 unsigned int first_nonmergeable_order
;
3239 bool last_iter
= false;
3243 unsigned int max_order
= 0;
3244 unsigned int min_order
= first_order
;
3245 unsigned first_nonmergeable_int_order
= ~0U;
3246 unsigned HOST_WIDE_INT this_end
= end
;
3248 first_nonmergeable_order
= ~0U;
3249 for (unsigned int j
= i
+ 1; j
< len
; ++j
)
3251 store_immediate_info
*info2
= m_store_info
[j
];
3252 if (info2
->bitpos
>= this_end
)
3254 if (info2
->order
< try_order
)
3256 if (info2
->rhs_code
!= INTEGER_CST
3257 || info2
->lp_nr
!= merged_store
->lp_nr
)
3259 /* Normally check_no_overlap makes sure this
3260 doesn't happen, but if end grows below,
3261 then we need to process more stores than
3262 check_no_overlap verified. Example:
3263 MEM[(int *)p_5] = 0;
3264 MEM[(short *)p_5 + 3B] = 1;
3265 MEM[(char *)p_5 + 4B] = _9;
3266 MEM[(char *)p_5 + 2B] = 2; */
3271 min_order
= MIN (min_order
, info2
->order
);
3272 this_end
= MAX (this_end
,
3273 info2
->bitpos
+ info2
->bitsize
);
3275 else if (info2
->rhs_code
== INTEGER_CST
3276 && info2
->lp_nr
== merged_store
->lp_nr
3279 max_order
= MAX (max_order
, info2
->order
+ 1);
3280 first_nonmergeable_int_order
3281 = MIN (first_nonmergeable_int_order
,
3285 first_nonmergeable_order
3286 = MIN (first_nonmergeable_order
, info2
->order
);
3289 && !check_no_overlap (m_store_info
, len
- 1, true,
3290 min_order
, try_order
,
3291 merged_store
->start
, this_end
,
3292 first_earlier
, end_earlier
))
3296 if (last_order
== try_order
)
3298 /* If this failed, but only because we grew
3299 try_order, retry with the last working one,
3300 so that we merge at least something. */
3301 try_order
= last_order
;
3305 last_order
= try_order
;
3306 /* Retry with a larger try_order to see if we could
3307 merge some further INTEGER_CST stores. */
3309 && (first_nonmergeable_int_order
3310 < first_nonmergeable_order
))
3312 try_order
= MIN (max_order
,
3313 first_nonmergeable_order
);
3316 merged_store
->first_nonmergeable_order
);
3317 if (try_order
> last_order
&& ++attempts
< 16)
3320 first_nonmergeable_order
3321 = MIN (first_nonmergeable_order
,
3322 first_nonmergeable_int_order
);
3330 merged_store
->merge_overlapping (info
);
3332 merged_store
->first_nonmergeable_order
3333 = MIN (merged_store
->first_nonmergeable_order
,
3334 first_nonmergeable_order
);
3336 for (unsigned int j
= i
+ 1; j
<= k
; j
++)
3338 store_immediate_info
*info2
= m_store_info
[j
];
3339 gcc_assert (info2
->bitpos
< end
);
3340 if (info2
->order
< last_order
)
3342 gcc_assert (info2
->rhs_code
== INTEGER_CST
);
3344 merged_store
->merge_overlapping (info2
);
3346 /* Other stores are kept and not merged in any
3355 /* |---store 1---||---store 2---|
3356 This store is consecutive to the previous one.
3357 Merge it into the current store group. There can be gaps in between
3358 the stores, but there can't be gaps in between bitregions. */
3359 else if (info
->bitregion_start
<= merged_store
->bitregion_end
3360 && merged_store
->can_be_merged_into (info
))
3362 store_immediate_info
*infof
= merged_store
->stores
[0];
3364 /* All the rhs_code ops that take 2 operands are commutative,
3365 swap the operands if it could make the operands compatible. */
3366 if (infof
->ops
[0].base_addr
3367 && infof
->ops
[1].base_addr
3368 && info
->ops
[0].base_addr
3369 && info
->ops
[1].base_addr
3370 && known_eq (info
->ops
[1].bitpos
- infof
->ops
[0].bitpos
,
3371 info
->bitpos
- infof
->bitpos
)
3372 && operand_equal_p (info
->ops
[1].base_addr
,
3373 infof
->ops
[0].base_addr
, 0))
3375 std::swap (info
->ops
[0], info
->ops
[1]);
3376 info
->ops_swapped_p
= true;
3378 if (check_no_overlap (m_store_info
, i
, false,
3379 MIN (merged_store
->first_order
, info
->order
),
3380 MAX (merged_store
->last_order
, info
->order
),
3381 merged_store
->start
,
3382 MAX (merged_store
->start
+ merged_store
->width
,
3383 info
->bitpos
+ info
->bitsize
),
3384 first_earlier
, end_earlier
))
3386 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
3387 if (info
->rhs_code
== MEM_REF
&& infof
->rhs_code
!= MEM_REF
)
3389 info
->rhs_code
= BIT_INSERT_EXPR
;
3390 info
->ops
[0].val
= gimple_assign_rhs1 (info
->stmt
);
3391 info
->ops
[0].base_addr
= NULL_TREE
;
3393 else if (infof
->rhs_code
== MEM_REF
&& info
->rhs_code
!= MEM_REF
)
3395 for (store_immediate_info
*infoj
: merged_store
->stores
)
3397 infoj
->rhs_code
= BIT_INSERT_EXPR
;
3398 infoj
->ops
[0].val
= gimple_assign_rhs1 (infoj
->stmt
);
3399 infoj
->ops
[0].base_addr
= NULL_TREE
;
3401 merged_store
->bit_insertion
= true;
3403 if ((infof
->ops
[0].base_addr
3404 ? compatible_load_p (merged_store
, info
, base_addr
, 0)
3405 : !info
->ops
[0].base_addr
)
3406 && (infof
->ops
[1].base_addr
3407 ? compatible_load_p (merged_store
, info
, base_addr
, 1)
3408 : !info
->ops
[1].base_addr
))
3410 merged_store
->merge_into (info
);
3416 /* |---store 1---| <gap> |---store 2---|.
3417 Gap between stores or the rhs not compatible. Start a new group. */
3419 /* Try to apply all the stores recorded for the group to determine
3420 the bitpattern they write and discard it if that fails.
3421 This will also reject single-store groups. */
3422 if (merged_store
->apply_stores ())
3423 m_merged_store_groups
.safe_push (merged_store
);
3425 delete merged_store
;
3427 merged_store
= new merged_store_group (info
);
3429 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3430 fputs ("New store group\n", dump_file
);
3433 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3435 fprintf (dump_file
, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3436 " bitpos:" HOST_WIDE_INT_PRINT_DEC
" val:",
3437 i
, info
->bitsize
, info
->bitpos
);
3438 print_generic_expr (dump_file
, gimple_assign_rhs1 (info
->stmt
));
3439 fputc ('\n', dump_file
);
3443 /* Record or discard the last store group. */
3446 if (merged_store
->apply_stores ())
3447 m_merged_store_groups
.safe_push (merged_store
);
3449 delete merged_store
;
3452 gcc_assert (m_merged_store_groups
.length () <= m_store_info
.length ());
3455 = !m_merged_store_groups
.is_empty ()
3456 && m_merged_store_groups
.length () < m_store_info
.length ();
3458 if (success
&& dump_file
)
3459 fprintf (dump_file
, "Coalescing successful!\nMerged into %u stores\n",
3460 m_merged_store_groups
.length ());
/* Return the type to use for the merged stores or loads described by STMTS.
   This is needed to get the alias sets right.  If IS_LOAD, look for rhs,
   otherwise lhs.  Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
   of the MEM_REFs if any.  */

static tree
get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
			  unsigned short *cliquep, unsigned short *basep)
{
  gimple *stmt;
  unsigned int i;
  tree type = NULL_TREE;
  tree ret = NULL_TREE;
  *cliquep = 0;
  *basep = 0;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      tree ref = is_load ? gimple_assign_rhs1 (stmt)
			 : gimple_assign_lhs (stmt);
      tree type1 = reference_alias_ptr_type (ref);
      tree base = get_base_address (ref);

      if (i == 0)
	{
	  if (TREE_CODE (base) == MEM_REF)
	    {
	      *cliquep = MR_DEPENDENCE_CLIQUE (base);
	      *basep = MR_DEPENDENCE_BASE (base);
	    }
	  ret = type = type1;
	  continue;
	}
      if (!alias_ptr_types_compatible_p (type, type1))
	ret = ptr_type_node;
      if (TREE_CODE (base) != MEM_REF
	  || *cliquep != MR_DEPENDENCE_CLIQUE (base)
	  || *basep != MR_DEPENDENCE_BASE (base))
	{
	  *cliquep = 0;
	  *basep = 0;
	}
    }
  return ret;
}
/* Return the location_t information we can find among the statements
   in STMTS.  */

static location_t
get_location_for_stmts (vec<gimple *> &stmts)
{
  for (gimple *stmt : stmts)
    if (gimple_has_location (stmt))
      return gimple_location (stmt);

  return UNKNOWN_LOCATION;
}

/* Used to describe a store resulting from splitting a wide store in smaller
   regularly-sized stores in split_group.  */

class split_store
{
public:
  unsigned HOST_WIDE_INT bytepos;
  unsigned HOST_WIDE_INT size;
  unsigned HOST_WIDE_INT align;
  auto_vec<store_immediate_info *> orig_stores;
  /* True if there is a single orig stmt covering the whole split store.  */
  bool orig;
  split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
	       unsigned HOST_WIDE_INT);
};

/* Simple constructor.  */

split_store::split_store (unsigned HOST_WIDE_INT bp,
			  unsigned HOST_WIDE_INT sz,
			  unsigned HOST_WIDE_INT al)
			  : bytepos (bp), size (sz), align (al), orig (false)
{
  orig_stores.create (0);
}
3550 /* Record all stores in GROUP that write to the region starting at BITPOS and
3551 is of size BITSIZE. Record infos for such statements in STORES if
3552 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
3553 if there is exactly one original store in the range (in that case ignore
3554 clobber stmts, unless there are only clobber stmts). */
3556 static store_immediate_info
*
3557 find_constituent_stores (class merged_store_group
*group
,
3558 vec
<store_immediate_info
*> *stores
,
3559 unsigned int *first
,
3560 unsigned HOST_WIDE_INT bitpos
,
3561 unsigned HOST_WIDE_INT bitsize
)
3563 store_immediate_info
*info
, *ret
= NULL
;
3565 bool second
= false;
3566 bool update_first
= true;
3567 unsigned HOST_WIDE_INT end
= bitpos
+ bitsize
;
3568 for (i
= *first
; group
->stores
.iterate (i
, &info
); ++i
)
3570 unsigned HOST_WIDE_INT stmt_start
= info
->bitpos
;
3571 unsigned HOST_WIDE_INT stmt_end
= stmt_start
+ info
->bitsize
;
3572 if (stmt_end
<= bitpos
)
3574 /* BITPOS passed to this function never decreases from within the
3575 same split_group call, so optimize and don't scan info records
3576 which are known to end before or at BITPOS next time.
3577 Only do it if all stores before this one also pass this. */
3583 update_first
= false;
3585 /* The stores in GROUP are ordered by bitposition so if we're past
3586 the region for this group return early. */
3587 if (stmt_start
>= end
)
3590 if (gimple_clobber_p (info
->stmt
))
3593 stores
->safe_push (info
);
3600 stores
->safe_push (info
);
3601 if (ret
&& !gimple_clobber_p (ret
->stmt
))
3607 else if (ret
&& !gimple_clobber_p (ret
->stmt
))
3615 /* Return how many SSA_NAMEs used to compute value to store in the INFO
3616 store have multiple uses. If any SSA_NAME has multiple uses, also
3617 count statements needed to compute it. */
3620 count_multiple_uses (store_immediate_info
*info
)
3622 gimple
*stmt
= info
->stmt
;
3624 switch (info
->rhs_code
)
3632 if (info
->bit_not_p
)
3634 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3635 ret
= 1; /* Fall through below to return
3636 the BIT_NOT_EXPR stmt and then
3637 BIT_{AND,IOR,XOR}_EXPR and anything it
3640 /* stmt is after this the BIT_NOT_EXPR. */
3641 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3643 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3645 ret
+= 1 + info
->ops
[0].bit_not_p
;
3646 if (info
->ops
[1].base_addr
)
3647 ret
+= 1 + info
->ops
[1].bit_not_p
;
3650 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3651 /* stmt is now the BIT_*_EXPR. */
3652 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3653 ret
+= 1 + info
->ops
[info
->ops_swapped_p
].bit_not_p
;
3654 else if (info
->ops
[info
->ops_swapped_p
].bit_not_p
)
3656 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3657 if (!has_single_use (gimple_assign_rhs1 (stmt2
)))
3660 if (info
->ops
[1].base_addr
== NULL_TREE
)
3662 gcc_checking_assert (!info
->ops_swapped_p
);
3665 if (!has_single_use (gimple_assign_rhs2 (stmt
)))
3666 ret
+= 1 + info
->ops
[1 - info
->ops_swapped_p
].bit_not_p
;
3667 else if (info
->ops
[1 - info
->ops_swapped_p
].bit_not_p
)
3669 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3670 if (!has_single_use (gimple_assign_rhs1 (stmt2
)))
3675 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3676 return 1 + info
->ops
[0].bit_not_p
;
3677 else if (info
->ops
[0].bit_not_p
)
3679 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3680 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3684 case BIT_INSERT_EXPR
:
3685 return has_single_use (gimple_assign_rhs1 (stmt
)) ? 0 : 1;
3691 /* Split a merged store described by GROUP by populating the SPLIT_STORES
3692 vector (if non-NULL) with split_store structs describing the byte offset
3693 (from the base), the bit size and alignment of each store as well as the
3694 original statements involved in each such split group.
3695 This is to separate the splitting strategy from the statement
3696 building/emission/linking done in output_merged_store.
3697 Return number of new stores.
3698 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3699 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3700 BZERO_FIRST may be true only when the first store covers the whole group
3701 and clears it; if BZERO_FIRST is true, keep that first store in the set
3702 unmodified and emit further stores for the overrides only.
3703 If SPLIT_STORES is NULL, it is just a dry run to count number of
3707 split_group (merged_store_group
*group
, bool allow_unaligned_store
,
3708 bool allow_unaligned_load
, bool bzero_first
,
3709 vec
<split_store
*> *split_stores
,
3710 unsigned *total_orig
,
3711 unsigned *total_new
)
3713 unsigned HOST_WIDE_INT pos
= group
->bitregion_start
;
3714 unsigned HOST_WIDE_INT size
= group
->bitregion_end
- pos
;
3715 unsigned HOST_WIDE_INT bytepos
= pos
/ BITS_PER_UNIT
;
3716 unsigned HOST_WIDE_INT group_align
= group
->align
;
3717 unsigned HOST_WIDE_INT align_base
= group
->align_base
;
3718 unsigned HOST_WIDE_INT group_load_align
= group_align
;
3719 bool any_orig
= false;
3721 gcc_assert ((size
% BITS_PER_UNIT
== 0) && (pos
% BITS_PER_UNIT
== 0));
3723 /* For bswap framework using sets of stores, all the checking has been done
3724 earlier in try_coalesce_bswap and the result always needs to be emitted
3725 as a single store. Likewise for string concatenation. */
3726 if (group
->stores
[0]->rhs_code
== LROTATE_EXPR
3727 || group
->stores
[0]->rhs_code
== NOP_EXPR
3728 || group
->string_concatenation
)
3730 gcc_assert (!bzero_first
);
3733 /* Avoid the old/new stmt count heuristics. It should be
3734 always beneficial. */
3741 unsigned HOST_WIDE_INT align_bitpos
3742 = (group
->start
- align_base
) & (group_align
- 1);
3743 unsigned HOST_WIDE_INT align
= group_align
;
3745 align
= least_bit_hwi (align_bitpos
);
3746 bytepos
= group
->start
/ BITS_PER_UNIT
;
3748 = new split_store (bytepos
, group
->width
, align
);
3749 unsigned int first
= 0;
3750 find_constituent_stores (group
, &store
->orig_stores
,
3751 &first
, group
->start
, group
->width
);
3752 split_stores
->safe_push (store
);
3758 unsigned int ret
= 0, first
= 0;
3759 unsigned HOST_WIDE_INT try_pos
= bytepos
;
3764 store_immediate_info
*info
= group
->stores
[0];
3767 total_orig
[0] = 1; /* The orig store. */
3768 info
= group
->stores
[0];
3769 if (info
->ops
[0].base_addr
)
3771 if (info
->ops
[1].base_addr
)
3773 switch (info
->rhs_code
)
3778 total_orig
[0]++; /* The orig BIT_*_EXPR stmt. */
3783 total_orig
[0] *= group
->stores
.length ();
3785 FOR_EACH_VEC_ELT (group
->stores
, i
, info
)
3787 total_new
[0] += count_multiple_uses (info
);
3788 total_orig
[0] += (info
->bit_not_p
3789 + info
->ops
[0].bit_not_p
3790 + info
->ops
[1].bit_not_p
);
3794 if (!allow_unaligned_load
)
3795 for (int i
= 0; i
< 2; ++i
)
3796 if (group
->load_align
[i
])
3797 group_load_align
= MIN (group_load_align
, group
->load_align
[i
]);
3801 store_immediate_info
*gstore
;
3802 FOR_EACH_VEC_ELT (group
->stores
, first
, gstore
)
3803 if (!gimple_clobber_p (gstore
->stmt
))
3810 = new split_store (bytepos
, gstore
->bitsize
, align_base
);
3811 store
->orig_stores
.safe_push (gstore
);
3814 split_stores
->safe_push (store
);
3820 if ((allow_unaligned_store
|| group_align
<= BITS_PER_UNIT
)
3821 && (group
->mask
[try_pos
- bytepos
] == (unsigned char) ~0U
3822 || (bzero_first
&& group
->val
[try_pos
- bytepos
] == 0)))
3824 /* Skip padding bytes. */
3826 size
-= BITS_PER_UNIT
;
3830 unsigned HOST_WIDE_INT try_bitpos
= try_pos
* BITS_PER_UNIT
;
3831 unsigned int try_size
= MAX_STORE_BITSIZE
, nonmasked
;
3832 unsigned HOST_WIDE_INT align_bitpos
3833 = (try_bitpos
- align_base
) & (group_align
- 1);
3834 unsigned HOST_WIDE_INT align
= group_align
;
3835 bool found_orig
= false;
3837 align
= least_bit_hwi (align_bitpos
);
3838 if (!allow_unaligned_store
)
3839 try_size
= MIN (try_size
, align
);
3840 if (!allow_unaligned_load
)
3842 /* If we can't do or don't want to do unaligned stores
3843 as well as loads, we need to take the loads into account
3845 unsigned HOST_WIDE_INT load_align
= group_load_align
;
3846 align_bitpos
= (try_bitpos
- align_base
) & (load_align
- 1);
3848 load_align
= least_bit_hwi (align_bitpos
);
3849 for (int i
= 0; i
< 2; ++i
)
3850 if (group
->load_align
[i
])
3853 = known_alignment (try_bitpos
3854 - group
->stores
[0]->bitpos
3855 + group
->stores
[0]->ops
[i
].bitpos
3856 - group
->load_align_base
[i
]);
3857 if (align_bitpos
& (group_load_align
- 1))
3859 unsigned HOST_WIDE_INT a
= least_bit_hwi (align_bitpos
);
3860 load_align
= MIN (load_align
, a
);
3863 try_size
= MIN (try_size
, load_align
);
3865 store_immediate_info
*info
3866 = find_constituent_stores (group
, NULL
, &first
, try_bitpos
, try_size
);
3867 if (info
&& !gimple_clobber_p (info
->stmt
))
3869 /* If there is just one original statement for the range, see if
3870 we can just reuse the original store which could be even larger
3872 unsigned HOST_WIDE_INT stmt_end
3873 = ROUND_UP (info
->bitpos
+ info
->bitsize
, BITS_PER_UNIT
);
3874 info
= find_constituent_stores (group
, NULL
, &first
, try_bitpos
,
3875 stmt_end
- try_bitpos
);
3876 if (info
&& info
->bitpos
>= try_bitpos
)
3878 store_immediate_info
*info2
= NULL
;
3879 unsigned int first_copy
= first
;
3880 if (info
->bitpos
> try_bitpos
3881 && stmt_end
- try_bitpos
<= try_size
)
3883 info2
= find_constituent_stores (group
, NULL
, &first_copy
,
3885 info
->bitpos
- try_bitpos
);
3886 gcc_assert (info2
== NULL
|| gimple_clobber_p (info2
->stmt
));
3888 if (info2
== NULL
&& stmt_end
- try_bitpos
< try_size
)
3890 info2
= find_constituent_stores (group
, NULL
, &first_copy
,
3892 (try_bitpos
+ try_size
)
3894 gcc_assert (info2
== NULL
|| gimple_clobber_p (info2
->stmt
));
3898 try_size
= stmt_end
- try_bitpos
;
3905 /* Approximate store bitsize for the case when there are no padding
3907 while (try_size
> size
)
3909 /* Now look for whole padding bytes at the end of that bitsize. */
3910 for (nonmasked
= try_size
/ BITS_PER_UNIT
; nonmasked
> 0; --nonmasked
)
3911 if (group
->mask
[try_pos
- bytepos
+ nonmasked
- 1]
3912 != (unsigned char) ~0U
3914 || group
->val
[try_pos
- bytepos
+ nonmasked
- 1] != 0))
3916 if (nonmasked
== 0 || (info
&& gimple_clobber_p (info
->stmt
)))
3918 /* If entire try_size range is padding, skip it. */
3919 try_pos
+= try_size
/ BITS_PER_UNIT
;
3923 /* Otherwise try to decrease try_size if second half, last 3 quarters
3924 etc. are padding. */
3925 nonmasked
*= BITS_PER_UNIT
;
3926 while (nonmasked
<= try_size
/ 2)
3928 if (!allow_unaligned_store
&& group_align
> BITS_PER_UNIT
)
3930 /* Now look for whole padding bytes at the start of that bitsize. */
3931 unsigned int try_bytesize
= try_size
/ BITS_PER_UNIT
, masked
;
3932 for (masked
= 0; masked
< try_bytesize
; ++masked
)
3933 if (group
->mask
[try_pos
- bytepos
+ masked
] != (unsigned char) ~0U
3935 || group
->val
[try_pos
- bytepos
+ masked
] != 0))
3937 masked
*= BITS_PER_UNIT
;
3938 gcc_assert (masked
< try_size
);
3939 if (masked
>= try_size
/ 2)
3941 while (masked
>= try_size
/ 2)
3944 try_pos
+= try_size
/ BITS_PER_UNIT
;
3948 /* Need to recompute the alignment, so just retry at the new
3960 = new split_store (try_pos
, try_size
, align
);
3961 info
= find_constituent_stores (group
, &store
->orig_stores
,
3962 &first
, try_bitpos
, try_size
);
3964 && !gimple_clobber_p (info
->stmt
)
3965 && info
->bitpos
>= try_bitpos
3966 && info
->bitpos
+ info
->bitsize
<= try_bitpos
+ try_size
3967 && (store
->orig_stores
.length () == 1
3969 || (info
->bitpos
== try_bitpos
3970 && (info
->bitpos
+ info
->bitsize
3971 == try_bitpos
+ try_size
))))
3976 split_stores
->safe_push (store
);
3979 try_pos
+= try_size
/ BITS_PER_UNIT
;
3987 /* If we are reusing some original stores and any of the
3988 original SSA_NAMEs had multiple uses, we need to subtract
3989 those now before we add the new ones. */
3990 if (total_new
[0] && any_orig
)
3992 FOR_EACH_VEC_ELT (*split_stores
, i
, store
)
3994 total_new
[0] -= count_multiple_uses (store
->orig_stores
[0]);
3996 total_new
[0] += ret
; /* The new store. */
3997 store_immediate_info
*info
= group
->stores
[0];
3998 if (info
->ops
[0].base_addr
)
3999 total_new
[0] += ret
;
4000 if (info
->ops
[1].base_addr
)
4001 total_new
[0] += ret
;
4002 switch (info
->rhs_code
)
4007 total_new
[0] += ret
; /* The new BIT_*_EXPR stmt. */
4012 FOR_EACH_VEC_ELT (*split_stores
, i
, store
)
4015 bool bit_not_p
[3] = { false, false, false };
4016 /* If all orig_stores have certain bit_not_p set, then
4017 we'd use a BIT_NOT_EXPR stmt and need to account for it.
4018 If some orig_stores have certain bit_not_p set, then
4019 we'd use a BIT_XOR_EXPR with a mask and need to account for
4021 FOR_EACH_VEC_ELT (store
->orig_stores
, j
, info
)
4023 if (info
->ops
[0].bit_not_p
)
4024 bit_not_p
[0] = true;
4025 if (info
->ops
[1].bit_not_p
)
4026 bit_not_p
[1] = true;
4027 if (info
->bit_not_p
)
4028 bit_not_p
[2] = true;
4030 total_new
[0] += bit_not_p
[0] + bit_not_p
[1] + bit_not_p
[2];
/* Return the operation through which the operand IDX (if < 2) or
   result (IDX == 2) should be inverted.  If NOP_EXPR, no inversion
   is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
   the bits should be xored with mask.  */

static enum tree_code
invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
{
  unsigned int i;
  store_immediate_info *info;
  unsigned int cnt = 0;
  bool any_paddings = false;
  FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
    {
      bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
      if (bit_not_p)
	{
	  ++cnt;
	  tree lhs = gimple_assign_lhs (info->stmt);
	  if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	      && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
	    any_paddings = true;
	}
    }
  if (cnt == split_store->orig_stores.length () && !any_paddings)
    return BIT_NOT_EXPR;

  unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
  unsigned buf_size = split_store->size / BITS_PER_UNIT;
  unsigned char *buf
    = XALLOCAVEC (unsigned char, buf_size);
  memset (buf, ~0U, buf_size);
  FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
    {
      bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
      if (!bit_not_p)
	continue;
      /* Clear regions with bit_not_p and invert afterwards, rather than
	 clear regions with !bit_not_p, so that gaps in between stores aren't
	 set in the mask.  */
      unsigned HOST_WIDE_INT bitsize = info->bitsize;
      unsigned HOST_WIDE_INT prec = bitsize;
      unsigned int pos_in_buffer = 0;
      tree lhs = gimple_assign_lhs (info->stmt);
      if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	  && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
	prec = TYPE_PRECISION (TREE_TYPE (lhs));
      if (info->bitpos < try_bitpos)
	{
	  gcc_assert (info->bitpos + bitsize > try_bitpos);
	  if (!BYTES_BIG_ENDIAN)
	    {
	      if (prec <= try_bitpos - info->bitpos)
		prec = 0;
	      else
		prec -= try_bitpos - info->bitpos;
	    }
	  bitsize -= try_bitpos - info->bitpos;
	  if (BYTES_BIG_ENDIAN && prec > bitsize)
	    prec = bitsize;
	}
      else
	pos_in_buffer = info->bitpos - try_bitpos;
      /* If this is a bool inversion, invert just the least significant
	 prec bits rather than all bits of it.  */
      if (BYTES_BIG_ENDIAN)
	{
	  pos_in_buffer += bitsize - prec;
	  if (pos_in_buffer >= split_store->size)
	    continue;
	}
      if (pos_in_buffer + bitsize > split_store->size)
	bitsize = split_store->size - pos_in_buffer;
      unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
      if (BYTES_BIG_ENDIAN)
	clear_bit_region_be (p, (BITS_PER_UNIT - 1
				 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
      else
	clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
    }
  for (unsigned int i = 0; i < buf_size; ++i)
    buf[i] = ~buf[i];
  mask = native_interpret_expr (int_type, buf, buf_size);
  return BIT_XOR_EXPR;
}
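/* For illustration: suppose a 4-byte split store combines two 2-byte
   original stores and only the first one has bit_not_p set on the chosen
   operand.  The buffer starts as ff ff ff ff, the two bytes covered by the
   inverted store are cleared to 00 00 ff ff, and the final inversion turns
   that into ff ff 00 00, so the BIT_XOR_EXPR mask flips exactly the bits
   belonging to the inverted store and leaves the rest (including any gaps
   between stores) untouched.  */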
/* Given a merged store group GROUP output the widened version of it.
   The store chain is against the base object BASE.
   Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
   unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
   Make sure that the number of statements output is less than the number of
   original statements.  If a better sequence is possible emit it and
   return true.  */

bool
imm_store_chain_info::output_merged_store (merged_store_group *group)
{
  const unsigned HOST_WIDE_INT start_byte_pos
    = group->bitregion_start / BITS_PER_UNIT;
  unsigned int orig_num_stmts = group->stores.length ();
  if (orig_num_stmts < 2)
    return false;

  bool allow_unaligned_store
    = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
  bool allow_unaligned_load = allow_unaligned_store;
  bool bzero_first = false;
  store_immediate_info *store;
  unsigned int num_clobber_stmts = 0;
  if (group->stores[0]->rhs_code == INTEGER_CST)
    {
      unsigned int i;
      FOR_EACH_VEC_ELT (group->stores, i, store)
	if (gimple_clobber_p (store->stmt))
	  num_clobber_stmts++;
	else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
		 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
		 && group->start == store->bitpos
		 && group->width == store->bitsize
		 && (group->start % BITS_PER_UNIT) == 0
		 && (group->width % BITS_PER_UNIT) == 0)
	  {
	    bzero_first = true;
	    break;
	  }
	else
	  break;
      FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
	if (gimple_clobber_p (store->stmt))
	  num_clobber_stmts++;
	else
	  break;
      if (num_clobber_stmts == orig_num_stmts)
	return false;
      orig_num_stmts -= num_clobber_stmts;
    }
  if (allow_unaligned_store || bzero_first)
    {
      /* If unaligned stores are allowed, see how many stores we'd emit
	 for unaligned and how many stores we'd emit for aligned stores.
	 Only use unaligned stores if it allows fewer stores than aligned.
	 Similarly, if there is a whole region clear first, prefer expanding
	 it together compared to expanding clear first followed by merged
	 further stores.  */
      unsigned cnt[4] = { ~0U, ~0U, ~0U, ~0U };
      int pass_min = 0;
      for (int pass = 0; pass < 4; ++pass)
	{
	  if (!allow_unaligned_store && (pass & 1) != 0)
	    continue;
	  if (!bzero_first && (pass & 2) != 0)
	    continue;
	  cnt[pass] = split_group (group, (pass & 1) != 0,
				   allow_unaligned_load, (pass & 2) != 0,
				   NULL, NULL, NULL);
	  if (cnt[pass] < cnt[pass_min])
	    pass_min = pass;
	}
      if ((pass_min & 1) == 0)
	allow_unaligned_store = false;
      if ((pass_min & 2) == 0)
	bzero_first = false;
    }
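  /* The encoding of PASS above: bit 0 selects trying unaligned stores,
     bit 1 selects keeping the leading whole-region clear as a separate
     first step (bzero_first).  Whichever combination yields the fewest
     split stores wins, and the flags of the losing combinations are
     switched back off before the real split below.  */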
  auto_vec<class split_store *, 32> split_stores;
  split_store *split_store;
  unsigned total_orig, total_new, i;
  split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
	       &split_stores, &total_orig, &total_new);

  /* Determine if there is a clobber covering the whole group at the start,
     followed by proposed split stores that cover the whole group.  In that
     case, prefer the transformation even if
     split_stores.length () == orig_num_stmts.  */
  bool clobber_first = false;
  if (num_clobber_stmts
      && gimple_clobber_p (group->stores[0]->stmt)
      && group->start == group->stores[0]->bitpos
      && group->width == group->stores[0]->bitsize
      && (group->start % BITS_PER_UNIT) == 0
      && (group->width % BITS_PER_UNIT) == 0)
    {
      clobber_first = true;
      unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
      FOR_EACH_VEC_ELT (split_stores, i, split_store)
	if (split_store->bytepos != pos)
	  {
	    clobber_first = false;
	    break;
	  }
	else
	  pos += split_store->size / BITS_PER_UNIT;
      if (pos != (group->start + group->width) / BITS_PER_UNIT)
	clobber_first = false;
    }

  if (split_stores.length () >= orig_num_stmts + clobber_first)
    {
      /* We didn't manage to reduce the number of statements.  Bail out.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Exceeded original number of stmts (%u)."
			    " Not profitable to emit new sequence.\n",
		 orig_num_stmts);
      FOR_EACH_VEC_ELT (split_stores, i, split_store)
	delete split_store;
      return false;
    }
  if (total_orig <= total_new)
    {
      /* If number of estimated new statements is above estimated original
	 statements, bail out too.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Estimated number of original stmts (%u)"
			    " not larger than estimated number of new"
			    " stmts (%u).\n",
		 total_orig, total_new);
      FOR_EACH_VEC_ELT (split_stores, i, split_store)
	delete split_store;
      return false;
    }
  if (group->stores[0]->rhs_code == INTEGER_CST)
    {
      bool all_orig = true;
      FOR_EACH_VEC_ELT (split_stores, i, split_store)
	if (!split_store->orig)
	  {
	    all_orig = false;
	    break;
	  }
      if (all_orig)
	{
	  unsigned int cnt = split_stores.length ();
	  store_immediate_info *store;
	  FOR_EACH_VEC_ELT (group->stores, i, store)
	    if (gimple_clobber_p (store->stmt))
	      ++cnt;
	  /* Punt if we wouldn't make any real changes, i.e. keep all
	     orig stmts + all clobbers.  */
	  if (cnt == group->stores.length ())
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Exceeded original number of stmts (%u)."
				    " Not profitable to emit new sequence.\n",
			 orig_num_stmts);
	      FOR_EACH_VEC_ELT (split_stores, i, split_store)
		delete split_store;
	      return false;
	    }
	}
    }
  gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
  gimple_seq seq = NULL;
  tree last_vdef, new_vuse;
  last_vdef = gimple_vdef (group->last_stmt);
  new_vuse = gimple_vuse (group->last_stmt);
  tree bswap_res = NULL_TREE;

  /* Clobbers are not removed.  */
  if (gimple_clobber_p (group->last_stmt))
    {
      new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
      gimple_set_vdef (group->last_stmt, new_vuse);
    }

  if (group->stores[0]->rhs_code == LROTATE_EXPR
      || group->stores[0]->rhs_code == NOP_EXPR)
    {
      tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
      gimple *ins_stmt = group->stores[0]->ins_stmt;
      struct symbolic_number *n = &group->stores[0]->n;
      bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;

      switch (n->range)
	{
	case 16:
	  load_type = bswap_type = uint16_type_node;
	  break;
	case 32:
	  load_type = uint32_type_node;
	  if (bswap)
	    {
	      fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
	      bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
	    }
	  break;
	case 64:
	  load_type = uint64_type_node;
	  if (bswap)
	    {
	      fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
	      bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
	    }
	  break;
	default:
	  gcc_unreachable ();
	}

      /* If the loads have each vuse of the corresponding store,
	 we've checked the aliasing already in try_coalesce_bswap and
	 we want to sink the need load into seq.  So need to use new_vuse
	 on the load.  */
      if (n->vuse == NULL)
	n->vuse = new_vuse;
      else
	/* Update vuse in case it has changed by output_merged_stores.  */
	n->vuse = gimple_vuse (ins_stmt);

      bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
				 bswap_type, load_type, n, bswap,
      gcc_assert (bswap_res);
    }
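  /* In other words: when the whole group was recognized as a byte swap
     (rhs_code LROTATE_EXPR) the sequence is replaced by a single wider
     load or constant fed through __builtin_bswap32/64, and when it was
     recognized as an identity (NOP_EXPR) the byte swap is simply omitted;
     bswap_res is then used as the value stored by the merged store
     below.  */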
  gimple *stmt = NULL;
  auto_vec<gimple *, 32> orig_stmts;
  gimple_seq this_seq;
  tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
				      is_gimple_mem_ref_addr, NULL_TREE);
  gimple_seq_add_seq_without_update (&seq, this_seq);

  tree load_addr[2] = { NULL_TREE, NULL_TREE };
  gimple_seq load_seq[2] = { NULL, NULL };
  gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
  for (int j = 0; j < 2; ++j)
    {
      store_operand_info &op = group->stores[0]->ops[j];
      if (op.base_addr == NULL_TREE)
	continue;

      store_immediate_info *infol = group->stores.last ();
      if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
	{
	  /* We can't pick the location randomly; while we've verified
	     all the loads have the same vuse, they can be still in different
	     basic blocks and we need to pick the one from the last bb:
	     otherwise if we put the wider load at the q[0] load, we might
	     segfault if q[1] is not mapped.  */
	  basic_block bb = gimple_bb (op.stmt);
	  gimple *ostmt = op.stmt;
	  store_immediate_info *info;
	  FOR_EACH_VEC_ELT (group->stores, i, info)
	    {
	      gimple *tstmt = info->ops[j].stmt;
	      basic_block tbb = gimple_bb (tstmt);
	      if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
		{
		  ostmt = tstmt;
		  bb = tbb;
		}
	    }
	  load_gsi[j] = gsi_for_stmt (ostmt);
	  load_addr[j]
	    = force_gimple_operand_1 (unshare_expr (op.base_addr),
				      &load_seq[j], is_gimple_mem_ref_addr,
				      NULL_TREE);
	}
      else if (operand_equal_p (base_addr, op.base_addr, 0))
	load_addr[j] = addr;
      else
	{
	  load_addr[j]
	    = force_gimple_operand_1 (unshare_expr (op.base_addr),
				      &this_seq, is_gimple_mem_ref_addr,
				      NULL_TREE);
	  gimple_seq_add_seq_without_update (&seq, this_seq);
	}
    }
  FOR_EACH_VEC_ELT (split_stores, i, split_store)
    {
      const unsigned HOST_WIDE_INT try_size = split_store->size;
      const unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
      const unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
      const unsigned HOST_WIDE_INT try_align = split_store->align;
      const unsigned HOST_WIDE_INT try_offset = try_pos - start_byte_pos;
      tree dest, src;
      location_t loc;

      if (split_store->orig)
	{
	  /* If there is just a single non-clobber constituent store
	     which covers the whole area, just reuse the lhs and rhs.  */
	  gimple *orig_stmt = NULL;
	  store_immediate_info *store;
	  unsigned int j;
	  FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
	    if (!gimple_clobber_p (store->stmt))
	      {
		orig_stmt = store->stmt;
		break;
	      }
	  dest = gimple_assign_lhs (orig_stmt);
	  src = gimple_assign_rhs1 (orig_stmt);
	  loc = gimple_location (orig_stmt);
	}
      else
	{
	  store_immediate_info *info;
	  unsigned short clique, base;
	  unsigned int k;
	  FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
	    orig_stmts.safe_push (info->stmt);
	  tree offset_type
	    = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
	  tree dest_type;
	  loc = get_location_for_stmts (orig_stmts);
	  orig_stmts.truncate (0);

	  if (group->string_concatenation)
	    dest_type
	      = build_array_type_nelts (char_type_node,
					try_size / BITS_PER_UNIT);
	  else
	    {
	      dest_type = build_nonstandard_integer_type (try_size, UNSIGNED);
	      dest_type = build_aligned_type (dest_type, try_align);
	    }
	  dest = fold_build2 (MEM_REF, dest_type, addr,
			      build_int_cst (offset_type, try_pos));
	  if (TREE_CODE (dest) == MEM_REF)
	    {
	      MR_DEPENDENCE_CLIQUE (dest) = clique;
	      MR_DEPENDENCE_BASE (dest) = base;
	    }

	  tree mask;
	  if (bswap_res || group->string_concatenation)
	    mask = integer_zero_node;
	  else
	    mask = native_interpret_expr (dest_type,
					  group->mask + try_offset,
					  try_size / BITS_PER_UNIT);

	  tree ops[2];
	  for (int j = 0;
	       j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
	       ++j)
	    {
	      store_operand_info &op = split_store->orig_stores[0]->ops[j];
	      if (bswap_res)
		ops[j] = bswap_res;
	      else if (group->string_concatenation)
		{
		  ops[j] = build_string (try_size / BITS_PER_UNIT,
					 (const char *) group->val + try_offset);
		  TREE_TYPE (ops[j]) = dest_type;
		}
	      else if (op.base_addr)
		{
		  FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
		    orig_stmts.safe_push (info->ops[j].stmt);

		  offset_type = get_alias_type_for_stmts (orig_stmts, true,
							  &clique, &base);
		  location_t load_loc = get_location_for_stmts (orig_stmts);
		  orig_stmts.truncate (0);

		  unsigned HOST_WIDE_INT load_align = group->load_align[j];
		  unsigned HOST_WIDE_INT align_bitpos
		    = known_alignment (try_bitpos
				       - split_store->orig_stores[0]->bitpos
				       + op.bitpos);
		  if (align_bitpos & (load_align - 1))
		    load_align = least_bit_hwi (align_bitpos);

		  tree load_int_type
		    = build_nonstandard_integer_type (try_size, UNSIGNED);
		  load_int_type
		    = build_aligned_type (load_int_type, load_align);

		  poly_uint64 load_pos
		    = exact_div (try_bitpos
				 - split_store->orig_stores[0]->bitpos
				 + op.bitpos, BITS_PER_UNIT);
		  ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
					build_int_cst (offset_type, load_pos));
		  if (TREE_CODE (ops[j]) == MEM_REF)
		    {
		      MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
		      MR_DEPENDENCE_BASE (ops[j]) = base;
		    }
		  if (!integer_zerop (mask))
		    /* The load might load some bits (that will be masked
		       off later on) uninitialized, avoid -W*uninitialized
		       warnings in that case.  */
		    suppress_warning (ops[j], OPT_Wuninitialized);

		  stmt = gimple_build_assign (make_ssa_name (dest_type),
					      ops[j]);
		  gimple_set_location (stmt, load_loc);
		  if (gsi_bb (load_gsi[j]))
		    {
		      gimple_set_vuse (stmt, gimple_vuse (op.stmt));
		      gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
		    }
		  else
		    {
		      gimple_set_vuse (stmt, new_vuse);
		      gimple_seq_add_stmt_without_update (&seq, stmt);
		    }
		  ops[j] = gimple_assign_lhs (stmt);
		  tree xor_mask;
		  enum tree_code inv_op
		    = invert_op (split_store, j, dest_type, xor_mask);
		  if (inv_op != NOP_EXPR)
		    {
		      stmt = gimple_build_assign (make_ssa_name (dest_type),
						  inv_op, ops[j], xor_mask);
		      gimple_set_location (stmt, load_loc);
		      ops[j] = gimple_assign_lhs (stmt);

		      if (gsi_bb (load_gsi[j]))
			gimple_seq_add_stmt_without_update (&load_seq[j],
							    stmt);
		      else
			gimple_seq_add_stmt_without_update (&seq, stmt);
		    }
		}
	      else
		ops[j] = native_interpret_expr (dest_type,
						group->val + try_offset,
						try_size / BITS_PER_UNIT);
	    }
	  switch (split_store->orig_stores[0]->rhs_code)
	    {
	    case BIT_AND_EXPR:
	    case BIT_IOR_EXPR:
	    case BIT_XOR_EXPR:
	      FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
		{
		  tree rhs1 = gimple_assign_rhs1 (info->stmt);
		  orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
		}
	      location_t bit_loc;
	      bit_loc = get_location_for_stmts (orig_stmts);
	      orig_stmts.truncate (0);

	      stmt
		= gimple_build_assign (make_ssa_name (dest_type),
				       split_store->orig_stores[0]->rhs_code,
				       ops[0], ops[1]);
	      gimple_set_location (stmt, bit_loc);
	      /* If there is just one load and there is a separate
		 load_seq[0], emit the bitwise op right after it.  */
	      if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
		gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
	      /* Otherwise, if at least one load is in seq, we need to
		 emit the bitwise op right before the store.  If there
		 are two loads and are emitted somewhere else, it would
		 be better to emit the bitwise op as early as possible;
		 we don't track where that would be possible right now
		 though.  */
	      else
		gimple_seq_add_stmt_without_update (&seq, stmt);
	      src = gimple_assign_lhs (stmt);
	      tree xor_mask;
	      enum tree_code inv_op;
	      inv_op = invert_op (split_store, 2, dest_type, xor_mask);
	      if (inv_op != NOP_EXPR)
		{
		  stmt = gimple_build_assign (make_ssa_name (dest_type),
					      inv_op, src, xor_mask);
		  gimple_set_location (stmt, bit_loc);
		  if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
		    gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
		  else
		    gimple_seq_add_stmt_without_update (&seq, stmt);
		  src = gimple_assign_lhs (stmt);
		}
	      break;
	    case LROTATE_EXPR:
	    case NOP_EXPR:
	      src = ops[0];
	      if (!is_gimple_val (src))
		{
		  stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
					      src);
		  gimple_seq_add_stmt_without_update (&seq, stmt);
		  src = gimple_assign_lhs (stmt);
		}
	      if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
		{
		  stmt = gimple_build_assign (make_ssa_name (dest_type),
					      NOP_EXPR, src);
		  gimple_seq_add_stmt_without_update (&seq, stmt);
		  src = gimple_assign_lhs (stmt);
		}
	      inv_op = invert_op (split_store, 2, dest_type, xor_mask);
	      if (inv_op != NOP_EXPR)
		{
		  stmt = gimple_build_assign (make_ssa_name (dest_type),
					      inv_op, src, xor_mask);
		  gimple_set_location (stmt, loc);
		  gimple_seq_add_stmt_without_update (&seq, stmt);
		  src = gimple_assign_lhs (stmt);
		}
	      break;
	    default:
	      src = ops[0];
	      break;
	    }
	  /* If bit insertion is required, we use the source as an accumulator
	     into which the successive bit-field values are manually inserted.
	     FIXME: perhaps use BIT_INSERT_EXPR instead in some cases?  */
	  if (group->bit_insertion)
	    FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
	      if (info->rhs_code == BIT_INSERT_EXPR
		  && info->bitpos < try_bitpos + try_size
		  && info->bitpos + info->bitsize > try_bitpos)
		{
		  /* Mask, truncate, convert to final type, shift and ior into
		     the accumulator.  Note that every step can be a no-op.  */
		  const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
		  const HOST_WIDE_INT end_gap
		    = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
		  tree tem = info->ops[0].val;
		  if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
		    {
		      const unsigned HOST_WIDE_INT size
			= tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
		      tree integer_type
			= build_nonstandard_integer_type (size, UNSIGNED);
		      tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
					  integer_type, tem);
		    }
		  if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
		    {
		      tree bitfield_type
			= build_nonstandard_integer_type (info->bitsize,
							  UNSIGNED);
		      tem = gimple_convert (&seq, loc, bitfield_type, tem);
		    }
		  else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
		    {
		      wide_int imask
			= wi::mask (info->bitsize, false,
				    TYPE_PRECISION (TREE_TYPE (tem)));
		      tem = gimple_build (&seq, loc,
					  BIT_AND_EXPR, TREE_TYPE (tem), tem,
					  wide_int_to_tree (TREE_TYPE (tem),
							    imask));
		    }
		  const HOST_WIDE_INT shift
		    = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
		  if (shift < 0)
		    tem = gimple_build (&seq, loc,
					RSHIFT_EXPR, TREE_TYPE (tem), tem,
					build_int_cst (NULL_TREE, -shift));
		  tem = gimple_convert (&seq, loc, dest_type, tem);
		  if (shift > 0)
		    tem = gimple_build (&seq, loc,
					LSHIFT_EXPR, dest_type, tem,
					build_int_cst (NULL_TREE, shift));
		  src = gimple_build (&seq, loc,
				      BIT_IOR_EXPR, dest_type, tem, src);
		}
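	  /* A small example of the accumulation above: inserting a 5-bit
	     value at bit offset 3 of a 16-bit little-endian split store
	     masks the value to its low 5 bits if it might carry garbage in
	     higher bits, converts it to the 16-bit type, shifts it left by
	     start_gap == 3 and IORs it into SRC; on big-endian targets the
	     shift amount is end_gap instead.  */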
	  if (!integer_zerop (mask))
	    {
	      tree tem = make_ssa_name (dest_type);
	      tree load_src = unshare_expr (dest);
	      /* The load might load some or all bits uninitialized,
		 avoid -W*uninitialized warnings in that case.
		 As optimization, it would be nice if all the bits are
		 provably uninitialized (no stores at all yet or previous
		 store a CLOBBER) we'd optimize away the load and replace
		 it.  */
	      suppress_warning (load_src, OPT_Wuninitialized);
	      stmt = gimple_build_assign (tem, load_src);
	      gimple_set_location (stmt, loc);
	      gimple_set_vuse (stmt, new_vuse);
	      gimple_seq_add_stmt_without_update (&seq, stmt);

	      /* FIXME: If there is a single chunk of zero bits in mask,
		 perhaps use BIT_INSERT_EXPR instead?  */
	      stmt = gimple_build_assign (make_ssa_name (dest_type),
					  BIT_AND_EXPR, tem, mask);
	      gimple_set_location (stmt, loc);
	      gimple_seq_add_stmt_without_update (&seq, stmt);
	      tem = gimple_assign_lhs (stmt);

	      if (TREE_CODE (src) == INTEGER_CST)
		src = wide_int_to_tree (dest_type,
					wi::bit_and_not (wi::to_wide (src),
							 wi::to_wide (mask)));
	      else
		{
		  tree nmask
		    = wide_int_to_tree (dest_type,
					wi::bit_not (wi::to_wide (mask)));
		  stmt = gimple_build_assign (make_ssa_name (dest_type),
					      BIT_AND_EXPR, src, nmask);
		  gimple_set_location (stmt, loc);
		  gimple_seq_add_stmt_without_update (&seq, stmt);
		  src = gimple_assign_lhs (stmt);
		}
	      stmt = gimple_build_assign (make_ssa_name (dest_type),
					  BIT_IOR_EXPR, tem, src);
	      gimple_set_location (stmt, loc);
	      gimple_seq_add_stmt_without_update (&seq, stmt);
	      src = gimple_assign_lhs (stmt);
	    }
	}
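      /* The net effect of the block above is a read-modify-write:
	 dest = (old_value & mask) | (new_value & ~mask), where the mask has
	 1 bits exactly over the padding/gap bits that must be preserved
	 from memory and 0 bits over the bits provided by the recorded
	 stores.  */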
      stmt = gimple_build_assign (dest, src);
      gimple_set_location (stmt, loc);
      gimple_set_vuse (stmt, new_vuse);
      gimple_seq_add_stmt_without_update (&seq, stmt);

      if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
	add_stmt_to_eh_lp (stmt, group->lp_nr);

      tree new_vdef;
      if (i < split_stores.length () - 1)
	new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
      else
	new_vdef = last_vdef;

      gimple_set_vdef (stmt, new_vdef);
      SSA_NAME_DEF_STMT (new_vdef) = stmt;
      new_vuse = new_vdef;
    }

  FOR_EACH_VEC_ELT (split_stores, i, split_store)
    delete split_store;

  if (dump_file)
    {
      fprintf (dump_file,
	       "New sequence of %u stores to replace old one of %u stores\n",
	       split_stores.length (), orig_num_stmts);
      if (dump_flags & TDF_DETAILS)
	print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
    }

  if (gimple_clobber_p (group->last_stmt))
    update_stmt (group->last_stmt);
  if (group->lp_nr > 0)
    {
      /* We're going to insert a sequence of (potentially) throwing stores
	 into an active EH region.  This means that we're going to create
	 new basic blocks with EH edges pointing to the post landing pad
	 and, therefore, to have to update its PHI nodes, if any.  For the
	 virtual PHI node, we're going to use the VDEFs created above, but
	 for the other nodes, we need to record the original reaching defs.  */
      eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
      basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
      basic_block last_bb = gimple_bb (group->last_stmt);
      edge last_edge = find_edge (last_bb, lp_bb);
      auto_vec<tree, 16> last_defs;
      gphi_iterator gpi;
      for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
	{
	  gphi *phi = gpi.phi ();
	  tree last_def;
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    last_def = NULL_TREE;
	  else
	    last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
	  last_defs.safe_push (last_def);
	}

      /* Do the insertion.  Then, if new basic blocks have been created in the
	 process, rewind the chain of VDEFs create above to walk the new basic
	 blocks and update the corresponding arguments of the PHI nodes.  */
      update_modified_stmts (seq);
      if (gimple_find_sub_bbs (seq, &last_gsi))
	while (last_vdef != gimple_vuse (group->last_stmt))
	  {
	    gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
	    if (stmt_could_throw_p (cfun, stmt))
	      {
		edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
		for (gpi = gsi_start_phis (lp_bb), i = 0;
		     !gsi_end_p (gpi);
		     gsi_next (&gpi), i++)
		  {
		    gphi *phi = gpi.phi ();
		    tree new_def;
		    if (virtual_operand_p (gimple_phi_result (phi)))
		      new_def = last_vdef;
		    else
		      new_def = last_defs[i];
		    add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
		  }
	      }
	    last_vdef = gimple_vuse (stmt);
	  }
    }
  else
    gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);

  for (int j = 0; j < 2; ++j)
    if (load_seq[j])
      gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);

  return true;
}
/* Process the merged_store_group objects created in the coalescing phase.
   The stores are all against the base object BASE.
   Try to output the widened stores and delete the original statements if
   successful.  Return true iff any changes were made.  */

bool
imm_store_chain_info::output_merged_stores ()
{
  unsigned int i;
  bool ret = false;
  merged_store_group *merged_store;
  FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
    {
      if (dbg_cnt (store_merging)
	  && output_merged_store (merged_store))
	{
	  unsigned int j;
	  store_immediate_info *store;
	  FOR_EACH_VEC_ELT (merged_store->stores, j, store)
	    {
	      gimple *stmt = store->stmt;
	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	      /* Don't remove clobbers, they are still useful even if
		 everything is overwritten afterwards.  */
	      if (gimple_clobber_p (stmt))
		continue;
	      gsi_remove (&gsi, true);
	      if (store->lp_nr)
		remove_stmt_from_eh_lp (stmt);
	      if (stmt != merged_store->last_stmt)
		{
		  unlink_stmt_vdef (stmt);
		  release_defs (stmt);
		}
	    }
	  ret = true;
	}
    }
  if (ret && dump_file)
    fprintf (dump_file, "Merging successful!\n");

  return ret;
}
/* Coalesce the store_immediate_info objects recorded against the base object
   BASE in the first phase and output them.
   Delete the allocated structures.
   Return true if any changes were made.  */

bool
imm_store_chain_info::terminate_and_process_chain ()
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Terminating chain with %u stores\n",
	     m_store_info.length ());
  /* Process store chain.  */
  bool ret = false;
  if (m_store_info.length () > 1)
    {
      ret = coalesce_immediate_stores ();
      if (ret)
	ret = output_merged_stores ();
    }

  /* Delete all the entries we allocated ourselves.  */
  store_immediate_info *info;
  unsigned int i;
  FOR_EACH_VEC_ELT (m_store_info, i, info)
    delete info;

  merged_store_group *merged_info;
  FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
    delete merged_info;

  return ret;
}
/* Return true iff LHS is a destination potentially interesting for
   store merging.  In practice these are the codes that get_inner_reference
   can process.  */

static bool
lhs_valid_for_store_merging_p (tree lhs)
{
  if (DECL_P (lhs))
    return true;

  switch (TREE_CODE (lhs))
    {
    case ARRAY_REF:
    case ARRAY_RANGE_REF:
    case BIT_FIELD_REF:
    case COMPONENT_REF:
    case MEM_REF:
    case VIEW_CONVERT_EXPR:
      return true;
    default:
      return false;
    }
}

/* Return true if the tree RHS is a constant we want to consider
   during store merging.  In practice accept all codes that
   native_encode_expr accepts.  */

static bool
rhs_valid_for_store_merging_p (tree rhs)
{
  unsigned HOST_WIDE_INT size;
  if (TREE_CODE (rhs) == CONSTRUCTOR
      && CONSTRUCTOR_NELTS (rhs) == 0
      && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
      && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
    return true;
  return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
	  && native_encode_expr (rhs, NULL, size) != 0);
}
/* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
   and return true on success or false on failure.  */

static bool
adjust_bit_pos (poly_offset_int byte_off,
		poly_int64 *pbitpos,
		poly_uint64 *pbitregion_start,
		poly_uint64 *pbitregion_end)
{
  poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
  bit_off += *pbitpos;

  if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
    {
      if (maybe_ne (*pbitregion_end, 0U))
	{
	  bit_off = byte_off << LOG2_BITS_PER_UNIT;
	  bit_off += *pbitregion_start;
	  if (bit_off.to_uhwi (pbitregion_start))
	    {
	      bit_off = byte_off << LOG2_BITS_PER_UNIT;
	      bit_off += *pbitregion_end;
	      if (!bit_off.to_uhwi (pbitregion_end))
		*pbitregion_end = 0;
	    }
	  else
	    *pbitregion_end = 0;
	}
      return true;
    }

  return false;
}
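/* For example, a BYTE_OFF of 4 moves *PBITPOS and both bit-region bounds
   by 32 bits on an 8-bit-byte target; if an adjusted region bound no
   longer fits in an unsigned HOST_WIDE_INT the region end is reset to 0
   (i.e. "no bit region"), and if the adjusted bit position itself cannot
   be represented the function reports failure to the caller.  */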
/* If MEM is a memory reference usable for store merging (either as
   store destination or for loads), return the non-NULL base_addr
   and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
   Otherwise return NULL, *PBITPOS should be still valid even for that
   case.  */

static tree
mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
			     poly_uint64 *pbitpos,
			     poly_uint64 *pbitregion_start,
			     poly_uint64 *pbitregion_end)
{
  poly_int64 bitsize, bitpos;
  poly_uint64 bitregion_start = 0, bitregion_end = 0;
  machine_mode mode;
  int unsignedp = 0, reversep = 0, volatilep = 0;
  tree offset;
  tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
					&unsignedp, &reversep, &volatilep);
  *pbitsize = bitsize;
  if (known_le (bitsize, 0))
    return NULL_TREE;

  if (TREE_CODE (mem) == COMPONENT_REF
      && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
    {
      get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
      if (maybe_ne (bitregion_end, 0U))
	bitregion_end += 1;
    }

  /* We do not want to rewrite TARGET_MEM_REFs.  */
  if (TREE_CODE (base_addr) == TARGET_MEM_REF)
    return NULL_TREE;
  /* In some cases get_inner_reference may return a
     MEM_REF [ptr + byteoffset].  For the purposes of this pass
     canonicalize the base_addr to MEM_REF [ptr] and take
     byteoffset into account in the bitpos.  This occurs in
     PR 23684 and this way we can catch more chains.  */
  else if (TREE_CODE (base_addr) == MEM_REF)
    {
      if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
			   &bitregion_start, &bitregion_end))
	return NULL_TREE;
      base_addr = TREE_OPERAND (base_addr, 0);
    }
  /* get_inner_reference returns the base object, get at its
     address now.  */
  else
    {
      if (maybe_lt (bitpos, 0))
	return NULL_TREE;
      base_addr = build_fold_addr_expr (base_addr);
    }

  if (offset)
    {
      /* If the access is variable offset then a base decl has to be
	 address-taken to be able to emit pointer-based stores to it.
	 ??? We might be able to get away with re-using the original
	 base up to the first variable part and then wrapping that inside
	 a BIT_FIELD_REF.  */
      tree base = get_base_address (base_addr);
      if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
	return NULL_TREE;

      /* Similarly to above for the base, remove constant from the offset.  */
      if (TREE_CODE (offset) == PLUS_EXPR
	  && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
	  && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
			     &bitpos, &bitregion_start, &bitregion_end))
	offset = TREE_OPERAND (offset, 0);

      base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
			  base_addr, offset);
    }

  if (known_eq (bitregion_end, 0U))
    {
      bitregion_start = round_down_to_byte_boundary (bitpos);
      bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
    }

  *pbitsize = bitsize;
  *pbitpos = bitpos;
  *pbitregion_start = bitregion_start;
  *pbitregion_end = bitregion_end;
  return base_addr;
}
/* Return true if STMT is a load that can be used for store merging.
   In that case fill in *OP.  BITSIZE, BITPOS, BITREGION_START and
   BITREGION_END are properties of the corresponding store.  */

static bool
handled_load (gimple *stmt, store_operand_info *op,
	      poly_uint64 bitsize, poly_uint64 bitpos,
	      poly_uint64 bitregion_start, poly_uint64 bitregion_end)
{
  if (!is_gimple_assign (stmt))
    return false;
  if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
    {
      tree rhs1 = gimple_assign_rhs1 (stmt);
      if (TREE_CODE (rhs1) == SSA_NAME
	  && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
			   bitregion_start, bitregion_end))
	{
	  /* Don't allow _1 = load; _2 = ~1; _3 = ~_2; which should have
	     been optimized earlier, but if allowed here, would confuse the
	     multiple uses counting.  */
	  if (op->bit_not_p)
	    return false;
	  op->bit_not_p = !op->bit_not_p;
	  return true;
	}
      return false;
    }
  if (gimple_vuse (stmt)
      && gimple_assign_load_p (stmt)
      && !stmt_can_throw_internal (cfun, stmt)
      && !gimple_has_volatile_ops (stmt))
    {
      tree mem = gimple_assign_rhs1 (stmt);
      op->base_addr
	= mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
				       &op->bitregion_start,
				       &op->bitregion_end);
      if (op->base_addr != NULL_TREE
	  && known_eq (op->bitsize, bitsize)
	  && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
	  && known_ge (op->bitpos - op->bitregion_start,
		       bitpos - bitregion_start)
	  && known_ge (op->bitregion_end - op->bitpos,
		       bitregion_end - bitpos))
	{
	  op->stmt = stmt;
	  op->val = mem;
	  op->bit_not_p = false;
	  return true;
	}
    }
  return false;
}
/* Return the index number of the landing pad for STMT, if any.  */

static int
lp_nr_for_store (gimple *stmt)
{
  if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
    return 0;

  if (!stmt_could_throw_p (cfun, stmt))
    return 0;

  return lookup_stmt_eh_lp (stmt);
}
/* Record the store STMT for store merging optimization if it can be
   optimized.  Return true if any changes were made.  */

bool
pass_store_merging::process_store (gimple *stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  poly_uint64 bitsize, bitpos = 0;
  poly_uint64 bitregion_start = 0, bitregion_end = 0;
  tree base_addr
    = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
				   &bitregion_start, &bitregion_end);
  if (known_eq (bitsize, 0U))
    return false;

  bool invalid = (base_addr == NULL_TREE
		  || (maybe_gt (bitsize,
				(unsigned int) MAX_BITSIZE_MODE_ANY_INT)
		      && TREE_CODE (rhs) != INTEGER_CST
		      && (TREE_CODE (rhs) != CONSTRUCTOR
			  || CONSTRUCTOR_NELTS (rhs) != 0)));
  enum tree_code rhs_code = ERROR_MARK;
  bool bit_not_p = false;
  struct symbolic_number n;
  gimple *ins_stmt = NULL;
  store_operand_info ops[2];
  if (invalid)
    ;
  else if (TREE_CODE (rhs) == STRING_CST)
    rhs_code = STRING_CST;
  else if (rhs_valid_for_store_merging_p (rhs))
    rhs_code = INTEGER_CST;
  else if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
      if (!is_gimple_assign (def_stmt))
	invalid = true;
      else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
			     bitregion_start, bitregion_end))
	rhs_code = MEM_REF;
      else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
	{
	  tree rhs1 = gimple_assign_rhs1 (def_stmt);
	  if (TREE_CODE (rhs1) == SSA_NAME
	      && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
	    {
	      bit_not_p = true;
	      def_stmt = SSA_NAME_DEF_STMT (rhs1);
	    }
	}

      if (rhs_code == ERROR_MARK && !invalid)
	switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
	  {
	  case BIT_AND_EXPR:
	  case BIT_IOR_EXPR:
	  case BIT_XOR_EXPR:
	    tree rhs1, rhs2;
	    rhs1 = gimple_assign_rhs1 (def_stmt);
	    rhs2 = gimple_assign_rhs2 (def_stmt);
	    invalid = true;
	    if (TREE_CODE (rhs1) != SSA_NAME)
	      break;
	    def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
	    if (!is_gimple_assign (def_stmt1)
		|| !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
				  bitregion_start, bitregion_end))
	      break;
	    if (rhs_valid_for_store_merging_p (rhs2))
	      ops[1].val = rhs2;
	    else if (TREE_CODE (rhs2) != SSA_NAME)
	      break;
	    else
	      {
		def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
		if (!is_gimple_assign (def_stmt2))
		  break;
		else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
					bitregion_start, bitregion_end))
		  break;
	      }
      unsigned HOST_WIDE_INT const_bitsize;
      if (bitsize.is_constant (&const_bitsize)
	  && (const_bitsize % BITS_PER_UNIT) == 0
	  && const_bitsize <= 64
	  && multiple_p (bitpos, BITS_PER_UNIT))
	{
	  ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
	  if (ins_stmt)
	    {
	      uint64_t nn = n.n;
	      for (unsigned HOST_WIDE_INT i = 0;
		   i < const_bitsize;
		   i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
		if ((nn & MARKER_MASK) == 0
		    || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
		  {
		    ins_stmt = NULL;
		    break;
		  }
	      if (ins_stmt)
		{
		  if (invalid)
		    {
		      rhs_code = LROTATE_EXPR;
		      ops[0].base_addr = NULL_TREE;
		      ops[1].base_addr = NULL_TREE;
		    }
		  invalid = false;
		}
	    }
	}

      if (bitsize.is_constant (&const_bitsize)
	  && ((const_bitsize % BITS_PER_UNIT) != 0
	      || !multiple_p (bitpos, BITS_PER_UNIT))
	  && const_bitsize <= MAX_FIXED_MODE_SIZE)
	{
	  /* Bypass a conversion to the bit-field type.  */
	  if (is_gimple_assign (def_stmt)
	      && CONVERT_EXPR_CODE_P (rhs_code))
	    {
	      tree rhs1 = gimple_assign_rhs1 (def_stmt);
	      if (TREE_CODE (rhs1) == SSA_NAME
		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
		rhs = rhs1;
	    }
	  rhs_code = BIT_INSERT_EXPR;
	  ops[0].base_addr = NULL_TREE;
	  ops[1].base_addr = NULL_TREE;
	}
    }
  unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
  unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
  if (invalid
      || !bitsize.is_constant (&const_bitsize)
      || !bitpos.is_constant (&const_bitpos)
      || !bitregion_start.is_constant (&const_bitregion_start)
      || !bitregion_end.is_constant (&const_bitregion_end))
    return terminate_all_aliasing_chains (NULL, stmt);

  if (!ins_stmt)
    memset (&n, 0, sizeof (n));

  class imm_store_chain_info **chain_info = NULL;
  bool ret = false;
  chain_info = m_stores.get (base_addr);

  store_immediate_info *info;
  if (chain_info)
    {
      unsigned int ord = (*chain_info)->m_store_info.length ();
      info = new store_immediate_info (const_bitsize, const_bitpos,
				       const_bitregion_start,
				       const_bitregion_end,
				       stmt, ord, rhs_code, n, ins_stmt,
				       bit_not_p, lp_nr_for_store (stmt),
				       ops[0], ops[1]);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Recording immediate store from stmt:\n");
	  print_gimple_stmt (dump_file, stmt, 0);
	}
      (*chain_info)->m_store_info.safe_push (info);
      ret |= terminate_all_aliasing_chains (chain_info, stmt);
      /* If we reach the limit of stores to merge in a chain terminate and
	 process the chain now.  */
      if ((*chain_info)->m_store_info.length ()
	  == (unsigned int) param_max_stores_to_merge)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Reached maximum number of statements to merge:\n");
	  ret |= terminate_and_process_chain (*chain_info);
	}
      return ret;
    }

  /* Store aliases any existing chain?  */
  ret |= terminate_all_aliasing_chains (NULL, stmt);

  /* Start a new chain.  */
  class imm_store_chain_info *new_chain
    = new imm_store_chain_info (m_stores_head, base_addr);
  info = new store_immediate_info (const_bitsize, const_bitpos,
				   const_bitregion_start,
				   const_bitregion_end,
				   stmt, 0, rhs_code, n, ins_stmt,
				   bit_not_p, lp_nr_for_store (stmt),
				   ops[0], ops[1]);
  new_chain->m_store_info.safe_push (info);
  m_stores.put (base_addr, new_chain);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Starting active chain number %u with statement:\n",
	       m_n_chains);
      print_gimple_stmt (dump_file, stmt, 0);
      fprintf (dump_file, "The base object is:\n");
      print_generic_expr (dump_file, base_addr);
      fprintf (dump_file, "\n");
    }
  /* Prune oldest chains so that after adding the chain or store above
     we're again within the limits set by the params.  */
  if (m_n_chains > (unsigned)param_max_store_chains_to_track
      || m_n_stores > (unsigned)param_max_stores_to_track)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Too many chains (%u > %d) or stores (%u > %d), "
			    "terminating oldest chain(s).\n", m_n_chains,
		 param_max_store_chains_to_track, m_n_stores,
		 param_max_stores_to_track);
      imm_store_chain_info **e = &m_stores_head;
      unsigned idx = 0;
      unsigned n_stores = 0;
      while (*e)
	{
	  if (idx >= (unsigned)param_max_store_chains_to_track
	      || (n_stores + (*e)->m_store_info.length ()
		  > (unsigned)param_max_stores_to_track))
	    ret |= terminate_and_process_chain (*e);
	  else
	    {
	      n_stores += (*e)->m_store_info.length ();
	      e = &(*e)->next;
	      ++idx;
	    }
	}
    }

  return ret;
}
/* Return true if STMT is a store valid for store merging.  */

static bool
store_valid_for_store_merging_p (gimple *stmt)
{
  return gimple_assign_single_p (stmt)
	 && gimple_vdef (stmt)
	 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
	 && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
}

enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
/* Return the status of basic block BB wrt store merging.  */

static enum basic_block_status
get_status_for_store_merging (basic_block bb)
{
  unsigned int num_statements = 0;
  unsigned int num_constructors = 0;
  gimple_stmt_iterator gsi;
  edge e;
  gimple *last_stmt = NULL;

  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (is_gimple_debug (stmt))
	continue;

      last_stmt = stmt;

      if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
	break;

      if (is_gimple_assign (stmt)
	  && gimple_assign_rhs_code (stmt) == CONSTRUCTOR)
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  if (VECTOR_TYPE_P (TREE_TYPE (rhs))
	      && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
	      && gimple_assign_lhs (stmt) != NULL_TREE)
	    {
	      HOST_WIDE_INT sz
		= int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
	      if (sz == 16 || sz == 32 || sz == 64)
		{
		  num_constructors = 1;
		  break;
		}
	    }
	}
    }

  if (num_statements == 0 && num_constructors == 0)
    return BB_INVALID;

  if (cfun->can_throw_non_call_exceptions && cfun->eh
      && store_valid_for_store_merging_p (last_stmt)
      && (e = find_fallthru_edge (bb->succs))
      && e->dest == bb->next_bb)
    return BB_EXTENDED_VALID;

  return (num_statements >= 2 || num_constructors) ? BB_VALID : BB_INVALID;
}
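/* Note on BB_EXTENDED_VALID: such a block ends in a store that may throw
   (non-call exceptions enabled) and falls through to the next basic block,
   so the walk in the pass entry point below keeps its chains open across
   the block boundary instead of terminating them at the end of the
   block.  */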
/* Entry point for the pass.  Go over each basic block recording chains of
   immediate stores.  Upon encountering a terminating statement (as defined
   by stmt_terminates_chain_p) process the recorded stores and emit the widened
   variants.  */

unsigned int
pass_store_merging::execute (function *fun)
{
  basic_block bb;
  hash_set<gimple *> orig_stmts;
  bool changed = false, open_chains = false;

  /* If the function can throw and catch non-call exceptions, we'll be trying
     to merge stores across different basic blocks so we need to first unsplit
     the EH edges in order to streamline the CFG of the function.  */
  if (cfun->can_throw_non_call_exceptions && cfun->eh)
    unsplit_eh_edges ();

  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
    {
      const basic_block_status bb_status = get_status_for_store_merging (bb);
      gimple_stmt_iterator gsi;

      if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
	{
	  changed |= terminate_and_process_all_chains ();
	  open_chains = false;
	}

      if (bb_status == BB_INVALID)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); )
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gsi_next (&gsi);

	  if (is_gimple_debug (stmt))
	    continue;

	  if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
	    {
	      /* Terminate all chains.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Volatile access terminates "
				    "all chains\n");
	      changed |= terminate_and_process_all_chains ();
	      open_chains = false;
	      continue;
	    }

	  if (is_gimple_assign (stmt)
	      && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
	      && maybe_optimize_vector_constructor (stmt))
	    continue;

	  if (store_valid_for_store_merging_p (stmt))
	    changed |= process_store (stmt);
	  else
	    changed |= terminate_all_aliasing_chains (NULL, stmt);
	}

      if (bb_status == BB_EXTENDED_VALID)
	open_chains = true;
      else
	{
	  changed |= terminate_and_process_all_chains ();
	  open_chains = false;
	}
    }

  if (open_chains)
    changed |= terminate_and_process_all_chains ();

  /* If the function can throw and catch non-call exceptions and something
     changed during the pass, then the CFG has (very likely) changed too.  */
  if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
    {
      free_dominance_info (CDI_DOMINATORS);
      return TODO_cleanup_cfg;
    }

  return 0;
}
/* Construct and return a store merging pass object.  */

gimple_opt_pass *
make_pass_store_merging (gcc::context *ctxt)
{
  return new pass_store_merging (ctxt);
}

#if CHECKING_P

namespace selftest {

/* Selftests for store merging helpers.  */
/* Assert that all elements of the byte arrays X and Y, both of length N
   are equal.  */

static void
verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
{
  for (unsigned int i = 0; i < n; i++)
    {
      if (x[i] != y[i])
	{
	  fprintf (stderr, "Arrays do not match.  X:\n");
	  dump_char_array (stderr, x, n);
	  fprintf (stderr, "Y:\n");
	  dump_char_array (stderr, y, n);
	}
      ASSERT_EQ (x[i], y[i]);
    }
}
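/* The two shift selftests below also document the cross-byte carry
   direction: shift_bytes_in_array_left moves the high bits of byte I into
   the low bits of byte I + 1 ({ 0xe0, 0x1f } << 2 == { 0x80, 0x7f }),
   while shift_bytes_in_array_right moves the low bits of byte I into the
   high bits of byte I + 1 ({ 0x1f, 0xe0 } >> 2 == { 0x07, 0xf8 }).  */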
/* Test shift_bytes_in_array_left and that it carries bits across between
   bytes correctly.  */

static void
verify_shift_bytes_in_array_left (void)
{
   /* byte 1   | byte 0
      00011111 | 11100000.  */
  unsigned char orig[2] = { 0xe0, 0x1f };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);

  unsigned char expected[2] = { 0x80, 0x7f };
  shift_bytes_in_array_left (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array_left (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
}
/* Test shift_bytes_in_array_right and that it carries bits across between
   bytes correctly.  */

static void
verify_shift_bytes_in_array_right (void)
{
   /* byte 1   | byte 0
      00011111 | 11100000.  */
  unsigned char orig[2] = { 0x1f, 0xe0};
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);
  unsigned char expected[2] = { 0x07, 0xf8};
  shift_bytes_in_array_right (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array_right (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
}
/* Test clear_bit_region that it clears exactly the bits asked and
   nothing more.  */

static void
verify_clear_bit_region (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff};
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
  expected[0] = 0x1;
  expected[1] = 0;
  expected[2] = 0x80;
  verify_array_eq (in, expected, sizeof in);
}
/* Test clear_bit_region_be that it clears exactly the bits asked and
   nothing more.  */

static void
verify_clear_bit_region_be (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff};
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
  expected[0] = 0x80;
  expected[1] = 0;
  expected[2] = 0x1;
  verify_array_eq (in, expected, sizeof in);
}
/* Run all of the selftests within this file.  */

void
store_merging_cc_tests (void)
{
  verify_shift_bytes_in_array_left ();
  verify_shift_bytes_in_array_right ();
  verify_clear_bit_region ();
  verify_clear_bit_region_be ();
}

} // namespace selftest
#endif /* CHECKING_P.  */