libcpp, c, middle-end: Optimize initializers using #embed in C
[official-gcc.git] / gcc / tree-ssa-strlen.cc
blobee60909aa21ae4716461678526c069c113a16445
1 /* String length optimization
2 Copyright (C) 2011-2024 Free Software Foundation, Inc.
3 Contributed by Jakub Jelinek <jakub@redhat.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "alloc-pool.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "gimple-pretty-print.h"
33 #include "gimple-ssa-warn-access.h"
34 #include "gimple-ssa-warn-restrict.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-iterator.h"
38 #include "gimple-fold.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimplify-me.h"
42 #include "expr.h"
43 #include "tree-cfg.h"
44 #include "tree-dfa.h"
45 #include "domwalk.h"
46 #include "tree-ssa-alias.h"
47 #include "tree-ssa-propagate.h"
48 #include "tree-ssa-strlen.h"
49 #include "tree-hash-traits.h"
50 #include "builtins.h"
51 #include "pointer-query.h"
52 #include "target.h"
53 #include "diagnostic-core.h"
54 #include "diagnostic.h"
55 #include "intl.h"
56 #include "attribs.h"
57 #include "calls.h"
58 #include "cfgloop.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-scalar-evolution.h"
61 #include "vr-values.h"
62 #include "gimple-range.h"
63 #include "tree-ssa.h"
65 /* A vector indexed by SSA_NAME_VERSION. 0 means unknown, positive value
66 is an index into strinfo vector, negative value stands for
67 string length of a string literal (~strlen). */
68 static vec<int> ssa_ver_to_stridx;
70 /* Number of currently active string indexes plus one. */
71 static int max_stridx;
73 /* Set to true to optimize, false when just checking. */
74 static bool strlen_optimize;
76 /* String information record. */
77 struct strinfo
79 /* Number of leading characters that are known to be nonzero. This is
80 also the length of the string if FULL_STRING_P.
82 The values in a list of related string pointers must be consistent;
83 that is, if strinfo B comes X bytes after strinfo A, it must be
84 the case that A->nonzero_chars == X + B->nonzero_chars. */
85 tree nonzero_chars;
86 /* Any of the corresponding pointers for querying alias oracle. */
87 tree ptr;
88 /* STMT is used for two things:
90 - To record the statement that should be used for delayed length
91 computations. We maintain the invariant that all related strinfos
92 have delayed lengths or none do.
94 - To record the malloc or calloc call that produced this result
95 to optimize away malloc/memset sequences. STMT is reset after
96 a calloc-allocated object has been stored a non-zero value into. */
97 gimple *stmt;
98 /* Set to the dynamic allocation statement for the object (alloca,
99 calloc, malloc, or VLA). Unlike STMT, once set for a strinfo
100 object, ALLOC doesn't change. */
101 gimple *alloc;
102 /* Pointer to '\0' if known, if NULL, it can be computed as
103 ptr + length. */
104 tree endptr;
105 /* Reference count. Any changes to strinfo entry possibly shared
106 with dominating basic blocks need unshare_strinfo first, except
107 for dont_invalidate which affects only the immediately next
108 maybe_invalidate. */
109 int refcount;
110 /* Copy of index. get_strinfo (si->idx) should return si; */
111 int idx;
112 /* These 3 fields are for chaining related string pointers together.
113 E.g. for
114 bl = strlen (b); dl = strlen (d); strcpy (a, b); c = a + bl;
115 strcpy (c, d); e = c + dl;
116 strinfo(a) -> strinfo(c) -> strinfo(e)
117 All have ->first field equal to strinfo(a)->idx and are doubly
118 chained through prev/next fields. The later strinfos are required
119 to point into the same string with zero or more bytes after
120 the previous pointer and all bytes in between the two pointers
121 must be non-zero. Functions like strcpy or memcpy are supposed
122 to adjust all previous strinfo lengths, but not following strinfo
123 lengths (those are uncertain, usually invalidated during
124 maybe_invalidate, except when the alias oracle knows better).
125 Functions like strcat on the other side adjust the whole
126 related strinfo chain.
127 They are updated lazily, so to use the chain the same first fields
128 and si->prev->next == si->idx needs to be verified. */
129 int first;
130 int next;
131 int prev;
132 /* A flag whether the string is known to be written in the current
133 function. */
134 bool writable;
135 /* A flag for the next maybe_invalidate that this strinfo shouldn't
136 be invalidated. Always cleared by maybe_invalidate. */
137 bool dont_invalidate;
138 /* True if the string is known to be nul-terminated after NONZERO_CHARS
139 characters. False is useful when detecting strings that are built
140 up via successive memcpys. */
141 bool full_string_p;
144 /* Pool for allocating strinfo_struct entries. */
145 static object_allocator<strinfo> strinfo_pool ("strinfo pool");
147 /* Vector mapping positive string indexes to strinfo, for the
148 current basic block. The first pointer in the vector is special,
149 it is either NULL, meaning the vector isn't shared, or it is
150 a basic block pointer to the owner basic_block if shared.
151 If some other bb wants to modify the vector, the vector needs
152 to be unshared first, and only the owner bb is supposed to free it. */
153 static vec<strinfo *, va_heap, vl_embed> *stridx_to_strinfo;
155 /* One OFFSET->IDX mapping. */
156 struct stridxlist
158 struct stridxlist *next;
159 HOST_WIDE_INT offset;
160 int idx;
163 /* Hash table entry, mapping a DECL to a chain of OFFSET->IDX mappings. */
164 struct decl_stridxlist_map
166 struct tree_map_base base;
167 struct stridxlist list;
170 /* Hash table for mapping decls to a chained list of offset -> idx
171 mappings. */
172 typedef hash_map<tree_decl_hash, stridxlist> decl_to_stridxlist_htab_t;
173 static decl_to_stridxlist_htab_t *decl_to_stridxlist_htab;
175 /* Hash table mapping strlen (or strnlen with constant bound and return
176 smaller than bound) calls to stridx instances describing
177 the calls' arguments. Non-null only when warn_stringop_truncation
178 is non-zero. */
179 typedef std::pair<int, location_t> stridx_strlenloc;
180 static hash_map<tree, stridx_strlenloc> *strlen_to_stridx;
182 /* Obstack for struct stridxlist and struct decl_stridxlist_map. */
183 static struct obstack stridx_obstack;
185 /* Last memcpy statement if it could be adjusted if the trailing
186 '\0' written is immediately overwritten, or
187 *x = '\0' store that could be removed if it is immediately overwritten. */
188 struct laststmt_struct
190 gimple *stmt;
191 tree len;
192 int stridx;
193 } laststmt;
195 static int get_stridx_plus_constant (strinfo *, unsigned HOST_WIDE_INT, tree);
196 static bool get_range_strlen_dynamic (tree, gimple *, c_strlen_data *,
197 bitmap, pointer_query *, unsigned *);
199 /* Sets MINMAX to either the constant value or the range VAL is in
200 and returns either the constant value or VAL on success or null
201 when the range couldn't be determined. Uses RVALS or CFUN for
202 range info, whichever is nonnull. */
204 tree
205 get_range (tree val, gimple *stmt, wide_int minmax[2],
206 range_query *rvals /* = NULL */)
208 if (!rvals)
210 if (!cfun)
211 /* When called from front ends for global initializers CFUN
212 may be null. */
213 return NULL_TREE;
215 rvals = get_range_query (cfun);
218 value_range vr (TREE_TYPE (val));
219 if (!rvals->range_of_expr (vr, val, stmt))
220 return NULL_TREE;
222 tree vrmin, vrmax;
223 value_range_kind rng = get_legacy_range (vr, vrmin, vrmax);
224 if (rng == VR_RANGE)
226 /* Only handle straight ranges. */
227 minmax[0] = wi::to_wide (vrmin);
228 minmax[1] = wi::to_wide (vrmax);
229 return val;
232 return NULL_TREE;
235 class strlen_pass : public dom_walker
237 public:
238 strlen_pass (function *fun, cdi_direction direction)
239 : dom_walker (direction),
240 ptr_qry (get_range_query (fun)),
241 m_cleanup_cfg (false)
245 ~strlen_pass ();
247 edge before_dom_children (basic_block) final override;
248 void after_dom_children (basic_block) final override;
250 bool check_and_optimize_stmt (bool *cleanup_eh);
251 bool check_and_optimize_call (bool *zero_write);
252 bool handle_assign (tree lhs, bool *zero_write);
253 bool handle_store (bool *zero_write);
254 void handle_pointer_plus ();
255 void handle_builtin_strlen ();
256 void handle_builtin_strchr ();
257 void handle_builtin_strcpy (built_in_function);
258 void handle_integral_assign (bool *cleanup_eh);
259 void handle_builtin_stxncpy_strncat (bool append_p);
260 void handle_builtin_memcpy (built_in_function bcode);
261 void handle_builtin_strcat (built_in_function bcode);
262 void handle_builtin_strncat (built_in_function);
263 bool handle_builtin_memset (bool *zero_write);
264 bool handle_builtin_memcmp ();
265 bool handle_builtin_string_cmp ();
266 void handle_alloc_call (built_in_function);
267 void maybe_warn_overflow (gimple *stmt, bool call_lhs, tree len,
268 strinfo *si = NULL, bool plus_one = false,
269 bool rawmem = false);
270 void maybe_warn_overflow (gimple *stmt, bool call_lhs,
271 unsigned HOST_WIDE_INT len,
272 strinfo *si = NULL,
273 bool plus_one = false, bool rawmem = false);
274 void adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat);
275 tree strxcmp_eqz_result (gimple *stmt, tree arg1, int idx1,
276 tree arg2, int idx2,
277 unsigned HOST_WIDE_INT bound,
278 unsigned HOST_WIDE_INT len[2],
279 unsigned HOST_WIDE_INT *psize);
280 bool count_nonzero_bytes (tree expr_or_type,
281 gimple *stmt,
282 unsigned lenrange[3], bool *nulterm,
283 bool *allnul, bool *allnonnul);
284 bool count_nonzero_bytes (tree exp, tree vuse,
285 gimple *stmt,
286 unsigned HOST_WIDE_INT offset,
287 unsigned HOST_WIDE_INT nbytes,
288 unsigned lenrange[3], bool *nulterm,
289 bool *allnul, bool *allnonnul,
290 ssa_name_limit_t &snlim);
291 bool count_nonzero_bytes_addr (tree exp, tree vuse,
292 gimple *stmt,
293 unsigned HOST_WIDE_INT offset,
294 unsigned HOST_WIDE_INT nbytes,
295 unsigned lenrange[3], bool *nulterm,
296 bool *allnul, bool *allnonnul,
297 ssa_name_limit_t &snlim);
298 bool get_len_or_size (gimple *stmt, tree arg, int idx,
299 unsigned HOST_WIDE_INT lenrng[2],
300 unsigned HOST_WIDE_INT *size, bool *nulterm);
302 /* A pointer_query object to store information about pointers and
303 their targets in. */
304 pointer_query ptr_qry;
306 gimple_stmt_iterator m_gsi;
308 /* Flag that will trigger TODO_cleanup_cfg to be returned in strlen
309 execute function. */
310 bool m_cleanup_cfg;
313 /* Return:
315 * +1 if SI is known to start with more than OFF nonzero characters.
317 * 0 if SI is known to start with exactly OFF nonzero characters.
319 * -1 if SI either does not start with OFF nonzero characters
320 or the relationship between the number of leading nonzero
321 characters in SI and OFF is unknown. */
323 static int
324 compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off)
326 if (si->nonzero_chars
327 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
328 return compare_tree_int (si->nonzero_chars, off);
329 else
330 return -1;
333 /* Same as above but suitable also for strings with non-constant lengths.
334 Uses RVALS to determine length range. */
336 static int
337 compare_nonzero_chars (strinfo *si, gimple *stmt,
338 unsigned HOST_WIDE_INT off,
339 range_query *rvals)
341 if (!si->nonzero_chars)
342 return -1;
344 if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
345 return compare_tree_int (si->nonzero_chars, off);
347 if (!rvals || TREE_CODE (si->nonzero_chars) != SSA_NAME)
348 return -1;
350 int_range_max vr;
351 if (!rvals->range_of_expr (vr, si->nonzero_chars, stmt)
352 || vr.varying_p ()
353 || vr.undefined_p ())
354 return -1;
356 /* If the offset is less than the minimum length or if the bounds
357 of the length range are equal return the result of the comparison
358 same as in the constant case. Otherwise return a conservative
359 result. */
360 signop sign = TYPE_SIGN (vr.type ());
361 unsigned prec = TYPE_PRECISION (vr.type ());
362 int cmpmin = wi::cmp (vr.lower_bound (), wi::uhwi (off, prec), sign);
363 if (cmpmin > 0 || vr.singleton_p ())
364 return cmpmin;
366 return -1;
369 /* Return true if SI is known to be a zero-length string. */
371 static inline bool
372 zero_length_string_p (strinfo *si)
374 return si->full_string_p && integer_zerop (si->nonzero_chars);
377 /* Return strinfo vector entry IDX. */
379 static inline strinfo *
380 get_strinfo (int idx)
382 if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
383 return NULL;
384 return (*stridx_to_strinfo)[idx];
387 /* Get the next strinfo in the chain after SI, or null if none. */
389 static inline strinfo *
390 get_next_strinfo (strinfo *si)
392 if (si->next == 0)
393 return NULL;
394 strinfo *nextsi = get_strinfo (si->next);
395 if (nextsi == NULL || nextsi->first != si->first || nextsi->prev != si->idx)
396 return NULL;
397 return nextsi;
400 /* Helper function for get_stridx. Return the strinfo index of the address
401 of EXP, which is available in PTR if nonnull. If OFFSET_OUT, it is
402 OK to return the index for some X <= &EXP and store &EXP - X in
403 *OFFSET_OUT. When RVALS is nonnull uses it to determine range
404 information. */
406 static int
407 get_addr_stridx (tree exp, gimple *stmt,
408 tree ptr, unsigned HOST_WIDE_INT *offset_out,
409 range_query *rvals = NULL)
411 HOST_WIDE_INT off;
412 struct stridxlist *list, *last = NULL;
413 tree base;
415 if (!decl_to_stridxlist_htab)
416 return 0;
418 poly_int64 poff;
419 base = get_addr_base_and_unit_offset (exp, &poff);
420 if (base == NULL || !DECL_P (base) || !poff.is_constant (&off))
421 return 0;
423 list = decl_to_stridxlist_htab->get (base);
424 if (list == NULL)
425 return 0;
429 if (list->offset == off)
431 if (offset_out)
432 *offset_out = 0;
433 return list->idx;
435 if (list->offset > off)
436 return 0;
437 last = list;
438 list = list->next;
440 while (list);
442 if ((offset_out || ptr) && last && last->idx > 0)
444 unsigned HOST_WIDE_INT rel_off
445 = (unsigned HOST_WIDE_INT) off - last->offset;
446 strinfo *si = get_strinfo (last->idx);
447 if (si && compare_nonzero_chars (si, stmt, rel_off, rvals) >= 0)
449 if (offset_out)
451 *offset_out = rel_off;
452 return last->idx;
454 else
455 return get_stridx_plus_constant (si, rel_off, ptr);
458 return 0;
461 /* Returns string index for EXP. When EXP is an SSA_NAME that refers
462 to a known strinfo with an offset and OFFRNG is non-null, sets
463 both elements of the OFFRNG array to the range of the offset and
464 returns the index of the known strinfo. In this case the result
465 must not be used in for functions that modify the string.
466 When nonnull, uses RVALS to determine range information. */
468 static int
469 get_stridx (tree exp, gimple *stmt,
470 wide_int offrng[2] = NULL, range_query *rvals = NULL)
472 if (offrng)
473 offrng[0] = offrng[1] = wi::zero (TYPE_PRECISION (ptrdiff_type_node));
475 if (TREE_CODE (exp) == SSA_NAME)
477 if (ssa_ver_to_stridx[SSA_NAME_VERSION (exp)])
478 return ssa_ver_to_stridx[SSA_NAME_VERSION (exp)];
480 tree e = exp;
481 int last_idx = 0;
482 HOST_WIDE_INT offset = 0;
483 /* Follow a chain of at most 5 assignments. */
484 for (int i = 0; i < 5; i++)
486 gimple *def_stmt = SSA_NAME_DEF_STMT (e);
487 if (!is_gimple_assign (def_stmt))
488 return last_idx;
490 tree_code rhs_code = gimple_assign_rhs_code (def_stmt);
491 tree ptr, off;
493 if (rhs_code == ADDR_EXPR)
495 /* Handle indices/offsets into VLAs which are implemented
496 as pointers to arrays. */
497 ptr = gimple_assign_rhs1 (def_stmt);
498 ptr = TREE_OPERAND (ptr, 0);
500 /* Handle also VLAs of types larger than char. */
501 if (tree eltsize = TYPE_SIZE_UNIT (TREE_TYPE (ptr)))
503 if (TREE_CODE (ptr) == ARRAY_REF)
505 off = TREE_OPERAND (ptr, 1);
506 ptr = TREE_OPERAND (ptr, 0);
507 if (!integer_onep (eltsize))
509 /* Scale the array index by the size of the element
510 type in the rare case that it's greater than
511 the typical 1 for char, making sure both operands
512 have the same type. */
513 eltsize = fold_convert (ssizetype, eltsize);
514 off = fold_convert (ssizetype, off);
515 off = fold_build2 (MULT_EXPR, ssizetype, off, eltsize);
518 else
519 off = integer_zero_node;
521 else
522 return 0;
524 if (TREE_CODE (ptr) != MEM_REF)
525 return 0;
527 /* Add the MEM_REF byte offset. */
528 tree mem_off = TREE_OPERAND (ptr, 1);
529 off = fold_build2 (PLUS_EXPR, TREE_TYPE (off), off, mem_off);
530 ptr = TREE_OPERAND (ptr, 0);
532 else if (rhs_code == POINTER_PLUS_EXPR)
534 ptr = gimple_assign_rhs1 (def_stmt);
535 off = gimple_assign_rhs2 (def_stmt);
537 else
538 return 0;
540 if (TREE_CODE (ptr) != SSA_NAME)
541 return 0;
543 if (!tree_fits_shwi_p (off))
545 if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
546 if (offrng)
548 /* Only when requested by setting OFFRNG to non-null,
549 return the index corresponding to the SSA_NAME.
550 Do this irrespective of the whether the offset
551 is known. */
552 if (get_range (off, def_stmt, offrng, rvals))
554 /* When the offset range is known, increment it
555 it by the constant offset computed in prior
556 iterations and store it in the OFFRNG array. */
557 offrng[0] += offset;
558 offrng[1] += offset;
560 else
562 /* When the offset range cannot be determined
563 store [0, SIZE_MAX] and let the caller decide
564 if the offset matters. */
565 offrng[1] = wi::to_wide (TYPE_MAX_VALUE (sizetype));
566 offrng[0] = wi::zero (offrng[1].get_precision ());
568 return idx;
570 return 0;
573 HOST_WIDE_INT this_off = tree_to_shwi (off);
574 if (offrng)
576 offrng[0] += wi::shwi (this_off, offrng->get_precision ());
577 offrng[1] += offrng[0];
580 if (this_off < 0)
581 return last_idx;
583 offset = (unsigned HOST_WIDE_INT) offset + this_off;
584 if (offset < 0)
585 return last_idx;
587 if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
589 strinfo *si = get_strinfo (idx);
590 if (si)
592 if (compare_nonzero_chars (si, offset) >= 0)
593 return get_stridx_plus_constant (si, offset, exp);
595 if (offrng)
596 last_idx = idx;
599 e = ptr;
602 return last_idx;
605 if (TREE_CODE (exp) == ADDR_EXPR)
607 int idx = get_addr_stridx (TREE_OPERAND (exp, 0), stmt, exp, NULL);
608 if (idx != 0)
609 return idx;
612 const char *p = c_getstr (exp);
613 if (p)
614 return ~(int) strlen (p);
616 return 0;
619 /* Return true if strinfo vector is shared with the immediate dominator. */
621 static inline bool
622 strinfo_shared (void)
624 return vec_safe_length (stridx_to_strinfo)
625 && (*stridx_to_strinfo)[0] != NULL;
628 /* Unshare strinfo vector that is shared with the immediate dominator. */
630 static void
631 unshare_strinfo_vec (void)
633 strinfo *si;
634 unsigned int i = 0;
636 gcc_assert (strinfo_shared ());
637 stridx_to_strinfo = vec_safe_copy (stridx_to_strinfo);
638 for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
639 if (si != NULL)
640 si->refcount++;
641 (*stridx_to_strinfo)[0] = NULL;
644 /* Attempt to create a string index for exp, ADDR_EXPR's operand.
645 Return a pointer to the location where the string index can
646 be stored (if 0) or is stored, or NULL if this can't be tracked. */
648 static int *
649 addr_stridxptr (tree exp)
651 HOST_WIDE_INT off;
653 poly_int64 poff;
654 tree base = get_addr_base_and_unit_offset (exp, &poff);
655 if (base == NULL_TREE || !DECL_P (base) || !poff.is_constant (&off))
656 return NULL;
658 if (!decl_to_stridxlist_htab)
660 decl_to_stridxlist_htab
661 = new hash_map<tree_decl_hash, stridxlist> (64);
662 gcc_obstack_init (&stridx_obstack);
665 bool existed;
666 stridxlist *list = &decl_to_stridxlist_htab->get_or_insert (base, &existed);
667 if (existed)
669 int i;
670 stridxlist *before = NULL;
671 for (i = 0; i < 32; i++)
673 if (list->offset == off)
674 return &list->idx;
675 if (list->offset > off && before == NULL)
676 before = list;
677 if (list->next == NULL)
678 break;
679 list = list->next;
681 if (i == 32)
682 return NULL;
683 if (before)
685 list = before;
686 before = XOBNEW (&stridx_obstack, struct stridxlist);
687 *before = *list;
688 list->next = before;
689 list->offset = off;
690 list->idx = 0;
691 return &list->idx;
693 list->next = XOBNEW (&stridx_obstack, struct stridxlist);
694 list = list->next;
697 list->next = NULL;
698 list->offset = off;
699 list->idx = 0;
700 return &list->idx;
703 /* Create a new string index, or return 0 if reached limit. */
705 static int
706 new_stridx (tree exp)
708 int idx;
709 if (max_stridx >= param_max_tracked_strlens)
710 return 0;
711 if (TREE_CODE (exp) == SSA_NAME)
713 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
714 return 0;
715 idx = max_stridx++;
716 ssa_ver_to_stridx[SSA_NAME_VERSION (exp)] = idx;
717 return idx;
719 if (TREE_CODE (exp) == ADDR_EXPR)
721 int *pidx = addr_stridxptr (TREE_OPERAND (exp, 0));
722 if (pidx != NULL)
724 gcc_assert (*pidx == 0);
725 *pidx = max_stridx++;
726 return *pidx;
729 return 0;
732 /* Like new_stridx, but for ADDR_EXPR's operand instead. */
734 static int
735 new_addr_stridx (tree exp)
737 int *pidx;
738 if (max_stridx >= param_max_tracked_strlens)
739 return 0;
740 pidx = addr_stridxptr (exp);
741 if (pidx != NULL)
743 gcc_assert (*pidx == 0);
744 *pidx = max_stridx++;
745 return *pidx;
747 return 0;
750 /* Create a new strinfo. */
752 static strinfo *
753 new_strinfo (tree ptr, int idx, tree nonzero_chars, bool full_string_p)
755 strinfo *si = strinfo_pool.allocate ();
756 si->nonzero_chars = nonzero_chars;
757 STRIP_USELESS_TYPE_CONVERSION (ptr);
758 si->ptr = ptr;
759 si->stmt = NULL;
760 si->alloc = NULL;
761 si->endptr = NULL_TREE;
762 si->refcount = 1;
763 si->idx = idx;
764 si->first = 0;
765 si->prev = 0;
766 si->next = 0;
767 si->writable = false;
768 si->dont_invalidate = false;
769 si->full_string_p = full_string_p;
770 return si;
773 /* Decrease strinfo refcount and free it if not referenced anymore. */
775 static inline void
776 free_strinfo (strinfo *si)
778 if (si && --si->refcount == 0)
779 strinfo_pool.remove (si);
782 /* Set strinfo in the vector entry IDX to SI. */
784 static inline void
785 set_strinfo (int idx, strinfo *si)
787 if (vec_safe_length (stridx_to_strinfo) && (*stridx_to_strinfo)[0])
788 unshare_strinfo_vec ();
789 if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
790 vec_safe_grow_cleared (stridx_to_strinfo, idx + 1, true);
791 (*stridx_to_strinfo)[idx] = si;
794 /* Return the first strinfo in the related strinfo chain
795 if all strinfos in between belong to the chain, otherwise NULL. */
797 static strinfo *
798 verify_related_strinfos (strinfo *origsi)
800 strinfo *si = origsi, *psi;
802 if (origsi->first == 0)
803 return NULL;
804 for (; si->prev; si = psi)
806 if (si->first != origsi->first)
807 return NULL;
808 psi = get_strinfo (si->prev);
809 if (psi == NULL)
810 return NULL;
811 if (psi->next != si->idx)
812 return NULL;
814 if (si->idx != si->first)
815 return NULL;
816 return si;
819 /* Set SI's endptr to ENDPTR and compute its length based on SI->ptr.
820 Use LOC for folding. */
822 static void
823 set_endptr_and_length (location_t loc, strinfo *si, tree endptr)
825 si->endptr = endptr;
826 si->stmt = NULL;
827 tree start_as_size = fold_convert_loc (loc, size_type_node, si->ptr);
828 tree end_as_size = fold_convert_loc (loc, size_type_node, endptr);
829 si->nonzero_chars = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
830 end_as_size, start_as_size);
831 si->full_string_p = true;
834 /* Return the string length, or NULL if it can't be computed.
835 The length may but need not be constant. Instead, it might be
836 the result of a strlen() call. */
838 static tree
839 get_string_length (strinfo *si)
841 /* If the length has already been computed return it if it's exact
842 (i.e., the string is nul-terminated at NONZERO_CHARS), or return
843 null if it isn't. */
844 if (si->nonzero_chars)
845 return si->full_string_p ? si->nonzero_chars : NULL;
847 /* If the string is the result of one of the built-in calls below
848 attempt to compute the length from the call statement. */
849 if (si->stmt)
851 gimple *stmt = si->stmt, *lenstmt;
852 tree callee, lhs, fn, tem;
853 location_t loc;
854 gimple_stmt_iterator gsi;
856 gcc_assert (is_gimple_call (stmt));
857 callee = gimple_call_fndecl (stmt);
858 gcc_assert (callee && fndecl_built_in_p (callee, BUILT_IN_NORMAL));
859 lhs = gimple_call_lhs (stmt);
860 /* unshare_strinfo is intentionally not called here. The (delayed)
861 transformation of strcpy or strcat into stpcpy is done at the place
862 of the former strcpy/strcat call and so can affect all the strinfos
863 with the same stmt. If they were unshared before and transformation
864 has been already done, the handling of BUILT_IN_STPCPY{,_CHK} should
865 just compute the right length. */
866 switch (DECL_FUNCTION_CODE (callee))
868 case BUILT_IN_STRCAT:
869 case BUILT_IN_STRCAT_CHK:
870 gsi = gsi_for_stmt (stmt);
871 fn = builtin_decl_implicit (BUILT_IN_STRLEN);
872 gcc_assert (lhs == NULL_TREE);
873 tem = unshare_expr (gimple_call_arg (stmt, 0));
874 lenstmt = gimple_build_call (fn, 1, tem);
875 lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), lenstmt);
876 gimple_call_set_lhs (lenstmt, lhs);
877 gimple_set_vuse (lenstmt, gimple_vuse (stmt));
878 gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
879 tem = gimple_call_arg (stmt, 0);
880 if (!ptrofftype_p (TREE_TYPE (lhs)))
882 lhs = convert_to_ptrofftype (lhs);
883 lhs = force_gimple_operand_gsi (&gsi, lhs, true, NULL_TREE,
884 true, GSI_SAME_STMT);
886 lenstmt = gimple_build_assign
887 (make_ssa_name (TREE_TYPE (gimple_call_arg (stmt, 0))),
888 POINTER_PLUS_EXPR,tem, lhs);
889 gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
890 gimple_call_set_arg (stmt, 0, gimple_assign_lhs (lenstmt));
891 lhs = NULL_TREE;
892 /* FALLTHRU */
893 case BUILT_IN_STRCPY:
894 case BUILT_IN_STRCPY_CHK:
895 gcc_assert (builtin_decl_implicit_p (BUILT_IN_STPCPY));
896 if (gimple_call_num_args (stmt) == 2)
897 fn = builtin_decl_implicit (BUILT_IN_STPCPY);
898 else
899 fn = builtin_decl_explicit (BUILT_IN_STPCPY_CHK);
900 gcc_assert (lhs == NULL_TREE);
901 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
903 fprintf (dump_file, "Optimizing: ");
904 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
906 gimple_call_set_fndecl (stmt, fn);
907 lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), stmt);
908 gimple_call_set_lhs (stmt, lhs);
909 update_stmt (stmt);
910 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
912 fprintf (dump_file, "into: ");
913 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
915 /* FALLTHRU */
916 case BUILT_IN_STPCPY:
917 case BUILT_IN_STPCPY_CHK:
918 gcc_assert (lhs != NULL_TREE);
919 loc = gimple_location (stmt);
920 set_endptr_and_length (loc, si, lhs);
921 for (strinfo *chainsi = verify_related_strinfos (si);
922 chainsi != NULL;
923 chainsi = get_next_strinfo (chainsi))
924 if (chainsi->nonzero_chars == NULL)
925 set_endptr_and_length (loc, chainsi, lhs);
926 break;
927 case BUILT_IN_ALLOCA:
928 case BUILT_IN_ALLOCA_WITH_ALIGN:
929 case BUILT_IN_MALLOC:
930 break;
931 /* BUILT_IN_CALLOC always has si->nonzero_chars set. */
932 default:
933 gcc_unreachable ();
934 break;
938 return si->nonzero_chars;
941 /* Dump strlen data to FP for statement STMT. When non-null, RVALS
942 points to the valuation engine used to calculate ranges, and is
943 used to dump strlen range for non-constant results. */
945 DEBUG_FUNCTION void
946 dump_strlen_info (FILE *fp, gimple *stmt, range_query *rvals)
948 if (stmt)
950 fprintf (fp, "\nDumping strlen pass data after ");
951 print_gimple_expr (fp, stmt, TDF_LINENO);
952 fputc ('\n', fp);
954 else
955 fprintf (fp, "\nDumping strlen pass data\n");
957 fprintf (fp, "max_stridx = %i\n", max_stridx);
958 fprintf (fp, "ssa_ver_to_stridx has %u elements\n",
959 ssa_ver_to_stridx.length ());
960 fprintf (fp, "stridx_to_strinfo");
961 if (stridx_to_strinfo)
963 fprintf (fp, " has %u elements\n", stridx_to_strinfo->length ());
964 for (unsigned i = 0; i != stridx_to_strinfo->length (); ++i)
966 if (strinfo *si = (*stridx_to_strinfo)[i])
968 if (!si->idx)
969 continue;
970 fprintf (fp, " idx = %i", si->idx);
971 if (si->ptr)
973 fprintf (fp, ", ptr = ");
974 print_generic_expr (fp, si->ptr);
977 if (si->nonzero_chars)
979 fprintf (fp, ", nonzero_chars = ");
980 print_generic_expr (fp, si->nonzero_chars);
981 if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
983 int_range_max vr;
984 if (rvals)
985 rvals->range_of_expr (vr, si->nonzero_chars,
986 si->stmt);
987 else
988 get_range_query (cfun)->range_of_expr (vr,
989 si->nonzero_chars);
990 vr.dump (fp);
994 fprintf (fp, ", refcount = %i", si->refcount);
995 if (si->stmt)
997 fprintf (fp, ", stmt = ");
998 print_gimple_expr (fp, si->stmt, 0);
1000 if (si->alloc)
1002 fprintf (fp, ", alloc = ");
1003 print_gimple_expr (fp, si->alloc, 0);
1005 if (si->writable)
1006 fprintf (fp, ", writable");
1007 if (si->dont_invalidate)
1008 fprintf (fp, ", dont_invalidate");
1009 if (si->full_string_p)
1010 fprintf (fp, ", full_string_p");
1011 if (strinfo *next = get_next_strinfo (si))
1013 fprintf (fp, ", {");
1015 fprintf (fp, "%i%s", next->idx, next->first ? ", " : "");
1016 while ((next = get_next_strinfo (next)));
1017 fprintf (fp, "}");
1019 fputs ("\n", fp);
1023 else
1024 fprintf (fp, " = null\n");
1026 fprintf (fp, "decl_to_stridxlist_htab");
1027 if (decl_to_stridxlist_htab)
1029 fputs ("\n", fp);
1030 typedef decl_to_stridxlist_htab_t::iterator iter_t;
1031 for (iter_t it = decl_to_stridxlist_htab->begin ();
1032 it != decl_to_stridxlist_htab->end (); ++it)
1034 tree decl = (*it).first;
1035 stridxlist *list = &(*it).second;
1036 fprintf (fp, " decl = ");
1037 print_generic_expr (fp, decl);
1038 if (list)
1040 fprintf (fp, ", offsets = {");
1041 for (; list; list = list->next)
1042 fprintf (fp, "%lli%s", (long long) list->offset,
1043 list->next ? ", " : "");
1044 fputs ("}", fp);
1046 fputs ("\n", fp);
1049 else
1050 fprintf (fp, " = null\n");
1052 if (laststmt.stmt)
1054 fprintf (fp, "laststmt = ");
1055 print_gimple_expr (fp, laststmt.stmt, 0);
1056 fprintf (fp, ", len = ");
1057 print_generic_expr (fp, laststmt.len);
1058 fprintf (fp, ", stridx = %i\n", laststmt.stridx);
1062 /* Helper of get_range_strlen_dynamic(). See below. */
1064 static bool
1065 get_range_strlen_phi (tree src, gphi *phi,
1066 c_strlen_data *pdata, bitmap visited,
1067 pointer_query *ptr_qry, unsigned *pssa_def_max)
1069 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (src)))
1070 return true;
1072 if (*pssa_def_max == 0)
1073 return false;
1075 --*pssa_def_max;
1077 /* Iterate over the PHI arguments and determine the minimum and maximum
1078 length/size of each and incorporate them into the overall result. */
1079 for (unsigned i = 0; i != gimple_phi_num_args (phi); ++i)
1081 tree arg = gimple_phi_arg_def (phi, i);
1082 if (arg == gimple_phi_result (phi))
1083 continue;
1085 c_strlen_data argdata = { };
1086 if (!get_range_strlen_dynamic (arg, phi, &argdata, visited, ptr_qry,
1087 pssa_def_max))
1089 pdata->maxlen = build_all_ones_cst (size_type_node);
1090 continue;
1093 /* Set the DECL of an unterminated array this argument refers to
1094 if one hasn't been found yet. */
1095 if (!pdata->decl && argdata.decl)
1096 pdata->decl = argdata.decl;
1098 if (!argdata.minlen
1099 || (integer_zerop (argdata.minlen)
1100 && (!argdata.maxbound
1101 || integer_all_onesp (argdata.maxbound))
1102 && integer_all_onesp (argdata.maxlen)))
1104 /* Set the upper bound of the length to unbounded. */
1105 pdata->maxlen = build_all_ones_cst (size_type_node);
1106 continue;
1109 /* Adjust the minimum and maximum length determined so far and
1110 the upper bound on the array size. */
1111 if (TREE_CODE (argdata.minlen) == INTEGER_CST
1112 && (!pdata->minlen
1113 || tree_int_cst_lt (argdata.minlen, pdata->minlen)))
1114 pdata->minlen = argdata.minlen;
1116 if (TREE_CODE (argdata.maxlen) == INTEGER_CST
1117 && (!pdata->maxlen
1118 || (argdata.maxlen
1119 && tree_int_cst_lt (pdata->maxlen, argdata.maxlen))))
1120 pdata->maxlen = argdata.maxlen;
1122 if (!pdata->maxbound
1123 || TREE_CODE (pdata->maxbound) != INTEGER_CST
1124 || (argdata.maxbound
1125 && tree_int_cst_lt (pdata->maxbound, argdata.maxbound)
1126 && !integer_all_onesp (argdata.maxbound)))
1127 pdata->maxbound = argdata.maxbound;
1130 return true;
1133 /* Return the maximum possible length of the string PTR that's less
1134 than MAXLEN given the size of the object of subobject it points
1135 to at the given STMT. MAXLEN is the maximum length of the string
1136 determined so far. Return null when no such maximum can be
1137 determined. */
1139 static tree
1140 get_maxbound (tree ptr, gimple *stmt, offset_int maxlen,
1141 pointer_query *ptr_qry)
1143 access_ref aref;
1144 if (!ptr_qry->get_ref (ptr, stmt, &aref))
1145 return NULL_TREE;
1147 offset_int sizrem = aref.size_remaining ();
1148 if (sizrem <= 0)
1149 return NULL_TREE;
1151 if (sizrem < maxlen)
1152 maxlen = sizrem - 1;
1154 /* Try to determine the maximum from the subobject at the offset.
1155 This handles MEM [&some-struct, member-offset] that's often
1156 the result of folding COMPONENT_REF [some-struct, member]. */
1157 tree reftype = TREE_TYPE (aref.ref);
1158 if (!RECORD_OR_UNION_TYPE_P (reftype)
1159 || aref.offrng[0] != aref.offrng[1]
1160 || !wi::fits_shwi_p (aref.offrng[0]))
1161 return wide_int_to_tree (size_type_node, maxlen);
1163 HOST_WIDE_INT off = aref.offrng[0].to_shwi ();
1164 tree fld = field_at_offset (reftype, NULL_TREE, off);
1165 if (!fld || !DECL_SIZE_UNIT (fld))
1166 return wide_int_to_tree (size_type_node, maxlen);
1168 offset_int size = wi::to_offset (DECL_SIZE_UNIT (fld));
1169 if (maxlen < size)
1170 return wide_int_to_tree (size_type_node, maxlen);
1172 return wide_int_to_tree (size_type_node, size - 1);
1175 /* Attempt to determine the length of the string SRC. On success, store
1176 the length in *PDATA and return true. Otherwise, return false.
1177 VISITED is a bitmap of visited PHI nodes. RVALS points to the valuation
1178 engine used to calculate ranges. PSSA_DEF_MAX to an SSA_NAME
1179 assignment limit used to prevent runaway recursion. */
1181 static bool
1182 get_range_strlen_dynamic (tree src, gimple *stmt,
1183 c_strlen_data *pdata, bitmap visited,
1184 pointer_query *ptr_qry, unsigned *pssa_def_max)
1186 int idx = get_stridx (src, stmt);
1187 if (!idx)
1189 if (TREE_CODE (src) == SSA_NAME)
1191 gimple *def_stmt = SSA_NAME_DEF_STMT (src);
1192 if (gphi *phi = dyn_cast<gphi *>(def_stmt))
1193 return get_range_strlen_phi (src, phi, pdata, visited, ptr_qry,
1194 pssa_def_max);
1197 /* Return success regardless of the result and handle *PDATA
1198 in the caller. */
1199 get_range_strlen (src, pdata, 1);
1200 return true;
1203 if (idx < 0)
1205 /* SRC is a string of constant length. */
1206 pdata->minlen = build_int_cst (size_type_node, ~idx);
1207 pdata->maxlen = pdata->minlen;
1208 pdata->maxbound = pdata->maxlen;
1209 return true;
1212 if (strinfo *si = get_strinfo (idx))
1214 pdata->minlen = get_string_length (si);
1215 if (!pdata->minlen && si->nonzero_chars)
1217 if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
1218 pdata->minlen = si->nonzero_chars;
1219 else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
1221 int_range_max vr;
1222 ptr_qry->rvals->range_of_expr (vr, si->nonzero_chars, si->stmt);
1223 if (vr.undefined_p () || vr.varying_p ())
1224 pdata->minlen = build_zero_cst (size_type_node);
1225 else
1227 tree type = vr.type ();
1228 pdata->minlen = wide_int_to_tree (type, vr.lower_bound ());
1231 else
1232 pdata->minlen = build_zero_cst (size_type_node);
1234 tree base = si->ptr;
1235 if (TREE_CODE (base) == ADDR_EXPR)
1236 base = TREE_OPERAND (base, 0);
1238 HOST_WIDE_INT off;
1239 poly_int64 poff;
1240 base = get_addr_base_and_unit_offset (base, &poff);
1241 if (base
1242 && DECL_P (base)
1243 && TREE_CODE (TREE_TYPE (base)) == ARRAY_TYPE
1244 && TYPE_SIZE_UNIT (TREE_TYPE (base))
1245 && poff.is_constant (&off))
1247 tree basetype = TREE_TYPE (base);
1248 tree size = TYPE_SIZE_UNIT (basetype);
1249 if (TREE_CODE (size) == INTEGER_CST)
1251 ++off; /* Increment for the terminating nul. */
1252 tree toffset = build_int_cst (size_type_node, off);
1253 pdata->maxlen = fold_build2 (MINUS_EXPR, size_type_node,
1254 size, toffset);
1255 if (tree_int_cst_lt (pdata->maxlen, pdata->minlen))
1256 /* This can happen when triggering UB, when base is an
1257 array which is known to be filled with at least size
1258 non-zero bytes. E.g. for
1259 char a[2]; memcpy (a, "12", sizeof a);
1260 We don't want to create an invalid range [2, 1]
1261 where 2 comes from the number of non-zero bytes and
1262 1 from longest valid zero-terminated string that can
1263 be stored in such an array, so pick just one of
1264 those, pdata->minlen. See PR110603. */
1265 pdata->maxlen = build_all_ones_cst (size_type_node);
1266 else
1267 pdata->maxbound = pdata->maxlen;
1269 else
1270 pdata->maxlen = build_all_ones_cst (size_type_node);
1272 else
1273 pdata->maxlen = build_all_ones_cst (size_type_node);
1275 else if (pdata->minlen && TREE_CODE (pdata->minlen) == SSA_NAME)
1277 int_range_max vr;
1278 ptr_qry->rvals->range_of_expr (vr, si->nonzero_chars, stmt);
1279 if (vr.varying_p () || vr.undefined_p ())
1281 pdata->minlen = build_zero_cst (size_type_node);
1282 pdata->maxlen = build_all_ones_cst (size_type_node);
1284 else
1286 tree type = vr.type ();
1287 pdata->minlen = wide_int_to_tree (type, vr.lower_bound ());
1288 pdata->maxlen = wide_int_to_tree (type, vr.upper_bound ());
1289 offset_int max = offset_int::from (vr.upper_bound (0), SIGNED);
1290 if (tree maxbound = get_maxbound (si->ptr, stmt, max, ptr_qry))
1291 pdata->maxbound = maxbound;
1292 else
1293 pdata->maxbound = pdata->maxlen;
1296 else if (pdata->minlen && TREE_CODE (pdata->minlen) == INTEGER_CST)
1298 pdata->maxlen = pdata->minlen;
1299 pdata->maxbound = pdata->minlen;
1301 else
1303 /* For PDATA->MINLEN that's a non-constant expression such
1304 as PLUS_EXPR whose value range is unknown, set the bounds
1305 to zero and SIZE_MAX. */
1306 pdata->minlen = build_zero_cst (size_type_node);
1307 pdata->maxlen = build_all_ones_cst (size_type_node);
1310 return true;
1313 return false;
1316 /* Analogous to get_range_strlen but for dynamically created strings,
1317 i.e., those created by calls to strcpy as opposed to just string
1318 constants.
1319 Try to obtain the range of the lengths of the string(s) referenced
1320 by SRC, or the size of the largest array SRC refers to if the range
1321 of lengths cannot be determined, and store all in *PDATA. RVALS
1322 points to the valuation engine used to calculate ranges. */
1324 void
1325 get_range_strlen_dynamic (tree src, gimple *stmt, c_strlen_data *pdata,
1326 pointer_query &ptr_qry)
1328 auto_bitmap visited;
1329 tree maxbound = pdata->maxbound;
1331 unsigned limit = param_ssa_name_def_chain_limit;
1332 if (!get_range_strlen_dynamic (src, stmt, pdata, visited, &ptr_qry, &limit))
1334 /* On failure extend the length range to an impossible maximum
1335 (a valid MAXLEN must be less than PTRDIFF_MAX - 1). Other
1336 members can stay unchanged regardless. */
1337 pdata->minlen = ssize_int (0);
1338 pdata->maxlen = build_all_ones_cst (size_type_node);
1340 else if (!pdata->minlen)
1341 pdata->minlen = ssize_int (0);
1343 /* If it's unchanged from it initial non-null value, set the conservative
1344 MAXBOUND to SIZE_MAX. Otherwise leave it null (if it is null). */
1345 if (maxbound && pdata->maxbound == maxbound)
1346 pdata->maxbound = build_all_ones_cst (size_type_node);
1349 /* Invalidate string length information for strings whose length might
1350 change due to stores in STMT, except those marked DONT_INVALIDATE.
1351 For string-modifying statements, ZERO_WRITE is set when the statement
1352 wrote only zeros.
1353 Returns true if any STRIDX_TO_STRINFO entries were considered
1354 for invalidation. */
1356 static bool
1357 maybe_invalidate (gimple *stmt, bool zero_write = false)
1359 if (dump_file && (dump_flags & TDF_DETAILS))
1361 fprintf (dump_file, "%s called for ", __func__);
1362 print_gimple_stmt (dump_file, stmt, TDF_LINENO);
1365 strinfo *si;
1366 bool nonempty = false;
1368 for (unsigned i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
1370 if (si == NULL || !POINTER_TYPE_P (TREE_TYPE (si->ptr)))
1371 continue;
1373 nonempty = true;
1375 /* Unconditionally reset DONT_INVALIDATE. */
1376 bool dont_invalidate = si->dont_invalidate;
1377 si->dont_invalidate = false;
1379 if (dont_invalidate)
1380 continue;
1382 ao_ref r;
1383 tree size = si->nonzero_chars;
1384 ao_ref_init_from_ptr_and_size (&r, si->ptr, size);
1385 /* Include the terminating nul in the size of the string
1386 to consider when determining possible clobber. But do not
1387 add it to 'size' since we don't know whether it would
1388 actually fit the allocated area. */
1389 if (known_size_p (r.size))
1391 if (known_le (r.size, HOST_WIDE_INT_MAX - BITS_PER_UNIT))
1392 r.max_size += BITS_PER_UNIT;
1393 else
1394 r.max_size = -1;
1396 if (stmt_may_clobber_ref_p_1 (stmt, &r))
1398 if (dump_file && (dump_flags & TDF_DETAILS))
1400 fputs (" statement may clobber object ", dump_file);
1401 print_generic_expr (dump_file, si->ptr);
1402 if (size && tree_fits_uhwi_p (size))
1403 fprintf (dump_file, " " HOST_WIDE_INT_PRINT_UNSIGNED
1404 " bytes in size", tree_to_uhwi (size));
1405 fputc ('\n', dump_file);
1408 set_strinfo (i, NULL);
1409 free_strinfo (si);
1410 continue;
1413 if (size
1414 && !zero_write
1415 && si->stmt
1416 && is_gimple_call (si->stmt)
1417 && (DECL_FUNCTION_CODE (gimple_call_fndecl (si->stmt))
1418 == BUILT_IN_CALLOC))
1420 /* If the clobber test above considered the length of
1421 the string (including the nul), then for (potentially)
1422 non-zero writes that might modify storage allocated by
1423 calloc consider the whole object and if it might be
1424 clobbered by the statement reset the statement. */
1425 ao_ref_init_from_ptr_and_size (&r, si->ptr, NULL_TREE);
1426 if (stmt_may_clobber_ref_p_1 (stmt, &r))
1427 si->stmt = NULL;
1431 if (dump_file && (dump_flags & TDF_DETAILS))
1432 fprintf (dump_file, "%s returns %i\n", __func__, nonempty);
1434 return nonempty;
1437 /* Unshare strinfo record SI, if it has refcount > 1 or
1438 if stridx_to_strinfo vector is shared with some other
1439 bbs. */
1441 static strinfo *
1442 unshare_strinfo (strinfo *si)
1444 strinfo *nsi;
1446 if (si->refcount == 1 && !strinfo_shared ())
1447 return si;
1449 nsi = new_strinfo (si->ptr, si->idx, si->nonzero_chars, si->full_string_p);
1450 nsi->stmt = si->stmt;
1451 nsi->alloc = si->alloc;
1452 nsi->endptr = si->endptr;
1453 nsi->first = si->first;
1454 nsi->prev = si->prev;
1455 nsi->next = si->next;
1456 nsi->writable = si->writable;
1457 set_strinfo (si->idx, nsi);
1458 free_strinfo (si);
1459 return nsi;
1462 /* Attempt to create a new strinfo for BASESI + OFF, or find existing
1463 strinfo if there is any. Return it's idx, or 0 if no strinfo has
1464 been created. */
1466 static int
1467 get_stridx_plus_constant (strinfo *basesi, unsigned HOST_WIDE_INT off,
1468 tree ptr)
1470 if (TREE_CODE (ptr) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1471 return 0;
1473 if (compare_nonzero_chars (basesi, off) < 0
1474 || !tree_fits_uhwi_p (basesi->nonzero_chars))
1475 return 0;
1477 unsigned HOST_WIDE_INT nonzero_chars
1478 = tree_to_uhwi (basesi->nonzero_chars) - off;
1479 strinfo *si = basesi, *chainsi;
1480 if (si->first || si->prev || si->next)
1481 si = verify_related_strinfos (basesi);
1482 if (si == NULL
1483 || si->nonzero_chars == NULL_TREE
1484 || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1485 return 0;
1487 if (TREE_CODE (ptr) == SSA_NAME
1488 && ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1489 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1491 gcc_checking_assert (compare_tree_int (si->nonzero_chars, off) != -1);
1492 for (chainsi = si; chainsi->next; chainsi = si)
1494 si = get_next_strinfo (chainsi);
1495 if (si == NULL
1496 || si->nonzero_chars == NULL_TREE
1497 || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1498 break;
1499 int r = compare_tree_int (si->nonzero_chars, nonzero_chars);
1500 if (r != 1)
1502 if (r == 0)
1504 if (TREE_CODE (ptr) == SSA_NAME)
1505 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = si->idx;
1506 else
1508 int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1509 if (pidx != NULL && *pidx == 0)
1510 *pidx = si->idx;
1512 return si->idx;
1514 break;
1518 int idx = new_stridx (ptr);
1519 if (idx == 0)
1520 return 0;
1521 si = new_strinfo (ptr, idx, build_int_cst (size_type_node, nonzero_chars),
1522 basesi->full_string_p);
1523 set_strinfo (idx, si);
1524 if (strinfo *nextsi = get_strinfo (chainsi->next))
1526 nextsi = unshare_strinfo (nextsi);
1527 si->next = nextsi->idx;
1528 nextsi->prev = idx;
1530 chainsi = unshare_strinfo (chainsi);
1531 if (chainsi->first == 0)
1532 chainsi->first = chainsi->idx;
1533 chainsi->next = idx;
1534 if (chainsi->endptr == NULL_TREE && zero_length_string_p (si))
1535 chainsi->endptr = ptr;
1536 si->endptr = chainsi->endptr;
1537 si->prev = chainsi->idx;
1538 si->first = chainsi->first;
1539 si->writable = chainsi->writable;
1540 return si->idx;
1543 /* Note that PTR, a pointer SSA_NAME initialized in the current stmt, points
1544 to a zero-length string and if possible chain it to a related strinfo
1545 chain whose part is or might be CHAINSI. */
1547 static strinfo *
1548 zero_length_string (tree ptr, strinfo *chainsi)
1550 strinfo *si;
1551 int idx;
1552 if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1553 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1554 gcc_checking_assert (TREE_CODE (ptr) == SSA_NAME
1555 && ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] == 0);
1557 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1558 return NULL;
1559 if (chainsi != NULL)
1561 si = verify_related_strinfos (chainsi);
1562 if (si)
1566 /* We shouldn't mix delayed and non-delayed lengths. */
1567 gcc_assert (si->full_string_p);
1568 if (si->endptr == NULL_TREE)
1570 si = unshare_strinfo (si);
1571 si->endptr = ptr;
1573 chainsi = si;
1574 si = get_next_strinfo (si);
1576 while (si != NULL);
1577 if (zero_length_string_p (chainsi))
1579 if (chainsi->next)
1581 chainsi = unshare_strinfo (chainsi);
1582 chainsi->next = 0;
1584 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = chainsi->idx;
1585 return chainsi;
1588 else
1590 /* We shouldn't mix delayed and non-delayed lengths. */
1591 gcc_assert (chainsi->full_string_p);
1592 if (chainsi->first || chainsi->prev || chainsi->next)
1594 chainsi = unshare_strinfo (chainsi);
1595 chainsi->first = 0;
1596 chainsi->prev = 0;
1597 chainsi->next = 0;
1601 idx = new_stridx (ptr);
1602 if (idx == 0)
1603 return NULL;
1604 si = new_strinfo (ptr, idx, build_int_cst (size_type_node, 0), true);
1605 set_strinfo (idx, si);
1606 si->endptr = ptr;
1607 if (chainsi != NULL)
1609 chainsi = unshare_strinfo (chainsi);
1610 if (chainsi->first == 0)
1611 chainsi->first = chainsi->idx;
1612 chainsi->next = idx;
1613 if (chainsi->endptr == NULL_TREE)
1614 chainsi->endptr = ptr;
1615 si->prev = chainsi->idx;
1616 si->first = chainsi->first;
1617 si->writable = chainsi->writable;
1619 return si;
1622 /* For strinfo ORIGSI whose length has been just updated, adjust other
1623 related strinfos so that they match the new ORIGSI. This involves:
1625 - adding ADJ to the nonzero_chars fields
1626 - copying full_string_p from the new ORIGSI. */
1628 static void
1629 adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj)
1631 strinfo *si = verify_related_strinfos (origsi);
1633 if (si == NULL)
1634 return;
1636 while (1)
1638 strinfo *nsi;
1640 if (si != origsi)
1642 tree tem;
1644 si = unshare_strinfo (si);
1645 /* We shouldn't see delayed lengths here; the caller must
1646 have calculated the old length in order to calculate
1647 the adjustment. */
1648 gcc_assert (si->nonzero_chars);
1649 tem = fold_convert_loc (loc, TREE_TYPE (si->nonzero_chars), adj);
1650 si->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
1651 TREE_TYPE (si->nonzero_chars),
1652 si->nonzero_chars, tem);
1653 si->full_string_p = origsi->full_string_p;
1655 si->endptr = NULL_TREE;
1656 si->dont_invalidate = true;
1658 nsi = get_next_strinfo (si);
1659 if (nsi == NULL)
1660 return;
1661 si = nsi;
1665 /* Find if there are other SSA_NAME pointers equal to PTR
1666 for which we don't track their string lengths yet. If so, use
1667 IDX for them. */
1669 static void
1670 find_equal_ptrs (tree ptr, int idx)
1672 if (TREE_CODE (ptr) != SSA_NAME)
1673 return;
1674 while (1)
1676 gimple *stmt = SSA_NAME_DEF_STMT (ptr);
1677 if (!is_gimple_assign (stmt))
1678 return;
1679 ptr = gimple_assign_rhs1 (stmt);
1680 switch (gimple_assign_rhs_code (stmt))
1682 case SSA_NAME:
1683 break;
1684 CASE_CONVERT:
1685 if (!POINTER_TYPE_P (TREE_TYPE (ptr)))
1686 return;
1687 if (TREE_CODE (ptr) == SSA_NAME)
1688 break;
1689 if (TREE_CODE (ptr) != ADDR_EXPR)
1690 return;
1691 /* FALLTHRU */
1692 case ADDR_EXPR:
1694 int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1695 if (pidx != NULL && *pidx == 0)
1696 *pidx = idx;
1697 return;
1699 default:
1700 return;
1703 /* We might find an endptr created in this pass. Grow the
1704 vector in that case. */
1705 if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1706 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1708 if (ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] != 0)
1709 return;
1710 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = idx;
1714 /* Return true if STMT is a call to a builtin function with the right
1715 arguments and attributes that should be considered for optimization
1716 by this pass. */
1718 static bool
1719 valid_builtin_call (gimple *stmt)
1721 if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
1722 return false;
1724 tree callee = gimple_call_fndecl (stmt);
1725 switch (DECL_FUNCTION_CODE (callee))
1727 case BUILT_IN_MEMCMP:
1728 case BUILT_IN_MEMCMP_EQ:
1729 case BUILT_IN_STRCMP:
1730 case BUILT_IN_STRNCMP:
1731 case BUILT_IN_STRCHR:
1732 case BUILT_IN_STRLEN:
1733 case BUILT_IN_STRNLEN:
1734 /* The above functions should be pure. Punt if they aren't. */
1735 if (gimple_vdef (stmt) || gimple_vuse (stmt) == NULL_TREE)
1736 return false;
1737 break;
1739 case BUILT_IN_ALLOCA:
1740 case BUILT_IN_ALLOCA_WITH_ALIGN:
1741 case BUILT_IN_CALLOC:
1742 case BUILT_IN_MALLOC:
1743 case BUILT_IN_MEMCPY:
1744 case BUILT_IN_MEMCPY_CHK:
1745 case BUILT_IN_MEMPCPY:
1746 case BUILT_IN_MEMPCPY_CHK:
1747 case BUILT_IN_MEMSET:
1748 case BUILT_IN_STPCPY:
1749 case BUILT_IN_STPCPY_CHK:
1750 case BUILT_IN_STPNCPY:
1751 case BUILT_IN_STPNCPY_CHK:
1752 case BUILT_IN_STRCAT:
1753 case BUILT_IN_STRCAT_CHK:
1754 case BUILT_IN_STRCPY:
1755 case BUILT_IN_STRCPY_CHK:
1756 case BUILT_IN_STRNCAT:
1757 case BUILT_IN_STRNCAT_CHK:
1758 case BUILT_IN_STRNCPY:
1759 case BUILT_IN_STRNCPY_CHK:
1760 /* The above functions should be neither const nor pure. Punt if they
1761 aren't. */
1762 if (gimple_vdef (stmt) == NULL_TREE || gimple_vuse (stmt) == NULL_TREE)
1763 return false;
1764 break;
1766 default:
1767 break;
1770 return true;
1773 /* If the last .MEM setter statement before STMT is
1774 memcpy (x, y, strlen (y) + 1), the only .MEM use of it is STMT
1775 and STMT is known to overwrite x[strlen (x)], adjust the last memcpy to
1776 just memcpy (x, y, strlen (y)). SI must be the zero length
1777 strinfo. */
1779 void
1780 strlen_pass::adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat)
1782 tree vuse, callee, len;
1783 struct laststmt_struct last = laststmt;
1784 strinfo *lastsi, *firstsi;
1785 unsigned len_arg_no = 2;
1787 laststmt.stmt = NULL;
1788 laststmt.len = NULL_TREE;
1789 laststmt.stridx = 0;
1791 if (last.stmt == NULL)
1792 return;
1794 vuse = gimple_vuse (stmt);
1795 if (vuse == NULL_TREE
1796 || SSA_NAME_DEF_STMT (vuse) != last.stmt
1797 || !has_single_use (vuse))
1798 return;
1800 gcc_assert (last.stridx > 0);
1801 lastsi = get_strinfo (last.stridx);
1802 if (lastsi == NULL)
1803 return;
1805 if (lastsi != si)
1807 if (lastsi->first == 0 || lastsi->first != si->first)
1808 return;
1810 firstsi = verify_related_strinfos (si);
1811 if (firstsi == NULL)
1812 return;
1813 while (firstsi != lastsi)
1815 firstsi = get_next_strinfo (firstsi);
1816 if (firstsi == NULL)
1817 return;
1821 if (!is_strcat && !zero_length_string_p (si))
1822 return;
1824 if (is_gimple_assign (last.stmt))
1826 gimple_stmt_iterator gsi;
1828 if (!integer_zerop (gimple_assign_rhs1 (last.stmt)))
1829 return;
1830 if (stmt_could_throw_p (cfun, last.stmt))
1831 return;
1832 gsi = gsi_for_stmt (last.stmt);
1833 unlink_stmt_vdef (last.stmt);
1834 release_defs (last.stmt);
1835 gsi_remove (&gsi, true);
1836 return;
1839 if (!valid_builtin_call (last.stmt))
1840 return;
1842 callee = gimple_call_fndecl (last.stmt);
1843 switch (DECL_FUNCTION_CODE (callee))
1845 case BUILT_IN_MEMCPY:
1846 case BUILT_IN_MEMCPY_CHK:
1847 break;
1848 default:
1849 return;
1852 len = gimple_call_arg (last.stmt, len_arg_no);
1853 if (tree_fits_uhwi_p (len))
1855 if (!tree_fits_uhwi_p (last.len)
1856 || integer_zerop (len)
1857 || tree_to_uhwi (len) != tree_to_uhwi (last.len) + 1)
1858 return;
1859 /* Don't adjust the length if it is divisible by 4, it is more efficient
1860 to store the extra '\0' in that case. */
1861 if ((tree_to_uhwi (len) & 3) == 0)
1862 return;
1864 /* Don't fold away an out of bounds access, as this defeats proper
1865 warnings. */
1866 tree dst = gimple_call_arg (last.stmt, 0);
1868 access_ref aref;
1869 tree size = compute_objsize (dst, stmt, 1, &aref, &ptr_qry);
1870 if (size && tree_int_cst_lt (size, len))
1871 return;
1873 else if (TREE_CODE (len) == SSA_NAME)
1875 gimple *def_stmt = SSA_NAME_DEF_STMT (len);
1876 if (!is_gimple_assign (def_stmt)
1877 || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
1878 || gimple_assign_rhs1 (def_stmt) != last.len
1879 || !integer_onep (gimple_assign_rhs2 (def_stmt)))
1880 return;
1882 else
1883 return;
1885 gimple_call_set_arg (last.stmt, len_arg_no, last.len);
1886 update_stmt (last.stmt);
1889 /* For an LHS that is an SSA_NAME that is the result of a strlen()
1890 call, or when BOUND is non-null, of a strnlen() call, set LHS
1891 range info to [0, min (MAX, BOUND)] when the range includes more
1892 than one value and return LHS. Otherwise, when the range
1893 [MIN, MAX] is such that MIN == MAX, return the tree representation
1894 of (MIN). The latter allows callers to fold suitable strnlen() calls
1895 to constants. */
1897 tree
1898 set_strlen_range (tree lhs, wide_int min, wide_int max,
1899 tree bound /* = NULL_TREE */)
1901 if (TREE_CODE (lhs) != SSA_NAME
1902 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1903 return NULL_TREE;
1905 if (bound)
1907 /* For strnlen, adjust MIN and MAX as necessary. If the bound
1908 is less than the size of the array set MAX to it. It it's
1909 greater than MAX and MAX is non-zero bump MAX down to account
1910 for the necessary terminating nul. Otherwise leave it alone. */
1911 if (TREE_CODE (bound) == INTEGER_CST)
1913 wide_int wibnd = wi::to_wide (bound);
1914 int cmp = wi::cmpu (wibnd, max);
1915 if (cmp < 0)
1916 max = wibnd;
1917 else if (cmp && wi::ne_p (max, min))
1918 --max;
1920 else if (TREE_CODE (bound) == SSA_NAME)
1922 int_range_max r;
1923 get_range_query (cfun)->range_of_expr (r, bound);
1924 if (!r.undefined_p ())
1926 /* For a bound in a known range, adjust the range determined
1927 above as necessary. For a bound in some anti-range or
1928 in an unknown range, use the range determined by callers. */
1929 if (wi::ltu_p (r.lower_bound (), min))
1930 min = r.lower_bound ();
1931 if (wi::ltu_p (r.upper_bound (), max))
1932 max = r.upper_bound ();
1937 if (min == max)
1938 return wide_int_to_tree (size_type_node, min);
1940 int_range_max vr (TREE_TYPE (lhs), min, max);
1941 set_range_info (lhs, vr);
1942 return lhs;
1945 /* For an LHS that is an SSA_NAME and for strlen() or strnlen() argument
1946 SRC, set LHS range info to [0, min (N, BOUND)] if SRC refers to
1947 a character array A[N] with unknown length bounded by N, and for
1948 strnlen(), by min (N, BOUND). */
1950 static tree
1951 maybe_set_strlen_range (tree lhs, tree src, tree bound)
1953 if (TREE_CODE (lhs) != SSA_NAME
1954 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1955 return NULL_TREE;
1957 if (TREE_CODE (src) == SSA_NAME)
1959 gimple *def = SSA_NAME_DEF_STMT (src);
1960 if (is_gimple_assign (def)
1961 && gimple_assign_rhs_code (def) == ADDR_EXPR)
1962 src = gimple_assign_rhs1 (def);
1965 /* The longest string is PTRDIFF_MAX - 1 bytes including the final
1966 NUL so that the difference between a pointer to just past it and
1967 one to its beginning is positive. */
1968 wide_int max = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
1970 if (TREE_CODE (src) == ADDR_EXPR)
1972 /* The last array member of a struct can be bigger than its size
1973 suggests if it's treated as a poor-man's flexible array member. */
1974 src = TREE_OPERAND (src, 0);
1975 if (TREE_CODE (src) != MEM_REF
1976 && !array_ref_flexible_size_p (src))
1978 tree type = TREE_TYPE (src);
1979 tree size = TYPE_SIZE_UNIT (type);
1980 if (size
1981 && TREE_CODE (size) == INTEGER_CST
1982 && !integer_zerop (size))
1984 /* Even though such uses of strlen would be undefined,
1985 avoid relying on arrays of arrays in case some genius
1986 decides to call strlen on an unterminated array element
1987 that's followed by a terminated one. Likewise, avoid
1988 assuming that a struct array member is necessarily
1989 nul-terminated (the nul may be in the member that
1990 follows). In those cases, assume that the length
1991 of the string stored in such an array is bounded
1992 by the size of the enclosing object if one can be
1993 determined. */
1994 tree base = get_base_address (src);
1995 if (VAR_P (base))
1997 if (tree size = DECL_SIZE_UNIT (base))
1998 if (size
1999 && TREE_CODE (size) == INTEGER_CST
2000 && TREE_CODE (TREE_TYPE (base)) != POINTER_TYPE)
2001 max = wi::to_wide (size);
2005 /* For strlen() the upper bound above is equal to
2006 the longest string that can be stored in the array
2007 (i.e., it accounts for the terminating nul. For
2008 strnlen() bump up the maximum by one since the array
2009 need not be nul-terminated. */
2010 if (!bound && max != 0)
2011 --max;
2015 wide_int min = wi::zero (max.get_precision ());
2016 return set_strlen_range (lhs, min, max, bound);
2019 /* Diagnose buffer overflow by a STMT writing LEN + PLUS_ONE bytes,
2020 either into a region allocated for the object SI when non-null,
2021 or into an object designated by the LHS of STMT otherwise.
2022 For a call STMT, when CALL_LHS is set use its left hand side
2023 as the destination, otherwise use argument zero.
2024 When nonnull uses RVALS to determine range information.
2025 RAWMEM may be set by memcpy and other raw memory functions
2026 to allow accesses across subobject boundaries. */
2028 void
2029 strlen_pass::maybe_warn_overflow (gimple *stmt, bool call_lhs, tree len,
2030 strinfo *si, bool plus_one, bool rawmem)
2032 if (!len || warning_suppressed_p (stmt, OPT_Wstringop_overflow_))
2033 return;
2035 /* The DECL of the function performing the write if it is done
2036 by one. */
2037 tree writefn = NULL_TREE;
2038 /* The destination expression involved in the store or call STMT. */
2039 tree dest = NULL_TREE;
2041 if (is_gimple_assign (stmt))
2042 dest = gimple_assign_lhs (stmt);
2043 else if (is_gimple_call (stmt))
2045 if (call_lhs)
2046 dest = gimple_call_lhs (stmt);
2047 else
2049 gcc_assert (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL));
2050 dest = gimple_call_arg (stmt, 0);
2053 if (!dest)
2054 return;
2055 writefn = gimple_call_fndecl (stmt);
2057 else
2058 return;
2060 if (warning_suppressed_p (dest, OPT_Wstringop_overflow_))
2061 return;
2063 const int ostype = rawmem ? 0 : 1;
2065 /* Use maximum precision to avoid overflow in the addition below.
2066 Make sure all operands have the same precision to keep wide_int
2067 from ICE'ing. */
2069 access_ref aref;
2070 /* The size of the destination region (which is smaller than
2071 the destination object for stores at a non-zero offset). */
2072 tree destsize = compute_objsize (dest, stmt, ostype, &aref, &ptr_qry);
2074 if (!destsize)
2076 aref.sizrng[0] = 0;
2077 aref.sizrng[1] = wi::to_offset (max_object_size ());
2080 /* Return early if the DESTSIZE size expression is the same as LEN
2081 and the offset into the destination is zero. This might happen
2082 in the case of a pair of malloc and memset calls to allocate
2083 an object and clear it as if by calloc. */
2084 if (destsize == len && !plus_one
2085 && aref.offrng[0] == 0 && aref.offrng[0] == aref.offrng[1])
2086 return;
2088 wide_int rng[2];
2089 if (!get_range (len, stmt, rng, ptr_qry.rvals))
2090 return;
2092 widest_int lenrng[2] =
2093 { widest_int::from (rng[0], SIGNED), widest_int::from (rng[1], SIGNED) };
2095 if (plus_one)
2097 lenrng[0] += 1;
2098 lenrng[1] += 1;
2101 /* The size of the remaining space in the destination computed
2102 as the size of the latter minus the offset into it. */
2103 widest_int spcrng[2];
2105 offset_int remrng[2];
2106 remrng[1] = aref.size_remaining (remrng);
2107 spcrng[0] = remrng[0] == -1 ? 0 : widest_int::from (remrng[0], UNSIGNED);
2108 spcrng[1] = widest_int::from (remrng[1], UNSIGNED);
2111 if (wi::leu_p (lenrng[0], spcrng[0])
2112 && wi::leu_p (lenrng[1], spcrng[1]))
2113 return;
2115 location_t loc = gimple_or_expr_nonartificial_location (stmt, dest);
2116 bool warned = false;
2117 if (wi::leu_p (lenrng[0], spcrng[1]))
2119 if (len != destsize
2120 && (!si || rawmem || !is_strlen_related_p (si->ptr, len)))
2121 return;
2123 warned = (writefn
2124 ? warning_at (loc, OPT_Wstringop_overflow_,
2125 "%qD writing one too many bytes into a region "
2126 "of a size that depends on %<strlen%>",
2127 writefn)
2128 : warning_at (loc, OPT_Wstringop_overflow_,
2129 "writing one too many bytes into a region "
2130 "of a size that depends on %<strlen%>"));
2132 else if (lenrng[0] == lenrng[1])
2134 if (spcrng[0] == spcrng[1])
2135 warned = (writefn
2136 ? warning_n (loc, OPT_Wstringop_overflow_,
2137 lenrng[0].to_uhwi (),
2138 "%qD writing %wu byte into a region "
2139 "of size %wu",
2140 "%qD writing %wu bytes into a region "
2141 "of size %wu",
2142 writefn, lenrng[0].to_uhwi (),
2143 spcrng[0].to_uhwi ())
2144 : warning_n (loc, OPT_Wstringop_overflow_,
2145 lenrng[0].to_uhwi (),
2146 "writing %wu byte into a region "
2147 "of size %wu",
2148 "writing %wu bytes into a region "
2149 "of size %wu",
2150 lenrng[0].to_uhwi (),
2151 spcrng[0].to_uhwi ()));
2152 else
2153 warned = (writefn
2154 ? warning_n (loc, OPT_Wstringop_overflow_,
2155 lenrng[0].to_uhwi (),
2156 "%qD writing %wu byte into a region "
2157 "of size between %wu and %wu",
2158 "%qD writing %wu bytes into a region "
2159 "of size between %wu and %wu",
2160 writefn, lenrng[0].to_uhwi (),
2161 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2162 : warning_n (loc, OPT_Wstringop_overflow_,
2163 lenrng[0].to_uhwi (),
2164 "writing %wu byte into a region "
2165 "of size between %wu and %wu",
2166 "writing %wu bytes into a region "
2167 "of size between %wu and %wu",
2168 lenrng[0].to_uhwi (),
2169 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2171 else if (spcrng[0] == spcrng[1])
2172 warned = (writefn
2173 ? warning_at (loc, OPT_Wstringop_overflow_,
2174 "%qD writing between %wu and %wu bytes "
2175 "into a region of size %wu",
2176 writefn, lenrng[0].to_uhwi (),
2177 lenrng[1].to_uhwi (),
2178 spcrng[0].to_uhwi ())
2179 : warning_at (loc, OPT_Wstringop_overflow_,
2180 "writing between %wu and %wu bytes "
2181 "into a region of size %wu",
2182 lenrng[0].to_uhwi (),
2183 lenrng[1].to_uhwi (),
2184 spcrng[0].to_uhwi ()));
2185 else
2186 warned = (writefn
2187 ? warning_at (loc, OPT_Wstringop_overflow_,
2188 "%qD writing between %wu and %wu bytes "
2189 "into a region of size between %wu and %wu",
2190 writefn, lenrng[0].to_uhwi (),
2191 lenrng[1].to_uhwi (),
2192 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2193 : warning_at (loc, OPT_Wstringop_overflow_,
2194 "writing between %wu and %wu bytes "
2195 "into a region of size between %wu and %wu",
2196 lenrng[0].to_uhwi (),
2197 lenrng[1].to_uhwi (),
2198 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2200 if (!warned)
2201 return;
2203 suppress_warning (stmt, OPT_Wstringop_overflow_);
2205 aref.inform_access (access_write_only);
2208 /* Convenience wrapper for the above. */
2210 void
2211 strlen_pass::maybe_warn_overflow (gimple *stmt, bool call_lhs,
2212 unsigned HOST_WIDE_INT len,
2213 strinfo *si, bool plus_one, bool rawmem)
2215 tree tlen = build_int_cst (size_type_node, len);
2216 maybe_warn_overflow (stmt, call_lhs, tlen, si, plus_one, rawmem);
2219 /* Handle a strlen call. If strlen of the argument is known, replace
2220 the strlen call with the known value, otherwise remember that strlen
2221 of the argument is stored in the lhs SSA_NAME. */
2223 void
2224 strlen_pass::handle_builtin_strlen ()
2226 gimple *stmt = gsi_stmt (m_gsi);
2227 tree lhs = gimple_call_lhs (stmt);
2229 if (lhs == NULL_TREE)
2230 return;
2232 location_t loc = gimple_location (stmt);
2233 tree callee = gimple_call_fndecl (stmt);
2234 tree src = gimple_call_arg (stmt, 0);
2235 tree bound = (DECL_FUNCTION_CODE (callee) == BUILT_IN_STRNLEN
2236 ? gimple_call_arg (stmt, 1) : NULL_TREE);
2237 int idx = get_stridx (src, stmt);
2238 if (idx || (bound && integer_zerop (bound)))
2240 strinfo *si = NULL;
2241 tree rhs;
2243 if (idx < 0)
2244 rhs = build_int_cst (TREE_TYPE (lhs), ~idx);
2245 else if (idx == 0)
2246 rhs = bound;
2247 else
2249 rhs = NULL_TREE;
2250 si = get_strinfo (idx);
2251 if (si != NULL)
2253 rhs = get_string_length (si);
2254 /* For strnlen, if bound is constant, even if si is not known
2255 to be zero terminated, if we know at least bound bytes are
2256 not zero, the return value will be bound. */
2257 if (rhs == NULL_TREE
2258 && bound != NULL_TREE
2259 && TREE_CODE (bound) == INTEGER_CST
2260 && si->nonzero_chars != NULL_TREE
2261 && TREE_CODE (si->nonzero_chars) == INTEGER_CST
2262 && tree_int_cst_le (bound, si->nonzero_chars))
2263 rhs = bound;
2266 if (rhs != NULL_TREE)
2268 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2270 fprintf (dump_file, "Optimizing: ");
2271 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2273 rhs = unshare_expr (rhs);
2274 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2275 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2277 if (bound)
2278 rhs = fold_build2_loc (loc, MIN_EXPR, TREE_TYPE (rhs), rhs, bound);
2280 gimplify_and_update_call_from_tree (&m_gsi, rhs);
2281 stmt = gsi_stmt (m_gsi);
2282 update_stmt (stmt);
2283 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2285 fprintf (dump_file, "into: ");
2286 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2289 if (si != NULL
2290 /* Don't update anything for strnlen. */
2291 && bound == NULL_TREE
2292 && TREE_CODE (si->nonzero_chars) != SSA_NAME
2293 && TREE_CODE (si->nonzero_chars) != INTEGER_CST
2294 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2296 si = unshare_strinfo (si);
2297 si->nonzero_chars = lhs;
2298 gcc_assert (si->full_string_p);
2301 if (strlen_to_stridx
2302 && (bound == NULL_TREE
2303 /* For strnlen record this only if the call is proven
2304 to return the same value as strlen would. */
2305 || (TREE_CODE (bound) == INTEGER_CST
2306 && TREE_CODE (rhs) == INTEGER_CST
2307 && tree_int_cst_lt (rhs, bound))))
2308 strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2310 return;
2313 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2314 return;
2316 if (idx == 0)
2317 idx = new_stridx (src);
2318 else
2320 strinfo *si = get_strinfo (idx);
2321 if (si != NULL)
2323 if (!si->full_string_p && !si->stmt)
2325 /* Until now we only had a lower bound on the string length.
2326 Install LHS as the actual length. */
2327 si = unshare_strinfo (si);
2328 tree old = si->nonzero_chars;
2329 si->nonzero_chars = lhs;
2330 si->full_string_p = true;
2331 if (old && TREE_CODE (old) == INTEGER_CST)
2333 old = fold_convert_loc (loc, TREE_TYPE (lhs), old);
2334 tree adj = fold_build2_loc (loc, MINUS_EXPR,
2335 TREE_TYPE (lhs), lhs, old);
2336 adjust_related_strinfos (loc, si, adj);
2337 /* Use the constant minimum length as the lower bound
2338 of the non-constant length. */
2339 wide_int min = wi::to_wide (old);
2340 wide_int max
2341 = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
2342 if (wi::gtu_p (min, max))
2343 max = wi::to_wide (TYPE_MAX_VALUE (TREE_TYPE (lhs)));
2344 set_strlen_range (lhs, min, max);
2346 else
2348 si->first = 0;
2349 si->prev = 0;
2350 si->next = 0;
2353 return;
2356 if (idx)
2358 if (!bound)
2360 /* Only store the new length information for calls to strlen(),
2361 not for those to strnlen(). */
2362 strinfo *si = new_strinfo (src, idx, lhs, true);
2363 set_strinfo (idx, si);
2364 find_equal_ptrs (src, idx);
2367 /* For SRC that is an array of N elements, set LHS's range
2368 to [0, min (N, BOUND)]. A constant return value means
2369 the range would have consisted of a single value. In
2370 that case, fold the result into the returned constant. */
2371 if (tree ret = maybe_set_strlen_range (lhs, src, bound))
2372 if (TREE_CODE (ret) == INTEGER_CST)
2374 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2376 fprintf (dump_file, "Optimizing: ");
2377 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2379 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (ret)))
2380 ret = fold_convert_loc (loc, TREE_TYPE (lhs), ret);
2381 gimplify_and_update_call_from_tree (&m_gsi, ret);
2382 stmt = gsi_stmt (m_gsi);
2383 update_stmt (stmt);
2384 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2386 fprintf (dump_file, "into: ");
2387 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2391 if (strlen_to_stridx && !bound)
2392 strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2396 /* Handle a strchr call. If strlen of the first argument is known, replace
2397 the strchr (x, 0) call with the endptr or x + strlen, otherwise remember
2398 that lhs of the call is endptr and strlen of the argument is endptr - x. */
2400 void
2401 strlen_pass::handle_builtin_strchr ()
2403 gimple *stmt = gsi_stmt (m_gsi);
2404 tree lhs = gimple_call_lhs (stmt);
2406 if (lhs == NULL_TREE)
2407 return;
2409 if (!integer_zerop (gimple_call_arg (stmt, 1)))
2410 return;
2412 tree src = gimple_call_arg (stmt, 0);
2414 /* Avoid folding if the first argument is not a nul-terminated array.
2415 Defer warning until later. */
2416 if (!check_nul_terminated_array (NULL_TREE, src))
2417 return;
2419 int idx = get_stridx (src, stmt);
2420 if (idx)
2422 strinfo *si = NULL;
2423 tree rhs;
2425 if (idx < 0)
2426 rhs = build_int_cst (size_type_node, ~idx);
2427 else
2429 rhs = NULL_TREE;
2430 si = get_strinfo (idx);
2431 if (si != NULL)
2432 rhs = get_string_length (si);
2434 if (rhs != NULL_TREE)
2436 location_t loc = gimple_location (stmt);
2438 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2440 fprintf (dump_file, "Optimizing: ");
2441 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2443 if (si != NULL && si->endptr != NULL_TREE)
2445 rhs = unshare_expr (si->endptr);
2446 if (!useless_type_conversion_p (TREE_TYPE (lhs),
2447 TREE_TYPE (rhs)))
2448 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2450 else
2452 rhs = fold_convert_loc (loc, sizetype, unshare_expr (rhs));
2453 rhs = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2454 TREE_TYPE (src), src, rhs);
2455 if (!useless_type_conversion_p (TREE_TYPE (lhs),
2456 TREE_TYPE (rhs)))
2457 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2459 gimplify_and_update_call_from_tree (&m_gsi, rhs);
2460 stmt = gsi_stmt (m_gsi);
2461 update_stmt (stmt);
2462 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2464 fprintf (dump_file, "into: ");
2465 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2467 if (si != NULL
2468 && si->endptr == NULL_TREE
2469 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2471 si = unshare_strinfo (si);
2472 si->endptr = lhs;
2474 zero_length_string (lhs, si);
2475 return;
2478 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2479 return;
2480 if (TREE_CODE (src) != SSA_NAME || !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (src))
2482 if (idx == 0)
2483 idx = new_stridx (src);
2484 else if (get_strinfo (idx) != NULL)
2486 zero_length_string (lhs, NULL);
2487 return;
2489 if (idx)
2491 location_t loc = gimple_location (stmt);
2492 tree lhsu = fold_convert_loc (loc, size_type_node, lhs);
2493 tree srcu = fold_convert_loc (loc, size_type_node, src);
2494 tree length = fold_build2_loc (loc, MINUS_EXPR,
2495 size_type_node, lhsu, srcu);
2496 strinfo *si = new_strinfo (src, idx, length, true);
2497 si->endptr = lhs;
2498 set_strinfo (idx, si);
2499 find_equal_ptrs (src, idx);
2500 zero_length_string (lhs, si);
2503 else
2504 zero_length_string (lhs, NULL);
2507 /* Handle a strcpy-like ({st{r,p}cpy,__st{r,p}cpy_chk}) call.
2508 If strlen of the second argument is known, strlen of the first argument
2509 is the same after this call. Furthermore, attempt to convert it to
2510 memcpy. Uses RVALS to determine range information. */
2512 void
2513 strlen_pass::handle_builtin_strcpy (built_in_function bcode)
2515 int idx, didx;
2516 tree src, dst, srclen, len, lhs, type, fn, oldlen;
2517 bool success;
2518 gimple *stmt = gsi_stmt (m_gsi);
2519 strinfo *si, *dsi, *olddsi, *zsi;
2520 location_t loc;
2522 src = gimple_call_arg (stmt, 1);
2523 dst = gimple_call_arg (stmt, 0);
2524 lhs = gimple_call_lhs (stmt);
2525 idx = get_stridx (src, stmt);
2526 si = NULL;
2527 if (idx > 0)
2528 si = get_strinfo (idx);
2530 didx = get_stridx (dst, stmt);
2531 olddsi = NULL;
2532 oldlen = NULL_TREE;
2533 if (didx > 0)
2534 olddsi = get_strinfo (didx);
2535 else if (didx < 0)
2536 return;
2538 if (olddsi != NULL)
2539 adjust_last_stmt (olddsi, stmt, false);
2541 srclen = NULL_TREE;
2542 if (si != NULL)
2543 srclen = get_string_length (si);
2544 else if (idx < 0)
2545 srclen = build_int_cst (size_type_node, ~idx);
2547 maybe_warn_overflow (stmt, false, srclen, olddsi, true);
2549 if (olddsi != NULL)
2550 adjust_last_stmt (olddsi, stmt, false);
2552 loc = gimple_location (stmt);
2553 if (srclen == NULL_TREE)
2554 switch (bcode)
2556 case BUILT_IN_STRCPY:
2557 case BUILT_IN_STRCPY_CHK:
2558 if (lhs != NULL_TREE || !builtin_decl_implicit_p (BUILT_IN_STPCPY))
2559 return;
2560 break;
2561 case BUILT_IN_STPCPY:
2562 case BUILT_IN_STPCPY_CHK:
2563 if (lhs == NULL_TREE)
2564 return;
2565 else
2567 tree lhsuint = fold_convert_loc (loc, size_type_node, lhs);
2568 srclen = fold_convert_loc (loc, size_type_node, dst);
2569 srclen = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
2570 lhsuint, srclen);
2572 break;
2573 default:
2574 gcc_unreachable ();
2577 if (didx == 0)
2579 didx = new_stridx (dst);
2580 if (didx == 0)
2581 return;
2583 if (olddsi != NULL)
2585 oldlen = olddsi->nonzero_chars;
2586 dsi = unshare_strinfo (olddsi);
2587 dsi->nonzero_chars = srclen;
2588 dsi->full_string_p = (srclen != NULL_TREE);
2589 /* Break the chain, so adjust_related_strinfo on later pointers in
2590 the chain won't adjust this one anymore. */
2591 dsi->next = 0;
2592 dsi->stmt = NULL;
2593 dsi->endptr = NULL_TREE;
2595 else
2597 dsi = new_strinfo (dst, didx, srclen, srclen != NULL_TREE);
2598 set_strinfo (didx, dsi);
2599 find_equal_ptrs (dst, didx);
2601 dsi->writable = true;
2602 dsi->dont_invalidate = true;
2604 if (dsi->nonzero_chars == NULL_TREE)
2606 strinfo *chainsi;
2608 /* If string length of src is unknown, use delayed length
2609 computation. If string length of dst will be needed, it
2610 can be computed by transforming this strcpy call into
2611 stpcpy and subtracting dst from the return value. */
2613 /* Look for earlier strings whose length could be determined if
2614 this strcpy is turned into an stpcpy. */
2616 if (dsi->prev != 0 && (chainsi = verify_related_strinfos (dsi)) != NULL)
2618 for (; chainsi && chainsi != dsi; chainsi = get_strinfo (chainsi->next))
2620 /* When setting a stmt for delayed length computation
2621 prevent all strinfos through dsi from being
2622 invalidated. */
2623 chainsi = unshare_strinfo (chainsi);
2624 chainsi->stmt = stmt;
2625 chainsi->nonzero_chars = NULL_TREE;
2626 chainsi->full_string_p = false;
2627 chainsi->endptr = NULL_TREE;
2628 chainsi->dont_invalidate = true;
2631 dsi->stmt = stmt;
2633 /* Try to detect overlap before returning. This catches cases
2634 like strcpy (d, d + n) where n is non-constant whose range
2635 is such that (n <= strlen (d) holds).
2637 OLDDSI->NONZERO_chars may have been reset by this point with
2638 oldlen holding it original value. */
2639 if (olddsi && oldlen)
2641 /* Add 1 for the terminating NUL. */
2642 tree type = TREE_TYPE (oldlen);
2643 oldlen = fold_build2 (PLUS_EXPR, type, oldlen,
2644 build_int_cst (type, 1));
2645 check_bounds_or_overlap (stmt, olddsi->ptr, src, oldlen, NULL_TREE);
2648 return;
2651 if (olddsi != NULL)
2653 tree adj = NULL_TREE;
2654 if (oldlen == NULL_TREE)
2656 else if (integer_zerop (oldlen))
2657 adj = srclen;
2658 else if (TREE_CODE (oldlen) == INTEGER_CST
2659 || TREE_CODE (srclen) == INTEGER_CST)
2660 adj = fold_build2_loc (loc, MINUS_EXPR,
2661 TREE_TYPE (srclen), srclen,
2662 fold_convert_loc (loc, TREE_TYPE (srclen),
2663 oldlen));
2664 if (adj != NULL_TREE)
2665 adjust_related_strinfos (loc, dsi, adj);
2666 else
2667 dsi->prev = 0;
2669 /* strcpy src may not overlap dst, so src doesn't need to be
2670 invalidated either. */
2671 if (si != NULL)
2672 si->dont_invalidate = true;
2674 fn = NULL_TREE;
2675 zsi = NULL;
2676 switch (bcode)
2678 case BUILT_IN_STRCPY:
2679 fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
2680 if (lhs)
2681 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2682 break;
2683 case BUILT_IN_STRCPY_CHK:
2684 fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
2685 if (lhs)
2686 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2687 break;
2688 case BUILT_IN_STPCPY:
2689 /* This would need adjustment of the lhs (subtract one),
2690 or detection that the trailing '\0' doesn't need to be
2691 written, if it will be immediately overwritten.
2692 fn = builtin_decl_explicit (BUILT_IN_MEMPCPY); */
2693 if (lhs)
2695 dsi->endptr = lhs;
2696 zsi = zero_length_string (lhs, dsi);
2698 break;
2699 case BUILT_IN_STPCPY_CHK:
2700 /* This would need adjustment of the lhs (subtract one),
2701 or detection that the trailing '\0' doesn't need to be
2702 written, if it will be immediately overwritten.
2703 fn = builtin_decl_explicit (BUILT_IN_MEMPCPY_CHK); */
2704 if (lhs)
2706 dsi->endptr = lhs;
2707 zsi = zero_length_string (lhs, dsi);
2709 break;
2710 default:
2711 gcc_unreachable ();
2713 if (zsi != NULL)
2714 zsi->dont_invalidate = true;
2716 if (fn)
2718 tree args = TYPE_ARG_TYPES (TREE_TYPE (fn));
2719 type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
2721 else
2722 type = size_type_node;
2724 len = fold_convert_loc (loc, type, unshare_expr (srclen));
2725 len = fold_build2_loc (loc, PLUS_EXPR, type, len, build_int_cst (type, 1));
2727 /* Disable warning for the transformed statement? */
2728 opt_code no_warning_opt = no_warning;
2730 if (const strinfo *chksi = si ? olddsi ? olddsi : dsi : NULL)
2732 no_warning_opt = check_bounds_or_overlap (stmt, chksi->ptr, si->ptr,
2733 NULL_TREE, len);
2734 if (no_warning_opt)
2735 suppress_warning (stmt, no_warning_opt);
2738 if (fn == NULL_TREE)
2739 return;
2741 len = force_gimple_operand_gsi (&m_gsi, len, true, NULL_TREE, true,
2742 GSI_SAME_STMT);
2743 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2745 fprintf (dump_file, "Optimizing: ");
2746 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2748 if (gimple_call_num_args (stmt) == 2)
2749 success = update_gimple_call (&m_gsi, fn, 3, dst, src, len);
2750 else
2751 success = update_gimple_call (&m_gsi, fn, 4, dst, src, len,
2752 gimple_call_arg (stmt, 2));
2753 if (success)
2755 stmt = gsi_stmt (m_gsi);
2756 update_stmt (stmt);
2757 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2759 fprintf (dump_file, "into: ");
2760 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2762 /* Allow adjust_last_stmt to decrease this memcpy's size. */
2763 laststmt.stmt = stmt;
2764 laststmt.len = srclen;
2765 laststmt.stridx = dsi->idx;
2767 else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2768 fprintf (dump_file, "not possible.\n");
2770 if (no_warning_opt)
2771 suppress_warning (stmt, no_warning_opt);
2774 /* Check the size argument to the built-in forms of stpncpy and strncpy
2775 for out-of-bounds offsets or overlapping access, and to see if the
2776 size argument is derived from a call to strlen() on the source argument,
2777 and if so, issue an appropriate warning. */
2779 void
2780 strlen_pass::handle_builtin_strncat (built_in_function)
2782 /* Same as stxncpy(). */
2783 handle_builtin_stxncpy_strncat (true);
2786 /* Return true if LEN depends on a call to strlen(SRC) in an interesting
2787 way. LEN can either be an integer expression, or a pointer (to char).
2788 When it is the latter (such as in recursive calls to self) it is
2789 assumed to be the argument in some call to strlen() whose relationship
2790 to SRC is being ascertained. */
2792 bool
2793 is_strlen_related_p (tree src, tree len)
2795 if (TREE_CODE (TREE_TYPE (len)) == POINTER_TYPE
2796 && operand_equal_p (src, len, 0))
2797 return true;
2799 if (TREE_CODE (len) != SSA_NAME)
2800 return false;
2802 if (TREE_CODE (src) == SSA_NAME)
2804 gimple *srcdef = SSA_NAME_DEF_STMT (src);
2805 if (is_gimple_assign (srcdef))
2807 /* Handle bitwise AND used in conversions from wider size_t
2808 to narrower unsigned types. */
2809 tree_code code = gimple_assign_rhs_code (srcdef);
2810 if (code == BIT_AND_EXPR
2811 || code == NOP_EXPR)
2812 return is_strlen_related_p (gimple_assign_rhs1 (srcdef), len);
2814 return false;
2817 if (gimple_call_builtin_p (srcdef, BUILT_IN_NORMAL))
2819 /* If SRC is the result of a call to an allocation function
2820 or strlen, use the function's argument instead. */
2821 tree func = gimple_call_fndecl (srcdef);
2822 built_in_function code = DECL_FUNCTION_CODE (func);
2823 if (code == BUILT_IN_ALLOCA
2824 || code == BUILT_IN_ALLOCA_WITH_ALIGN
2825 || code == BUILT_IN_MALLOC
2826 || code == BUILT_IN_STRLEN)
2827 return is_strlen_related_p (gimple_call_arg (srcdef, 0), len);
2829 /* FIXME: Handle other functions with attribute alloc_size. */
2830 return false;
2834 gimple *lendef = SSA_NAME_DEF_STMT (len);
2835 if (!lendef)
2836 return false;
2838 if (is_gimple_call (lendef))
2840 tree func = gimple_call_fndecl (lendef);
2841 if (!valid_builtin_call (lendef)
2842 || DECL_FUNCTION_CODE (func) != BUILT_IN_STRLEN)
2843 return false;
2845 tree arg = gimple_call_arg (lendef, 0);
2846 return is_strlen_related_p (src, arg);
2849 if (!is_gimple_assign (lendef))
2850 return false;
2852 tree_code code = gimple_assign_rhs_code (lendef);
2853 tree rhs1 = gimple_assign_rhs1 (lendef);
2854 tree rhstype = TREE_TYPE (rhs1);
2856 if ((TREE_CODE (rhstype) == POINTER_TYPE && code == POINTER_PLUS_EXPR)
2857 || (INTEGRAL_TYPE_P (rhstype)
2858 && (code == BIT_AND_EXPR
2859 || code == NOP_EXPR)))
2861 /* Pointer plus (an integer), and truncation are considered among
2862 the (potentially) related expressions to strlen. */
2863 return is_strlen_related_p (src, rhs1);
2866 if (tree rhs2 = gimple_assign_rhs2 (lendef))
2868 /* Integer subtraction is considered strlen-related when both
2869 arguments are integers and second one is strlen-related. */
2870 rhstype = TREE_TYPE (rhs2);
2871 if (INTEGRAL_TYPE_P (rhstype) && code == MINUS_EXPR)
2872 return is_strlen_related_p (src, rhs2);
2875 return false;
2878 /* Called by handle_builtin_stxncpy_strncat and by
2879 gimple_fold_builtin_strncpy in gimple-fold.cc.
2880 Check to see if the specified bound is a) equal to the size of
2881 the destination DST and if so, b) if it's immediately followed by
2882 DST[CNT - 1] = '\0'. If a) holds and b) does not, warn. Otherwise,
2883 do nothing. Return true if diagnostic has been issued.
2885 The purpose is to diagnose calls to strncpy and stpncpy that do
2886 not nul-terminate the copy while allowing for the idiom where
2887 such a call is immediately followed by setting the last element
2888 to nul, as in:
2889 char a[32];
2890 strncpy (a, s, sizeof a);
2891 a[sizeof a - 1] = '\0';
2894 bool
2895 maybe_diag_stxncpy_trunc (gimple_stmt_iterator gsi, tree src, tree cnt,
2896 pointer_query *ptr_qry /* = NULL */)
2898 gimple *stmt = gsi_stmt (gsi);
2899 if (warning_suppressed_p (stmt, OPT_Wstringop_truncation))
2900 return false;
2902 wide_int cntrange[2];
2903 int_range_max r;
2904 if (!get_range_query (cfun)->range_of_expr (r, cnt)
2905 || r.varying_p ()
2906 || r.undefined_p ())
2907 return false;
2909 tree min, max;
2910 value_range_kind kind = get_legacy_range (r, min, max);
2911 cntrange[0] = wi::to_wide (min);
2912 cntrange[1] = wi::to_wide (max);
2913 if (kind == VR_ANTI_RANGE)
2915 wide_int maxobjsize = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node));
2917 if (wi::ltu_p (cntrange[1], maxobjsize))
2919 cntrange[0] = cntrange[1] + 1;
2920 cntrange[1] = maxobjsize;
2922 else
2924 cntrange[1] = cntrange[0] - 1;
2925 cntrange[0] = wi::zero (TYPE_PRECISION (TREE_TYPE (cnt)));
2929 /* Negative value is the constant string length. If it's less than
2930 the lower bound there is no truncation. Avoid calling get_stridx()
2931 when ssa_ver_to_stridx is empty. That implies the caller isn't
2932 running under the control of this pass and ssa_ver_to_stridx hasn't
2933 been created yet. */
2934 int sidx = ssa_ver_to_stridx.length () ? get_stridx (src, stmt) : 0;
2935 if (sidx < 0 && wi::gtu_p (cntrange[0], ~sidx))
2936 return false;
2938 tree dst = gimple_call_arg (stmt, 0);
2939 tree dstdecl = dst;
2940 if (TREE_CODE (dstdecl) == ADDR_EXPR)
2941 dstdecl = TREE_OPERAND (dstdecl, 0);
2943 tree ref = NULL_TREE;
2945 if (!sidx)
2947 /* If the source is a non-string return early to avoid warning
2948 for possible truncation (if the truncation is certain SIDX
2949 is non-zero). */
2950 tree srcdecl = gimple_call_arg (stmt, 1);
2951 if (TREE_CODE (srcdecl) == ADDR_EXPR)
2952 srcdecl = TREE_OPERAND (srcdecl, 0);
2953 if (get_attr_nonstring_decl (srcdecl, &ref))
2954 return false;
2957 /* Likewise, if the destination refers to an array/pointer declared
2958 nonstring return early. */
2959 if (get_attr_nonstring_decl (dstdecl, &ref))
2960 return false;
2962 /* Look for dst[i] = '\0'; after the stxncpy() call and if found
2963 avoid the truncation warning. */
2964 gsi_next_nondebug (&gsi);
2965 gimple *next_stmt = gsi_stmt (gsi);
2966 if (!next_stmt)
2968 /* When there is no statement in the same basic block check
2969 the immediate successor block. */
2970 if (basic_block bb = gimple_bb (stmt))
2972 if (single_succ_p (bb))
2974 /* For simplicity, ignore blocks with multiple outgoing
2975 edges for now and only consider successor blocks along
2976 normal edges. */
2977 edge e = EDGE_SUCC (bb, 0);
2978 if (!(e->flags & EDGE_ABNORMAL))
2980 gsi = gsi_start_bb (e->dest);
2981 next_stmt = gsi_stmt (gsi);
2982 if (next_stmt && is_gimple_debug (next_stmt))
2984 gsi_next_nondebug (&gsi);
2985 next_stmt = gsi_stmt (gsi);
2992 if (next_stmt && is_gimple_assign (next_stmt))
2994 tree lhs = gimple_assign_lhs (next_stmt);
2995 tree_code code = TREE_CODE (lhs);
2996 if (code == ARRAY_REF || code == MEM_REF)
2997 lhs = TREE_OPERAND (lhs, 0);
2999 tree func = gimple_call_fndecl (stmt);
3000 if (DECL_FUNCTION_CODE (func) == BUILT_IN_STPNCPY)
3002 tree ret = gimple_call_lhs (stmt);
3003 if (ret && operand_equal_p (ret, lhs, 0))
3004 return false;
3007 /* Determine the base address and offset of the reference,
3008 ignoring the innermost array index. */
3009 if (TREE_CODE (ref) == ARRAY_REF)
3010 ref = TREE_OPERAND (ref, 0);
3012 poly_int64 dstoff;
3013 tree dstbase = get_addr_base_and_unit_offset (ref, &dstoff);
3015 poly_int64 lhsoff;
3016 tree lhsbase = get_addr_base_and_unit_offset (lhs, &lhsoff);
3017 if (lhsbase
3018 && dstbase
3019 && known_eq (dstoff, lhsoff)
3020 && operand_equal_p (dstbase, lhsbase, 0))
3021 return false;
3024 int prec = TYPE_PRECISION (TREE_TYPE (cnt));
3025 wide_int lenrange[2];
3026 if (strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL)
3028 lenrange[0] = (sisrc->nonzero_chars
3029 && TREE_CODE (sisrc->nonzero_chars) == INTEGER_CST
3030 ? wi::to_wide (sisrc->nonzero_chars)
3031 : wi::zero (prec));
3032 lenrange[1] = lenrange[0];
3034 else if (sidx < 0)
3035 lenrange[0] = lenrange[1] = wi::shwi (~sidx, prec);
3036 else
3038 c_strlen_data lendata = { };
3039 /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
3040 to have it set to the length of the longest string in a PHI. */
3041 lendata.maxbound = src;
3042 get_range_strlen (src, &lendata, /* eltsize = */1);
3043 if (TREE_CODE (lendata.minlen) == INTEGER_CST
3044 && TREE_CODE (lendata.maxbound) == INTEGER_CST)
3046 /* When LENDATA.MAXLEN is unknown, reset LENDATA.MINLEN
3047 which stores the length of the shortest known string. */
3048 if (integer_all_onesp (lendata.maxlen))
3049 lenrange[0] = wi::shwi (0, prec);
3050 else
3051 lenrange[0] = wi::to_wide (lendata.minlen, prec);
3052 lenrange[1] = wi::to_wide (lendata.maxbound, prec);
3054 else
3056 lenrange[0] = wi::shwi (0, prec);
3057 lenrange[1] = wi::shwi (-1, prec);
3061 location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3062 tree func = gimple_call_fndecl (stmt);
3064 if (lenrange[0] != 0 || !wi::neg_p (lenrange[1]))
3066 /* If the longest source string is shorter than the lower bound
3067 of the specified count the copy is definitely nul-terminated. */
3068 if (wi::ltu_p (lenrange[1], cntrange[0]))
3069 return false;
3071 if (wi::neg_p (lenrange[1]))
3073 /* The length of one of the strings is unknown but at least
3074 one has non-zero length and that length is stored in
3075 LENRANGE[1]. Swap the bounds to force a "may be truncated"
3076 warning below. */
3077 lenrange[1] = lenrange[0];
3078 lenrange[0] = wi::shwi (0, prec);
3081 /* Set to true for strncat whose bound is derived from the length
3082 of the destination (the expected usage pattern). */
3083 bool cat_dstlen_bounded = false;
3084 if (DECL_FUNCTION_CODE (func) == BUILT_IN_STRNCAT)
3085 cat_dstlen_bounded = is_strlen_related_p (dst, cnt);
3087 if (lenrange[0] == cntrange[1] && cntrange[0] == cntrange[1])
3088 return warning_n (callloc, OPT_Wstringop_truncation,
3089 cntrange[0].to_uhwi (),
3090 "%qD output truncated before terminating "
3091 "nul copying %E byte from a string of the "
3092 "same length",
3093 "%qD output truncated before terminating nul "
3094 "copying %E bytes from a string of the same "
3095 "length",
3096 func, cnt);
3097 else if (!cat_dstlen_bounded)
3099 if (wi::geu_p (lenrange[0], cntrange[1]))
3101 /* The shortest string is longer than the upper bound of
3102 the count so the truncation is certain. */
3103 if (cntrange[0] == cntrange[1])
3104 return warning_n (callloc, OPT_Wstringop_truncation,
3105 cntrange[0].to_uhwi (),
3106 "%qD output truncated copying %E byte "
3107 "from a string of length %wu",
3108 "%qD output truncated copying %E bytes "
3109 "from a string of length %wu",
3110 func, cnt, lenrange[0].to_uhwi ());
3112 return warning_at (callloc, OPT_Wstringop_truncation,
3113 "%qD output truncated copying between %wu "
3114 "and %wu bytes from a string of length %wu",
3115 func, cntrange[0].to_uhwi (),
3116 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3118 else if (wi::geu_p (lenrange[1], cntrange[1]))
3120 /* The longest string is longer than the upper bound of
3121 the count so the truncation is possible. */
3122 if (cntrange[0] == cntrange[1])
3123 return warning_n (callloc, OPT_Wstringop_truncation,
3124 cntrange[0].to_uhwi (),
3125 "%qD output may be truncated copying %E "
3126 "byte from a string of length %wu",
3127 "%qD output may be truncated copying %E "
3128 "bytes from a string of length %wu",
3129 func, cnt, lenrange[1].to_uhwi ());
3131 return warning_at (callloc, OPT_Wstringop_truncation,
3132 "%qD output may be truncated copying between "
3133 "%wu and %wu bytes from a string of length %wu",
3134 func, cntrange[0].to_uhwi (),
3135 cntrange[1].to_uhwi (), lenrange[1].to_uhwi ());
3139 if (!cat_dstlen_bounded
3140 && cntrange[0] != cntrange[1]
3141 && wi::leu_p (cntrange[0], lenrange[0])
3142 && wi::leu_p (cntrange[1], lenrange[0] + 1))
3144 /* If the source (including the terminating nul) is longer than
3145 the lower bound of the specified count but shorter than the
3146 upper bound the copy may (but need not) be truncated. */
3147 return warning_at (callloc, OPT_Wstringop_truncation,
3148 "%qD output may be truncated copying between "
3149 "%wu and %wu bytes from a string of length %wu",
3150 func, cntrange[0].to_uhwi (),
3151 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3155 access_ref aref;
3156 if (tree dstsize = compute_objsize (dst, stmt, 1, &aref, ptr_qry))
3158 /* The source length is unknown. Try to determine the destination
3159 size and see if it matches the specified bound. If not, bail.
3160 Otherwise go on to see if it should be diagnosed for possible
3161 truncation. */
3162 if (!dstsize)
3163 return false;
3165 if (wi::to_wide (dstsize) != cntrange[1])
3166 return false;
3168 /* Avoid warning for strncpy(a, b, N) calls where the following
3169 equalities hold:
3170 N == sizeof a && N == sizeof b */
3171 if (tree srcsize = compute_objsize (src, stmt, 1, &aref, ptr_qry))
3172 if (wi::to_wide (srcsize) == cntrange[1])
3173 return false;
3175 if (cntrange[0] == cntrange[1])
3176 return warning_at (callloc, OPT_Wstringop_truncation,
3177 "%qD specified bound %E equals destination size",
3178 func, cnt);
3181 return false;
3184 /* Check the arguments to the built-in forms of stpncpy, strncpy, and
3185 strncat, for out-of-bounds offsets or overlapping access, and to see
3186 if the size is derived from calling strlen() on the source argument,
3187 and if so, issue the appropriate warning.
3188 APPEND_P is true for strncat. */
3190 void
3191 strlen_pass::handle_builtin_stxncpy_strncat (bool append_p)
3193 if (!strlen_to_stridx)
3194 return;
3196 gimple *stmt = gsi_stmt (m_gsi);
3198 tree dst = gimple_call_arg (stmt, 0);
3199 tree src = gimple_call_arg (stmt, 1);
3200 tree len = gimple_call_arg (stmt, 2);
3201 /* An upper bound of the size of the destination. */
3202 tree dstsize = NULL_TREE;
3203 /* The length of the destination and source strings (plus 1 for those
3204 whose FULL_STRING_P is set, i.e., whose length is exact rather than
3205 a lower bound). */
3206 tree dstlenp1 = NULL_TREE, srclenp1 = NULL_TREE;;
3208 int didx = get_stridx (dst, stmt);
3209 if (strinfo *sidst = didx > 0 ? get_strinfo (didx) : NULL)
3211 /* Compute the size of the destination string including the nul
3212 if it is known to be nul-terminated. */
3213 if (sidst->nonzero_chars)
3215 if (sidst->full_string_p)
3217 /* String is known to be nul-terminated. */
3218 tree type = TREE_TYPE (sidst->nonzero_chars);
3219 dstlenp1 = fold_build2 (PLUS_EXPR, type, sidst->nonzero_chars,
3220 build_int_cst (type, 1));
3222 else
3223 dstlenp1 = sidst->nonzero_chars;
3225 else if (TREE_CODE (sidst->ptr) == SSA_NAME)
3227 gimple *def_stmt = SSA_NAME_DEF_STMT (sidst->ptr);
3228 dstsize = gimple_call_alloc_size (def_stmt);
3231 dst = sidst->ptr;
3234 int sidx = get_stridx (src, stmt);
3235 strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL;
3236 if (sisrc)
3238 /* strncat() and strncpy() can modify the source string by writing
3239 over the terminating nul so SISRC->DONT_INVALIDATE must be left
3240 clear. */
3242 /* Compute the size of the source string including the terminating
3243 nul if its known to be nul-terminated. */
3244 if (sisrc->nonzero_chars)
3246 if (sisrc->full_string_p)
3248 tree type = TREE_TYPE (sisrc->nonzero_chars);
3249 srclenp1 = fold_build2 (PLUS_EXPR, type, sisrc->nonzero_chars,
3250 build_int_cst (type, 1));
3252 else
3253 srclenp1 = sisrc->nonzero_chars;
3256 src = sisrc->ptr;
3258 else
3259 srclenp1 = NULL_TREE;
3261 opt_code opt = check_bounds_or_overlap (stmt, dst, src, dstlenp1, srclenp1);
3262 if (opt != no_warning)
3264 suppress_warning (stmt, opt);
3265 return;
3268 /* If the length argument was computed from strlen(S) for some string
3269 S retrieve the strinfo index for the string (PSS->FIRST) along with
3270 the location of the strlen() call (PSS->SECOND). */
3271 stridx_strlenloc *pss = strlen_to_stridx->get (len);
3272 if (!pss || pss->first <= 0)
3274 if (maybe_diag_stxncpy_trunc (m_gsi, src, len))
3275 suppress_warning (stmt, OPT_Wstringop_truncation);
3277 return;
3280 /* Retrieve the strinfo data for the string S that LEN was computed
3281 from as some function F of strlen (S) (i.e., LEN need not be equal
3282 to strlen(S)). */
3283 strinfo *silen = get_strinfo (pss->first);
3285 location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3287 tree func = gimple_call_fndecl (stmt);
3289 bool warned = false;
3291 /* When -Wstringop-truncation is set, try to determine truncation
3292 before diagnosing possible overflow. Truncation is implied by
3293 the LEN argument being equal to strlen(SRC), regardless of
3294 whether its value is known. Otherwise, when appending, or
3295 when copying into a destination of known size, issue the more
3296 generic -Wstringop-overflow which triggers for LEN arguments
3297 that in any meaningful way depend on strlen(SRC). */
3298 if (!append_p
3299 && sisrc == silen
3300 && is_strlen_related_p (src, len)
3301 && warning_at (callloc, OPT_Wstringop_truncation,
3302 "%qD output truncated before terminating nul "
3303 "copying as many bytes from a string as its length",
3304 func))
3305 warned = true;
3306 else if ((append_p || !dstsize || len == dstlenp1)
3307 && silen && is_strlen_related_p (src, silen->ptr))
3309 /* Issue -Wstringop-overflow when appending or when writing into
3310 a destination of a known size. Otherwise, when copying into
3311 a destination of an unknown size, it's truncation. */
3312 opt_code opt = (append_p || dstsize
3313 ? OPT_Wstringop_overflow_ : OPT_Wstringop_truncation);
3314 warned = warning_at (callloc, opt,
3315 "%qD specified bound depends on the length "
3316 "of the source argument",
3317 func);
3319 if (warned)
3321 location_t strlenloc = pss->second;
3322 if (strlenloc != UNKNOWN_LOCATION && strlenloc != callloc)
3323 inform (strlenloc, "length computed here");
3327 /* Handle a memcpy-like ({mem{,p}cpy,__mem{,p}cpy_chk}) call.
3328 If strlen of the second argument is known and length of the third argument
3329 is that plus one, strlen of the first argument is the same after this
3330 call. Uses RVALS to determine range information. */
3332 void
3333 strlen_pass::handle_builtin_memcpy (built_in_function bcode)
3335 tree lhs, oldlen, newlen;
3336 gimple *stmt = gsi_stmt (m_gsi);
3337 strinfo *si, *dsi;
3339 tree len = gimple_call_arg (stmt, 2);
3340 tree src = gimple_call_arg (stmt, 1);
3341 tree dst = gimple_call_arg (stmt, 0);
3343 int didx = get_stridx (dst, stmt);
3344 strinfo *olddsi = NULL;
3345 if (didx > 0)
3346 olddsi = get_strinfo (didx);
3347 else if (didx < 0)
3348 return;
3350 if (olddsi != NULL
3351 && !integer_zerop (len))
3353 maybe_warn_overflow (stmt, false, len, olddsi, false, true);
3354 if (tree_fits_uhwi_p (len))
3355 adjust_last_stmt (olddsi, stmt, false);
3358 int idx = get_stridx (src, stmt);
3359 if (idx == 0)
3360 return;
3362 bool full_string_p;
3363 if (idx > 0)
3365 gimple *def_stmt;
3367 /* Handle memcpy (x, y, l) where l's relationship with strlen (y)
3368 is known. */
3369 si = get_strinfo (idx);
3370 if (si == NULL || si->nonzero_chars == NULL_TREE)
3371 return;
3372 if (TREE_CODE (len) == INTEGER_CST
3373 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
3375 if (tree_int_cst_le (len, si->nonzero_chars))
3377 /* Copying LEN nonzero characters, where LEN is constant. */
3378 newlen = len;
3379 full_string_p = false;
3381 else
3383 /* Copying the whole of the analyzed part of SI. */
3384 newlen = si->nonzero_chars;
3385 full_string_p = si->full_string_p;
3388 else
3390 if (!si->full_string_p)
3391 return;
3392 if (TREE_CODE (len) != SSA_NAME)
3393 return;
3394 def_stmt = SSA_NAME_DEF_STMT (len);
3395 if (!is_gimple_assign (def_stmt)
3396 || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
3397 || gimple_assign_rhs1 (def_stmt) != si->nonzero_chars
3398 || !integer_onep (gimple_assign_rhs2 (def_stmt)))
3399 return;
3400 /* Copying variable-length string SI (and no more). */
3401 newlen = si->nonzero_chars;
3402 full_string_p = true;
3405 else
3407 si = NULL;
3408 /* Handle memcpy (x, "abcd", 5) or
3409 memcpy (x, "abc\0uvw", 7). */
3410 if (!tree_fits_uhwi_p (len))
3411 return;
3413 unsigned HOST_WIDE_INT clen = tree_to_uhwi (len);
3414 unsigned HOST_WIDE_INT nonzero_chars = ~idx;
3415 newlen = build_int_cst (size_type_node, MIN (nonzero_chars, clen));
3416 full_string_p = clen > nonzero_chars;
3419 if (!full_string_p
3420 && olddsi
3421 && olddsi->nonzero_chars
3422 && TREE_CODE (olddsi->nonzero_chars) == INTEGER_CST
3423 && tree_int_cst_le (newlen, olddsi->nonzero_chars))
3425 /* The SRC substring being written strictly overlaps
3426 a subsequence of the existing string OLDDSI. */
3427 newlen = olddsi->nonzero_chars;
3428 full_string_p = olddsi->full_string_p;
3431 if (olddsi != NULL && TREE_CODE (len) == SSA_NAME)
3432 adjust_last_stmt (olddsi, stmt, false);
3434 if (didx == 0)
3436 didx = new_stridx (dst);
3437 if (didx == 0)
3438 return;
3440 oldlen = NULL_TREE;
3441 if (olddsi != NULL)
3443 dsi = unshare_strinfo (olddsi);
3444 oldlen = olddsi->nonzero_chars;
3445 dsi->nonzero_chars = newlen;
3446 dsi->full_string_p = full_string_p;
3447 /* Break the chain, so adjust_related_strinfo on later pointers in
3448 the chain won't adjust this one anymore. */
3449 dsi->next = 0;
3450 dsi->stmt = NULL;
3451 dsi->endptr = NULL_TREE;
3453 else
3455 dsi = new_strinfo (dst, didx, newlen, full_string_p);
3456 set_strinfo (didx, dsi);
3457 find_equal_ptrs (dst, didx);
3459 dsi->writable = true;
3460 dsi->dont_invalidate = true;
3461 if (olddsi != NULL)
3463 tree adj = NULL_TREE;
3464 location_t loc = gimple_location (stmt);
3465 if (oldlen == NULL_TREE)
3467 else if (integer_zerop (oldlen))
3468 adj = newlen;
3469 else if (TREE_CODE (oldlen) == INTEGER_CST
3470 || TREE_CODE (newlen) == INTEGER_CST)
3471 adj = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (newlen), newlen,
3472 fold_convert_loc (loc, TREE_TYPE (newlen),
3473 oldlen));
3474 if (adj != NULL_TREE)
3475 adjust_related_strinfos (loc, dsi, adj);
3476 else
3477 dsi->prev = 0;
3479 /* memcpy src may not overlap dst, so src doesn't need to be
3480 invalidated either. */
3481 if (si != NULL)
3482 si->dont_invalidate = true;
3484 if (full_string_p)
3486 lhs = gimple_call_lhs (stmt);
3487 switch (bcode)
3489 case BUILT_IN_MEMCPY:
3490 case BUILT_IN_MEMCPY_CHK:
3491 /* Allow adjust_last_stmt to decrease this memcpy's size. */
3492 laststmt.stmt = stmt;
3493 laststmt.len = dsi->nonzero_chars;
3494 laststmt.stridx = dsi->idx;
3495 if (lhs)
3496 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
3497 break;
3498 case BUILT_IN_MEMPCPY:
3499 case BUILT_IN_MEMPCPY_CHK:
3500 break;
3501 default:
3502 gcc_unreachable ();
3507 /* Handle a strcat-like ({strcat,__strcat_chk}) call.
3508 If strlen of the second argument is known, strlen of the first argument
3509 is increased by the length of the second argument. Furthermore, attempt
3510 to convert it to memcpy/strcpy if the length of the first argument
3511 is known. */
3513 void
3514 strlen_pass::handle_builtin_strcat (built_in_function bcode)
3516 int idx, didx;
3517 tree srclen, args, type, fn, objsz, endptr;
3518 bool success;
3519 gimple *stmt = gsi_stmt (m_gsi);
3520 strinfo *si, *dsi;
3521 location_t loc = gimple_location (stmt);
3523 tree src = gimple_call_arg (stmt, 1);
3524 tree dst = gimple_call_arg (stmt, 0);
3526 /* Bail if the source is the same as destination. It will be diagnosed
3527 elsewhere. */
3528 if (operand_equal_p (src, dst, 0))
3529 return;
3531 tree lhs = gimple_call_lhs (stmt);
3533 didx = get_stridx (dst, stmt);
3534 if (didx < 0)
3535 return;
3537 dsi = NULL;
3538 if (didx > 0)
3539 dsi = get_strinfo (didx);
3541 srclen = NULL_TREE;
3542 si = NULL;
3543 idx = get_stridx (src, stmt);
3544 if (idx < 0)
3545 srclen = build_int_cst (size_type_node, ~idx);
3546 else if (idx > 0)
3548 si = get_strinfo (idx);
3549 if (si != NULL)
3550 srclen = get_string_length (si);
3553 /* Disable warning for the transformed statement? */
3554 opt_code no_warning_opt = no_warning;
3556 if (dsi == NULL || get_string_length (dsi) == NULL_TREE)
3559 /* The concatenation always involves copying at least one byte
3560 (the terminating nul), even if the source string is empty.
3561 If the source is unknown assume it's one character long and
3562 used that as both sizes. */
3563 tree slen = srclen;
3564 if (slen)
3566 tree type = TREE_TYPE (slen);
3567 slen = fold_build2 (PLUS_EXPR, type, slen, build_int_cst (type, 1));
3570 tree sptr = si && si->ptr ? si->ptr : src;
3571 no_warning_opt = check_bounds_or_overlap (stmt, dst, sptr, NULL_TREE,
3572 slen);
3573 if (no_warning_opt)
3574 suppress_warning (stmt, no_warning_opt);
3577 /* strcat (p, q) can be transformed into
3578 tmp = p + strlen (p); endptr = stpcpy (tmp, q);
3579 with length endptr - p if we need to compute the length
3580 later on. Don't do this transformation if we don't need
3581 it. */
3582 if (builtin_decl_implicit_p (BUILT_IN_STPCPY) && lhs == NULL_TREE)
3584 if (didx == 0)
3586 didx = new_stridx (dst);
3587 if (didx == 0)
3588 return;
3590 if (dsi == NULL)
3592 dsi = new_strinfo (dst, didx, NULL_TREE, false);
3593 set_strinfo (didx, dsi);
3594 find_equal_ptrs (dst, didx);
3596 else
3598 dsi = unshare_strinfo (dsi);
3599 dsi->nonzero_chars = NULL_TREE;
3600 dsi->full_string_p = false;
3601 dsi->next = 0;
3602 dsi->endptr = NULL_TREE;
3604 dsi->writable = true;
3605 dsi->stmt = stmt;
3606 dsi->dont_invalidate = true;
3608 return;
3611 tree dstlen = dsi->nonzero_chars;
3612 endptr = dsi->endptr;
3614 dsi = unshare_strinfo (dsi);
3615 dsi->endptr = NULL_TREE;
3616 dsi->stmt = NULL;
3617 dsi->writable = true;
3619 if (srclen != NULL_TREE)
3621 dsi->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
3622 TREE_TYPE (dsi->nonzero_chars),
3623 dsi->nonzero_chars, srclen);
3624 gcc_assert (dsi->full_string_p);
3625 adjust_related_strinfos (loc, dsi, srclen);
3626 dsi->dont_invalidate = true;
3628 else
3630 dsi->nonzero_chars = NULL;
3631 dsi->full_string_p = false;
3632 if (lhs == NULL_TREE && builtin_decl_implicit_p (BUILT_IN_STPCPY))
3633 dsi->dont_invalidate = true;
3636 if (si != NULL)
3637 /* strcat src may not overlap dst, so src doesn't need to be
3638 invalidated either. */
3639 si->dont_invalidate = true;
3641 /* For now. Could remove the lhs from the call and add
3642 lhs = dst; afterwards. */
3643 if (lhs)
3644 return;
3646 fn = NULL_TREE;
3647 objsz = NULL_TREE;
3648 switch (bcode)
3650 case BUILT_IN_STRCAT:
3651 if (srclen != NULL_TREE)
3652 fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
3653 else
3654 fn = builtin_decl_implicit (BUILT_IN_STRCPY);
3655 break;
3656 case BUILT_IN_STRCAT_CHK:
3657 if (srclen != NULL_TREE)
3658 fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
3659 else
3660 fn = builtin_decl_explicit (BUILT_IN_STRCPY_CHK);
3661 objsz = gimple_call_arg (stmt, 2);
3662 break;
3663 default:
3664 gcc_unreachable ();
3667 if (fn == NULL_TREE)
3668 return;
3670 if (dsi && dstlen)
3672 tree type = TREE_TYPE (dstlen);
3674 /* Compute the size of the source sequence, including the nul. */
3675 tree srcsize = srclen ? srclen : size_zero_node;
3676 tree one = build_int_cst (type, 1);
3677 srcsize = fold_build2 (PLUS_EXPR, type, srcsize, one);
3678 tree dstsize = fold_build2 (PLUS_EXPR, type, dstlen, one);
3679 tree sptr = si && si->ptr ? si->ptr : src;
3681 no_warning_opt = check_bounds_or_overlap (stmt, dst, sptr, dstsize,
3682 srcsize);
3683 if (no_warning_opt)
3684 suppress_warning (stmt, no_warning_opt);
3687 tree len = NULL_TREE;
3688 if (srclen != NULL_TREE)
3690 args = TYPE_ARG_TYPES (TREE_TYPE (fn));
3691 type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
3693 len = fold_convert_loc (loc, type, unshare_expr (srclen));
3694 len = fold_build2_loc (loc, PLUS_EXPR, type, len,
3695 build_int_cst (type, 1));
3696 len = force_gimple_operand_gsi (&m_gsi, len, true, NULL_TREE, true,
3697 GSI_SAME_STMT);
3699 if (endptr)
3700 dst = fold_convert_loc (loc, TREE_TYPE (dst), unshare_expr (endptr));
3701 else
3702 dst = fold_build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (dst), dst,
3703 fold_convert_loc (loc, sizetype,
3704 unshare_expr (dstlen)));
3705 dst = force_gimple_operand_gsi (&m_gsi, dst, true, NULL_TREE, true,
3706 GSI_SAME_STMT);
3707 if (objsz)
3709 objsz = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (objsz), objsz,
3710 fold_convert_loc (loc, TREE_TYPE (objsz),
3711 unshare_expr (dstlen)));
3712 objsz = force_gimple_operand_gsi (&m_gsi, objsz, true, NULL_TREE, true,
3713 GSI_SAME_STMT);
3715 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3717 fprintf (dump_file, "Optimizing: ");
3718 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3720 if (srclen != NULL_TREE)
3721 success = update_gimple_call (&m_gsi, fn, 3 + (objsz != NULL_TREE),
3722 dst, src, len, objsz);
3723 else
3724 success = update_gimple_call (&m_gsi, fn, 2 + (objsz != NULL_TREE),
3725 dst, src, objsz);
3726 if (success)
3728 stmt = gsi_stmt (m_gsi);
3729 update_stmt (stmt);
3730 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3732 fprintf (dump_file, "into: ");
3733 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3735 /* If srclen == NULL, note that current string length can be
3736 computed by transforming this strcpy into stpcpy. */
3737 if (srclen == NULL_TREE && dsi->dont_invalidate)
3738 dsi->stmt = stmt;
3739 adjust_last_stmt (dsi, stmt, true);
3740 if (srclen != NULL_TREE)
3742 laststmt.stmt = stmt;
3743 laststmt.len = srclen;
3744 laststmt.stridx = dsi->idx;
3747 else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3748 fprintf (dump_file, "not possible.\n");
3750 if (no_warning_opt)
3751 suppress_warning (stmt, no_warning_opt);
3754 /* Handle a call to an allocation function like alloca, malloc or calloc,
3755 or an ordinary allocation function declared with attribute alloc_size. */
3757 void
3758 strlen_pass::handle_alloc_call (built_in_function bcode)
3760 gimple *stmt = gsi_stmt (m_gsi);
3761 tree lhs = gimple_call_lhs (stmt);
3762 if (lhs == NULL_TREE)
3763 return;
3765 gcc_assert (get_stridx (lhs, stmt) == 0);
3766 int idx = new_stridx (lhs);
3767 tree length = NULL_TREE;
3768 if (bcode == BUILT_IN_CALLOC)
3769 length = build_int_cst (size_type_node, 0);
3770 strinfo *si = new_strinfo (lhs, idx, length, length != NULL_TREE);
3771 if (bcode == BUILT_IN_CALLOC)
3773 /* Only set STMT for calloc and malloc. */
3774 si->stmt = stmt;
3775 /* Only set ENDPTR for calloc. */
3776 si->endptr = lhs;
3778 else if (bcode == BUILT_IN_MALLOC)
3779 si->stmt = stmt;
3781 /* Set ALLOC is set for all allocation functions. */
3782 si->alloc = stmt;
3783 set_strinfo (idx, si);
3784 si->writable = true;
3785 si->dont_invalidate = true;
3788 /* Handle a call to memset.
3789 After a call to calloc, memset(,0,) is unnecessary.
3790 memset(malloc(n),0,n) is calloc(n,1).
3791 return true when the call is transformed, false otherwise.
3792 When nonnull uses RVALS to determine range information. */
3794 bool
3795 strlen_pass::handle_builtin_memset (bool *zero_write)
3797 gimple *memset_stmt = gsi_stmt (m_gsi);
3798 tree ptr = gimple_call_arg (memset_stmt, 0);
3799 tree memset_val = gimple_call_arg (memset_stmt, 1);
3800 tree memset_size = gimple_call_arg (memset_stmt, 2);
3802 /* Set to the non-constant offset added to PTR. */
3803 wide_int offrng[2];
3804 int idx1 = get_stridx (ptr, memset_stmt, offrng, ptr_qry.rvals);
3805 if (idx1 == 0
3806 && TREE_CODE (memset_val) == INTEGER_CST
3807 && ((TREE_CODE (memset_size) == INTEGER_CST
3808 && !integer_zerop (memset_size))
3809 || TREE_CODE (memset_size) == SSA_NAME))
3811 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << CHAR_TYPE_SIZE) - 1;
3812 bool full_string_p = (wi::to_wide (memset_val) & mask) == 0;
3814 /* We only handle symbolic lengths when writing non-zero values. */
3815 if (full_string_p && TREE_CODE (memset_size) != INTEGER_CST)
3816 return false;
3818 idx1 = new_stridx (ptr);
3819 if (idx1 == 0)
3820 return false;
3821 tree newlen;
3822 if (full_string_p)
3823 newlen = build_int_cst (size_type_node, 0);
3824 else if (TREE_CODE (memset_size) == INTEGER_CST)
3825 newlen = fold_convert (size_type_node, memset_size);
3826 else
3827 newlen = memset_size;
3829 strinfo *dsi = new_strinfo (ptr, idx1, newlen, full_string_p);
3830 set_strinfo (idx1, dsi);
3831 find_equal_ptrs (ptr, idx1);
3832 dsi->dont_invalidate = true;
3833 dsi->writable = true;
3834 return false;
3837 if (idx1 <= 0)
3838 return false;
3839 strinfo *si1 = get_strinfo (idx1);
3840 if (!si1)
3841 return false;
3842 gimple *alloc_stmt = si1->alloc;
3843 if (!alloc_stmt || !is_gimple_call (alloc_stmt))
3844 return false;
3845 tree callee1 = gimple_call_fndecl (alloc_stmt);
3846 if (!valid_builtin_call (alloc_stmt))
3847 return false;
3848 tree alloc_size = gimple_call_arg (alloc_stmt, 0);
3850 /* Check for overflow. */
3851 maybe_warn_overflow (memset_stmt, false, memset_size, NULL, false, true);
3853 /* Bail when there is no statement associated with the destination
3854 (the statement may be null even when SI1->ALLOC is not). */
3855 if (!si1->stmt)
3856 return false;
3858 /* Avoid optimizing if store is at a variable offset from the beginning
3859 of the allocated object. */
3860 if (offrng[0] != 0 || offrng[0] != offrng[1])
3861 return false;
3863 /* Bail when the call writes a non-zero value. */
3864 if (!integer_zerop (memset_val))
3865 return false;
3867 /* Let the caller know the memset call cleared the destination. */
3868 *zero_write = true;
3870 enum built_in_function code1 = DECL_FUNCTION_CODE (callee1);
3871 if (code1 == BUILT_IN_CALLOC)
3872 /* Not touching alloc_stmt */ ;
3873 else if (code1 == BUILT_IN_MALLOC
3874 && operand_equal_p (memset_size, alloc_size, 0))
3876 /* Replace the malloc + memset calls with calloc. */
3877 gimple_stmt_iterator gsi1 = gsi_for_stmt (si1->stmt);
3878 update_gimple_call (&gsi1, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
3879 alloc_size, build_one_cst (size_type_node));
3880 si1->nonzero_chars = build_int_cst (size_type_node, 0);
3881 si1->full_string_p = true;
3882 si1->stmt = gsi_stmt (gsi1);
3884 else
3885 return false;
3886 tree lhs = gimple_call_lhs (memset_stmt);
3887 unlink_stmt_vdef (memset_stmt);
3888 if (lhs)
3890 gimple *assign = gimple_build_assign (lhs, ptr);
3891 gsi_replace (&m_gsi, assign, false);
3893 else
3895 gsi_remove (&m_gsi, true);
3896 release_defs (memset_stmt);
3899 return true;
3902 /* Return first such statement if RES is used in statements testing its
3903 equality to zero, and null otherwise. If EXCLUSIVE is true, return
3904 nonnull if and only RES is used in such expressions exclusively and
3905 in none other. */
3907 gimple *
3908 use_in_zero_equality (tree res, bool exclusive)
3910 gimple *first_use = NULL;
3912 use_operand_p use_p;
3913 imm_use_iterator iter;
3915 FOR_EACH_IMM_USE_FAST (use_p, iter, res)
3917 gimple *use_stmt = USE_STMT (use_p);
3919 if (is_gimple_debug (use_stmt))
3920 continue;
3922 if (gimple_code (use_stmt) == GIMPLE_ASSIGN)
3924 tree_code code = gimple_assign_rhs_code (use_stmt);
3925 if (code == COND_EXPR)
3927 tree cond_expr = gimple_assign_rhs1 (use_stmt);
3928 if ((TREE_CODE (cond_expr) != EQ_EXPR
3929 && (TREE_CODE (cond_expr) != NE_EXPR))
3930 || !integer_zerop (TREE_OPERAND (cond_expr, 1)))
3932 if (exclusive)
3933 return NULL;
3934 continue;
3937 else if (code == EQ_EXPR || code == NE_EXPR)
3939 if (!integer_zerop (gimple_assign_rhs2 (use_stmt)))
3941 if (exclusive)
3942 return NULL;
3943 continue;
3946 else if (exclusive)
3947 return NULL;
3948 else
3949 continue;
3951 else if (gimple_code (use_stmt) == GIMPLE_COND)
3953 tree_code code = gimple_cond_code (use_stmt);
3954 if ((code != EQ_EXPR && code != NE_EXPR)
3955 || !integer_zerop (gimple_cond_rhs (use_stmt)))
3957 if (exclusive)
3958 return NULL;
3959 continue;
3962 else if (exclusive)
3963 return NULL;
3964 else
3965 continue;
3967 if (!first_use)
3968 first_use = use_stmt;
3971 return first_use;
3974 /* Handle a call to memcmp. We try to handle small comparisons by
3975 converting them to load and compare, and replacing the call to memcmp
3976 with a __builtin_memcmp_eq call where possible.
3977 return true when call is transformed, return false otherwise. */
3979 bool
3980 strlen_pass::handle_builtin_memcmp ()
3982 gcall *stmt = as_a <gcall *> (gsi_stmt (m_gsi));
3983 tree res = gimple_call_lhs (stmt);
3985 if (!res || !use_in_zero_equality (res))
3986 return false;
3988 tree arg1 = gimple_call_arg (stmt, 0);
3989 tree arg2 = gimple_call_arg (stmt, 1);
3990 tree len = gimple_call_arg (stmt, 2);
3991 unsigned HOST_WIDE_INT leni;
3993 if (tree_fits_uhwi_p (len)
3994 && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode)
3995 && pow2p_hwi (leni))
3997 leni *= CHAR_TYPE_SIZE;
3998 unsigned align1 = get_pointer_alignment (arg1);
3999 unsigned align2 = get_pointer_alignment (arg2);
4000 unsigned align = MIN (align1, align2);
4001 scalar_int_mode mode;
4002 if (int_mode_for_size (leni, 1).exists (&mode)
4003 && (align >= leni || !targetm.slow_unaligned_access (mode, align)))
4005 location_t loc = gimple_location (stmt);
4006 tree type, off;
4007 type = build_nonstandard_integer_type (leni, 1);
4008 gcc_assert (known_eq (GET_MODE_BITSIZE (TYPE_MODE (type)), leni));
4009 tree ptrtype = build_pointer_type_for_mode (char_type_node,
4010 ptr_mode, true);
4011 off = build_int_cst (ptrtype, 0);
4012 arg1 = build2_loc (loc, MEM_REF, type, arg1, off);
4013 arg2 = build2_loc (loc, MEM_REF, type, arg2, off);
4014 tree tem1 = fold_const_aggregate_ref (arg1);
4015 if (tem1)
4016 arg1 = tem1;
4017 tree tem2 = fold_const_aggregate_ref (arg2);
4018 if (tem2)
4019 arg2 = tem2;
4020 res = fold_convert_loc (loc, TREE_TYPE (res),
4021 fold_build2_loc (loc, NE_EXPR,
4022 boolean_type_node,
4023 arg1, arg2));
4024 gimplify_and_update_call_from_tree (&m_gsi, res);
4025 return true;
4029 gimple_call_set_fndecl (stmt, builtin_decl_explicit (BUILT_IN_MEMCMP_EQ));
4030 return true;
4033 /* Given strinfo IDX for ARG, sets LENRNG[] to the range of lengths
4034 of the string(s) referenced by ARG if it can be determined.
4035 If the length cannot be determined, sets *SIZE to the size of
4036 the array the string is stored in, if any. If no such array is
4037 known, sets *SIZE to -1. When the strings are nul-terminated sets
4038 *NULTERM to true, otherwise to false. When nonnull uses RVALS to
4039 determine range information. Returns true on success. */
4041 bool
4042 strlen_pass::get_len_or_size (gimple *stmt, tree arg, int idx,
4043 unsigned HOST_WIDE_INT lenrng[2],
4044 unsigned HOST_WIDE_INT *size, bool *nulterm)
4046 /* Invalidate. */
4047 *size = HOST_WIDE_INT_M1U;
4049 if (idx < 0)
4051 /* IDX is the inverted constant string length. */
4052 lenrng[0] = ~idx;
4053 lenrng[1] = lenrng[0];
4054 *nulterm = true;
4055 return true;
4058 /* Set so that both LEN and ~LEN are invalid lengths, i.e., maximum
4059 possible length + 1. */
4060 lenrng[0] = lenrng[1] = HOST_WIDE_INT_MAX;
4062 if (strinfo *si = idx ? get_strinfo (idx) : NULL)
4064 /* FIXME: Handle all this in_range_strlen_dynamic. */
4065 if (!si->nonzero_chars)
4067 else if (tree_fits_uhwi_p (si->nonzero_chars))
4069 lenrng[0] = tree_to_uhwi (si->nonzero_chars);
4070 *nulterm = si->full_string_p;
4071 /* Set the upper bound only if the string is known to be
4072 nul-terminated, otherwise leave it at maximum + 1. */
4073 if (*nulterm)
4074 lenrng[1] = lenrng[0];
4076 else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
4078 int_range_max r;
4079 if (get_range_query (cfun)->range_of_expr (r, si->nonzero_chars)
4080 && !r.undefined_p ()
4081 && !r.varying_p ())
4083 lenrng[0] = r.lower_bound ().to_uhwi ();
4084 lenrng[1] = r.upper_bound ().to_uhwi ();
4085 *nulterm = si->full_string_p;
4090 if (lenrng[0] != HOST_WIDE_INT_MAX)
4091 return true;
4093 /* Compute the minimum and maximum real or possible lengths. */
4094 c_strlen_data lendata = { };
4095 /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
4096 to have it set to the length of the longest string in a PHI. */
4097 lendata.maxbound = arg;
4098 get_range_strlen_dynamic (arg, stmt, &lendata, ptr_qry);
4100 unsigned HOST_WIDE_INT maxbound = HOST_WIDE_INT_M1U;
4101 if (tree_fits_uhwi_p (lendata.maxbound)
4102 && !integer_all_onesp (lendata.maxbound))
4103 maxbound = tree_to_uhwi (lendata.maxbound);
4105 if (tree_fits_uhwi_p (lendata.minlen) && tree_fits_uhwi_p (lendata.maxlen))
4107 unsigned HOST_WIDE_INT minlen = tree_to_uhwi (lendata.minlen);
4108 unsigned HOST_WIDE_INT maxlen = tree_to_uhwi (lendata.maxlen);
4110 /* The longest string in this data model. */
4111 const unsigned HOST_WIDE_INT lenmax
4112 = tree_to_uhwi (max_object_size ()) - 2;
4114 if (maxbound == HOST_WIDE_INT_M1U)
4116 lenrng[0] = minlen;
4117 lenrng[1] = maxlen;
4118 *nulterm = minlen == maxlen;
4120 else if (maxlen < lenmax)
4122 *size = maxbound + 1;
4123 *nulterm = false;
4125 else
4126 return false;
4128 return true;
4131 if (maxbound != HOST_WIDE_INT_M1U
4132 && lendata.maxlen
4133 && !integer_all_onesp (lendata.maxlen))
4135 /* Set *SIZE to LENDATA.MAXBOUND which is a conservative estimate
4136 of the longest string based on the sizes of the arrays referenced
4137 by ARG. */
4138 *size = maxbound + 1;
4139 *nulterm = false;
4140 return true;
4143 return false;
4146 /* If IDX1 and IDX2 refer to strings A and B of unequal lengths, return
4147 the result of 0 == strncmp (A, B, BOUND) (which is the same as strcmp
4148 for a sufficiently large BOUND). If the result is based on the length
4149 of one string being greater than the longest string that would fit in
4150 the array pointer to by the argument, set *PLEN and *PSIZE to
4151 the corresponding length (or its complement when the string is known
4152 to be at least as long and need not be nul-terminated) and size.
4153 Otherwise return null. */
4155 tree
4156 strlen_pass::strxcmp_eqz_result (gimple *stmt, tree arg1, int idx1,
4157 tree arg2, int idx2,
4158 unsigned HOST_WIDE_INT bound,
4159 unsigned HOST_WIDE_INT len[2],
4160 unsigned HOST_WIDE_INT *psize)
4162 /* Determine the range the length of each string is in and whether it's
4163 known to be nul-terminated, or the size of the array it's stored in. */
4164 bool nul1, nul2;
4165 unsigned HOST_WIDE_INT siz1, siz2;
4166 unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4167 if (!get_len_or_size (stmt, arg1, idx1, len1rng, &siz1, &nul1)
4168 || !get_len_or_size (stmt, arg2, idx2, len2rng, &siz2, &nul2))
4169 return NULL_TREE;
4171 /* BOUND is set to HWI_M1U for strcmp and less to strncmp, and LENiRNG
4172 to HWI_MAX when invalid. Adjust the length of each string to consider
4173 to be no more than BOUND. */
4174 if (len1rng[0] < HOST_WIDE_INT_MAX && len1rng[0] > bound)
4175 len1rng[0] = bound;
4176 if (len1rng[1] < HOST_WIDE_INT_MAX && len1rng[1] > bound)
4177 len1rng[1] = bound;
4178 if (len2rng[0] < HOST_WIDE_INT_MAX && len2rng[0] > bound)
4179 len2rng[0] = bound;
4180 if (len2rng[1] < HOST_WIDE_INT_MAX && len2rng[1] > bound)
4181 len2rng[1] = bound;
4183 /* Two empty strings are equal. */
4184 if (len1rng[1] == 0 && len2rng[1] == 0)
4185 return integer_one_node;
4187 /* The strings are definitely unequal when the lower bound of the length
4188 of one of them is greater than the length of the longest string that
4189 would fit into the other array. */
4190 if (len1rng[0] == HOST_WIDE_INT_MAX
4191 && len2rng[0] != HOST_WIDE_INT_MAX
4192 && ((len2rng[0] < bound && len2rng[0] >= siz1)
4193 || len2rng[0] > siz1))
4195 *psize = siz1;
4196 len[0] = len1rng[0];
4197 /* Set LEN[0] to the lower bound of ARG1's length when it's
4198 nul-terminated or to the complement of its minimum length
4199 otherwise, */
4200 len[1] = nul2 ? len2rng[0] : ~len2rng[0];
4201 return integer_zero_node;
4204 if (len2rng[0] == HOST_WIDE_INT_MAX
4205 && len1rng[0] != HOST_WIDE_INT_MAX
4206 && ((len1rng[0] < bound && len1rng[0] >= siz2)
4207 || len1rng[0] > siz2))
4209 *psize = siz2;
4210 len[0] = nul1 ? len1rng[0] : ~len1rng[0];
4211 len[1] = len2rng[0];
4212 return integer_zero_node;
4215 /* The strings are also definitely unequal when their lengths are unequal
4216 and at least one is nul-terminated. */
4217 if (len1rng[0] != HOST_WIDE_INT_MAX
4218 && len2rng[0] != HOST_WIDE_INT_MAX
4219 && ((len1rng[1] < len2rng[0] && nul1)
4220 || (len2rng[1] < len1rng[0] && nul2)))
4222 if (bound <= len1rng[0] || bound <= len2rng[0])
4223 *psize = bound;
4224 else
4225 *psize = HOST_WIDE_INT_M1U;
4227 len[0] = len1rng[0];
4228 len[1] = len2rng[0];
4229 return integer_zero_node;
4232 /* The string lengths may be equal or unequal. Even when equal and
4233 both strings nul-terminated, without the string contents there's
4234 no way to determine whether they are equal. */
4235 return NULL_TREE;
4238 /* Diagnose pointless calls to strcmp or strncmp STMT with string
4239 arguments of lengths LEN or size SIZ and (for strncmp) BOUND,
4240 whose result is used in equality expressions that evaluate to
4241 a constant due to one argument being longer than the size of
4242 the other. */
4244 static void
4245 maybe_warn_pointless_strcmp (gimple *stmt, HOST_WIDE_INT bound,
4246 unsigned HOST_WIDE_INT len[2],
4247 unsigned HOST_WIDE_INT siz)
4249 tree lhs = gimple_call_lhs (stmt);
4250 gimple *use = use_in_zero_equality (lhs, /* exclusive = */ false);
4251 if (!use)
4252 return;
4254 bool at_least = false;
4256 /* Excessive LEN[i] indicates a lower bound. */
4257 if (len[0] > HOST_WIDE_INT_MAX)
4259 at_least = true;
4260 len[0] = ~len[0];
4263 if (len[1] > HOST_WIDE_INT_MAX)
4265 at_least = true;
4266 len[1] = ~len[1];
4269 unsigned HOST_WIDE_INT minlen = MIN (len[0], len[1]);
4271 /* FIXME: Include a note pointing to the declaration of the smaller
4272 array. */
4273 location_t stmt_loc = gimple_or_expr_nonartificial_location (stmt, lhs);
4275 tree callee = gimple_call_fndecl (stmt);
4276 bool warned = false;
4277 if (siz <= minlen && bound == -1)
4278 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4279 (at_least
4280 ? G_("%qD of a string of length %wu or more and "
4281 "an array of size %wu evaluates to nonzero")
4282 : G_("%qD of a string of length %wu and an array "
4283 "of size %wu evaluates to nonzero")),
4284 callee, minlen, siz);
4285 else if (!at_least && siz <= HOST_WIDE_INT_MAX)
4287 if (len[0] != HOST_WIDE_INT_MAX && len[1] != HOST_WIDE_INT_MAX)
4288 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4289 "%qD of strings of length %wu and %wu "
4290 "and bound of %wu evaluates to nonzero",
4291 callee, len[0], len[1], bound);
4292 else
4293 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4294 "%qD of a string of length %wu, an array "
4295 "of size %wu and bound of %wu evaluates to "
4296 "nonzero",
4297 callee, minlen, siz, bound);
4300 if (!warned)
4301 return;
4303 location_t use_loc = gimple_location (use);
4304 if (LOCATION_LINE (stmt_loc) != LOCATION_LINE (use_loc))
4305 inform (use_loc, "in this expression");
4309 /* Optimize a call to strcmp or strncmp either by folding it to a constant
4310 when possible or by transforming the latter to the former. Warn about
4311 calls where the length of one argument is greater than the size of
4312 the array to which the other argument points if the latter's length
4313 is not known. Return true when the call has been transformed into
4314 another and false otherwise. */
4316 bool
4317 strlen_pass::handle_builtin_string_cmp ()
4319 gcall *stmt = as_a <gcall *> (gsi_stmt (m_gsi));
4320 tree lhs = gimple_call_lhs (stmt);
4322 if (!lhs)
4323 return false;
4325 tree arg1 = gimple_call_arg (stmt, 0);
4326 tree arg2 = gimple_call_arg (stmt, 1);
4327 int idx1 = get_stridx (arg1, stmt);
4328 int idx2 = get_stridx (arg2, stmt);
4330 /* For strncmp set to the value of the third argument if known. */
4331 HOST_WIDE_INT bound = -1;
4332 tree len = NULL_TREE;
4333 /* Extract the strncmp bound. */
4334 if (gimple_call_num_args (stmt) == 3)
4336 len = gimple_call_arg (stmt, 2);
4337 if (tree_fits_shwi_p (len))
4338 bound = tree_to_shwi (len);
4340 /* If the bound argument is NOT known, do nothing. */
4341 if (bound < 0)
4342 return false;
4345 /* Avoid folding if either argument is not a nul-terminated array.
4346 Defer warning until later. */
4347 if (!check_nul_terminated_array (NULL_TREE, arg1, len)
4348 || !check_nul_terminated_array (NULL_TREE, arg2, len))
4349 return false;
4352 /* Set to the length of one argument (or its complement if it's
4353 the lower bound of a range) and the size of the array storing
4354 the other if the result is based on the former being equal to
4355 or greater than the latter. */
4356 unsigned HOST_WIDE_INT len[2] = { HOST_WIDE_INT_MAX, HOST_WIDE_INT_MAX };
4357 unsigned HOST_WIDE_INT siz = HOST_WIDE_INT_M1U;
4359 /* Try to determine if the two strings are either definitely equal
4360 or definitely unequal and if so, either fold the result to zero
4361 (when equal) or set the range of the result to ~[0, 0] otherwise. */
4362 if (tree eqz = strxcmp_eqz_result (stmt, arg1, idx1, arg2, idx2, bound,
4363 len, &siz))
4365 if (integer_zerop (eqz))
4367 maybe_warn_pointless_strcmp (stmt, bound, len, siz);
4369 /* When the lengths of the first two string arguments are
4370 known to be unequal set the range of the result to non-zero.
4371 This allows the call to be eliminated if its result is only
4372 used in tests for equality to zero. */
4373 int_range_max nz;
4374 nz.set_nonzero (TREE_TYPE (lhs));
4375 set_range_info (lhs, nz);
4376 return false;
4378 /* When the two strings are definitely equal (such as when they
4379 are both empty) fold the call to the constant result. */
4380 replace_call_with_value (&m_gsi, integer_zero_node);
4381 return true;
4385 /* Return if nothing is known about the strings pointed to by ARG1
4386 and ARG2. */
4387 if (idx1 == 0 && idx2 == 0)
4388 return false;
4390 /* Determine either the length or the size of each of the strings,
4391 whichever is available. */
4392 HOST_WIDE_INT cstlen1 = -1, cstlen2 = -1;
4393 HOST_WIDE_INT arysiz1 = -1, arysiz2 = -1;
4396 unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4397 unsigned HOST_WIDE_INT arsz1, arsz2;
4398 bool nulterm[2];
4400 if (!get_len_or_size (stmt, arg1, idx1, len1rng, &arsz1, nulterm)
4401 || !get_len_or_size (stmt, arg2, idx2, len2rng, &arsz2, nulterm + 1))
4402 return false;
4404 if (len1rng[0] == len1rng[1] && len1rng[0] < HOST_WIDE_INT_MAX)
4405 cstlen1 = len1rng[0];
4406 else if (arsz1 < HOST_WIDE_INT_M1U)
4407 arysiz1 = arsz1;
4409 if (len2rng[0] == len2rng[1] && len2rng[0] < HOST_WIDE_INT_MAX)
4410 cstlen2 = len2rng[0];
4411 else if (arsz2 < HOST_WIDE_INT_M1U)
4412 arysiz2 = arsz2;
4415 /* Bail if neither the string length nor the size of the array
4416 it is stored in can be determined. */
4417 if ((cstlen1 < 0 && arysiz1 < 0)
4418 || (cstlen2 < 0 && arysiz2 < 0)
4419 || (cstlen1 < 0 && cstlen2 < 0))
4420 return false;
4422 if (cstlen1 >= 0)
4423 ++cstlen1;
4424 if (cstlen2 >= 0)
4425 ++cstlen2;
4427 /* The exact number of characters to compare. */
4428 HOST_WIDE_INT cmpsiz;
4429 if (cstlen1 >= 0 && cstlen2 >= 0)
4430 cmpsiz = MIN (cstlen1, cstlen2);
4431 else if (cstlen1 >= 0)
4432 cmpsiz = cstlen1;
4433 else
4434 cmpsiz = cstlen2;
4435 if (bound >= 0)
4436 cmpsiz = MIN (cmpsiz, bound);
4437 /* The size of the array in which the unknown string is stored. */
4438 HOST_WIDE_INT varsiz = arysiz1 < 0 ? arysiz2 : arysiz1;
4440 if ((varsiz < 0 || cmpsiz < varsiz) && use_in_zero_equality (lhs))
4442 /* If the known length is less than the size of the other array
4443 and the strcmp result is only used to test equality to zero,
4444 transform the call to the equivalent _eq call. */
4445 if (tree fn = builtin_decl_implicit (bound < 0 ? BUILT_IN_STRCMP_EQ
4446 : BUILT_IN_STRNCMP_EQ))
4448 tree n = build_int_cst (size_type_node, cmpsiz);
4449 update_gimple_call (&m_gsi, fn, 3, arg1, arg2, n);
4450 return true;
4454 return false;
4457 /* Handle a POINTER_PLUS_EXPR statement.
4458 For p = "abcd" + 2; compute associated length, or if
4459 p = q + off is pointing to a '\0' character of a string, call
4460 zero_length_string on it. */
4462 void
4463 strlen_pass::handle_pointer_plus ()
4465 gimple *stmt = gsi_stmt (m_gsi);
4466 tree lhs = gimple_assign_lhs (stmt), off;
4467 int idx = get_stridx (gimple_assign_rhs1 (stmt), stmt);
4468 strinfo *si, *zsi;
4470 if (idx == 0)
4471 return;
4473 if (idx < 0)
4475 tree off = gimple_assign_rhs2 (stmt);
4476 if (tree_fits_uhwi_p (off)
4477 && tree_to_uhwi (off) <= (unsigned HOST_WIDE_INT) ~idx)
4478 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)]
4479 = ~(~idx - (int) tree_to_uhwi (off));
4480 return;
4483 si = get_strinfo (idx);
4484 if (si == NULL || si->nonzero_chars == NULL_TREE)
4485 return;
4487 off = gimple_assign_rhs2 (stmt);
4488 zsi = NULL;
4489 if (si->full_string_p && operand_equal_p (si->nonzero_chars, off, 0))
4490 zsi = zero_length_string (lhs, si);
4491 else if (TREE_CODE (off) == SSA_NAME)
4493 gimple *def_stmt = SSA_NAME_DEF_STMT (off);
4494 if (gimple_assign_single_p (def_stmt)
4495 && si->full_string_p
4496 && operand_equal_p (si->nonzero_chars,
4497 gimple_assign_rhs1 (def_stmt), 0))
4498 zsi = zero_length_string (lhs, si);
4500 if (zsi != NULL
4501 && si->endptr != NULL_TREE
4502 && si->endptr != lhs
4503 && TREE_CODE (si->endptr) == SSA_NAME)
4505 enum tree_code rhs_code
4506 = useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (si->endptr))
4507 ? SSA_NAME : NOP_EXPR;
4508 gimple_assign_set_rhs_with_ops (&m_gsi, rhs_code, si->endptr);
4509 gcc_assert (gsi_stmt (m_gsi) == stmt);
4510 update_stmt (stmt);
4514 /* Set LENRANGE to the number of nonzero bytes for a store of TYPE and
4515 clear all flags. Return true on success and false on failure. */
4517 static bool
4518 nonzero_bytes_for_type (tree type, unsigned lenrange[3],
4519 bool *nulterm, bool *allnul, bool *allnonnul)
4521 /* Use the size of the type of the expression as the size of the store,
4522 and set the upper bound of the length range to that of the size.
4523 Nothing is known about the contents so clear all flags. */
4524 tree typesize = TYPE_SIZE_UNIT (type);
4525 if (!type)
4526 return false;
4528 if (!tree_fits_uhwi_p (typesize))
4529 return false;
4531 unsigned HOST_WIDE_INT sz = tree_to_uhwi (typesize);
4532 if (sz > UINT_MAX)
4533 return false;
4535 lenrange[2] = sz;
4536 lenrange[1] = lenrange[2] ? lenrange[2] - 1 : 0;
4537 lenrange[0] = 0;
4538 *nulterm = false;
4539 *allnul = false;
4540 *allnonnul = false;
4541 return true;
4544 /* Recursively determine the minimum and maximum number of leading nonzero
4545 bytes in the representation of EXP at memory state VUSE and set
4546 LENRANGE[0] and LENRANGE[1] to each.
4547 Sets LENRANGE[2] to the total size of the access (which may be less
4548 than LENRANGE[1] when what's being referenced by EXP is a pointer
4549 rather than an array).
4550 Sets *NULTERM if the representation contains a zero byte, sets *ALLNUL
4551 if all the bytes are zero, and *ALLNONNUL is all are nonzero.
4552 OFFSET and NBYTES are the offset into the representation and
4553 the size of the access to it determined from an ADDR_EXPR (i.e.,
4554 a pointer) or MEM_REF or zero for other expressions.
4555 Uses RVALS to determine range information.
4556 Avoids recursing deeper than the limits in SNLIM allow.
4557 Returns true on success and false otherwise. */
4559 bool
4560 strlen_pass::count_nonzero_bytes (tree exp, tree vuse, gimple *stmt,
4561 unsigned HOST_WIDE_INT offset,
4562 unsigned HOST_WIDE_INT nbytes,
4563 unsigned lenrange[3], bool *nulterm,
4564 bool *allnul, bool *allnonnul,
4565 ssa_name_limit_t &snlim)
4567 if (TREE_CODE (exp) == SSA_NAME)
4569 /* Handle non-zero single-character stores specially. */
4570 tree type = TREE_TYPE (exp);
4571 if (TREE_CODE (type) == INTEGER_TYPE
4572 && TYPE_MODE (type) == TYPE_MODE (char_type_node)
4573 && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node)
4574 && tree_expr_nonzero_p (exp))
4576 /* If the character EXP is known to be non-zero (even if its
4577 exact value is not known) recurse once to set the range
4578 for an arbitrary constant. */
4579 exp = build_int_cst (type, 1);
4580 return count_nonzero_bytes (exp, vuse, stmt,
4581 offset, 1, lenrange,
4582 nulterm, allnul, allnonnul, snlim);
4585 gimple *g = SSA_NAME_DEF_STMT (exp);
4586 if (gimple_assign_single_p (g))
4588 exp = gimple_assign_rhs1 (g);
4589 if (!DECL_P (exp)
4590 && TREE_CODE (exp) != CONSTRUCTOR
4591 && TREE_CODE (exp) != MEM_REF)
4592 return false;
4593 /* Handle DECLs, CONSTRUCTOR and MEM_REF below. */
4594 stmt = g;
4596 else if (gimple_code (g) == GIMPLE_PHI)
4598 /* Avoid processing an SSA_NAME that has already been visited
4599 or if an SSA_NAME limit has been reached. Indicate success
4600 if the former and failure if the latter. */
4601 if (int res = snlim.next_phi (exp))
4602 return res > 0;
4604 /* Determine the minimum and maximum from the PHI arguments. */
4605 unsigned int n = gimple_phi_num_args (g);
4606 for (unsigned i = 0; i != n; i++)
4608 tree def = gimple_phi_arg_def (g, i);
4609 if (!count_nonzero_bytes (def, vuse, g,
4610 offset, nbytes, lenrange, nulterm,
4611 allnul, allnonnul, snlim))
4612 return false;
4615 return true;
4619 if (TREE_CODE (exp) == CONSTRUCTOR)
4621 if (nbytes)
4622 /* If NBYTES has already been determined by an outer MEM_REF
4623 fail rather than overwriting it (this shouldn't happen). */
4624 return false;
4626 tree type = TREE_TYPE (exp);
4627 tree size = TYPE_SIZE_UNIT (type);
4628 if (!size || !tree_fits_uhwi_p (size))
4629 return false;
4631 unsigned HOST_WIDE_INT byte_size = tree_to_uhwi (size);
4632 if (byte_size < offset)
4633 return false;
4635 nbytes = byte_size - offset;
4638 if (TREE_CODE (exp) == MEM_REF)
4640 if (nbytes)
4641 return false;
4643 tree arg = TREE_OPERAND (exp, 0);
4644 tree off = TREE_OPERAND (exp, 1);
4646 if (TREE_CODE (off) != INTEGER_CST || !tree_fits_uhwi_p (off))
4647 return false;
4649 unsigned HOST_WIDE_INT wioff = tree_to_uhwi (off);
4650 if (INT_MAX < wioff)
4651 return false;
4653 offset += wioff;
4654 if (INT_MAX < offset)
4655 return false;
4657 /* The size of the MEM_REF access determines the number of bytes. */
4658 tree type = TREE_TYPE (exp);
4659 tree typesize = TYPE_SIZE_UNIT (type);
4660 if (!typesize || !tree_fits_uhwi_p (typesize))
4661 return false;
4662 nbytes = tree_to_uhwi (typesize);
4663 if (!nbytes)
4664 return false;
4666 /* Handle MEM_REF = SSA_NAME types of assignments. */
4667 return count_nonzero_bytes_addr (arg, vuse, stmt,
4668 offset, nbytes, lenrange, nulterm,
4669 allnul, allnonnul, snlim);
4672 if (VAR_P (exp) || TREE_CODE (exp) == CONST_DECL)
4674 /* If EXP can be folded into a constant use the result. Otherwise
4675 proceed to use EXP to determine a range of the result. */
4676 if (tree fold_exp = ctor_for_folding (exp))
4677 if (fold_exp != error_mark_node)
4678 exp = fold_exp;
4681 const char *prep = NULL;
4682 if (TREE_CODE (exp) == STRING_CST)
4684 unsigned nchars = TREE_STRING_LENGTH (exp);
4685 if (nchars < offset)
4686 return false;
4688 if (!nbytes)
4689 /* If NBYTES hasn't been determined earlier, either from ADDR_EXPR
4690 (i.e., it's the size of a pointer), or from MEM_REF (as the size
4691 of the access), set it here to the size of the string, including
4692 all internal and trailing nuls if the string has any. */
4693 nbytes = nchars - offset;
4694 else if (nchars - offset < nbytes)
4695 return false;
4697 prep = TREE_STRING_POINTER (exp) + offset;
4700 unsigned char buf[256];
4701 if (!prep)
4703 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
4704 return false;
4705 /* If the pointer to representation hasn't been set above
4706 for STRING_CST point it at the buffer. */
4707 prep = reinterpret_cast <char *>(buf);
4708 /* Try to extract the representation of the constant object
4709 or expression starting from the offset. */
4710 unsigned repsize = native_encode_expr (exp, buf, sizeof buf, offset);
4711 if (repsize < nbytes)
4713 /* This should only happen when REPSIZE is zero because EXP
4714 doesn't denote an object with a known initializer, except
4715 perhaps when the reference reads past its end. */
4716 lenrange[0] = 0;
4717 prep = NULL;
4719 else if (!nbytes)
4720 nbytes = repsize;
4721 else if (nbytes < repsize)
4722 return false;
4725 if (!nbytes)
4726 return nonzero_bytes_for_type (TREE_TYPE (exp), lenrange,
4727 nulterm, allnul, allnonnul);
4729 /* Compute the number of leading nonzero bytes in the representation
4730 and update the minimum and maximum. */
4731 unsigned HOST_WIDE_INT n = prep ? strnlen (prep, nbytes) : nbytes;
4733 if (n < lenrange[0])
4734 lenrange[0] = n;
4735 if (lenrange[1] < n)
4736 lenrange[1] = n;
4738 /* Set the size of the representation. */
4739 if (lenrange[2] < nbytes)
4740 lenrange[2] = nbytes;
4742 /* Clear NULTERM if none of the bytes is zero. */
4743 if (n == nbytes)
4744 *nulterm = false;
4746 if (n)
4748 /* When the initial number of non-zero bytes N is non-zero, reset
4749 *ALLNUL; if N is less than that the size of the representation
4750 also clear *ALLNONNUL. */
4751 *allnul = false;
4752 if (n < nbytes)
4753 *allnonnul = false;
4755 else if (*allnul || *allnonnul)
4757 *allnonnul = false;
4759 if (*allnul)
4761 /* When either ALLNUL is set and N is zero, also determine
4762 whether all subsequent bytes after the first one (which
4763 is nul) are zero or nonzero and clear ALLNUL if not. */
4764 for (const char *p = prep; p != prep + nbytes; ++p)
4765 if (*p)
4767 *allnul = false;
4768 break;
4773 return true;
4776 /* Like count_nonzero_bytes, but instead of counting bytes in EXP, count
4777 bytes that are pointed to by EXP, which should be a pointer. */
4779 bool
4780 strlen_pass::count_nonzero_bytes_addr (tree exp, tree vuse, gimple *stmt,
4781 unsigned HOST_WIDE_INT offset,
4782 unsigned HOST_WIDE_INT nbytes,
4783 unsigned lenrange[3], bool *nulterm,
4784 bool *allnul, bool *allnonnul,
4785 ssa_name_limit_t &snlim)
4787 int idx = get_stridx (exp, stmt);
4788 if (idx > 0)
4790 /* get_strinfo reflects string lengths before the current statement,
4791 where the current statement is the outermost count_nonzero_bytes
4792 stmt. If there are any stores in between stmt and that
4793 current statement, the string length information might describe
4794 something significantly different. */
4795 if (gimple_vuse (stmt) != vuse)
4796 return false;
4798 strinfo *si = get_strinfo (idx);
4799 if (!si)
4800 return false;
4802 /* Handle both constant lengths as well non-constant lengths
4803 in some range. */
4804 unsigned HOST_WIDE_INT minlen, maxlen;
4805 if (tree_fits_shwi_p (si->nonzero_chars))
4806 minlen = maxlen = tree_to_shwi (si->nonzero_chars);
4807 else if (si->nonzero_chars
4808 && TREE_CODE (si->nonzero_chars) == SSA_NAME)
4810 int_range_max vr;
4811 if (!ptr_qry.rvals->range_of_expr (vr, si->nonzero_chars, stmt)
4812 || vr.undefined_p ()
4813 || vr.varying_p ())
4814 return false;
4816 minlen = vr.lower_bound ().to_uhwi ();
4817 maxlen = vr.upper_bound ().to_uhwi ();
4819 else
4820 return false;
4822 if (maxlen < offset)
4823 return false;
4825 minlen = minlen < offset ? 0 : minlen - offset;
4826 maxlen -= offset;
4827 if (maxlen + 1 < nbytes)
4828 return false;
4830 if (nbytes <= minlen || !si->full_string_p)
4831 *nulterm = false;
4833 if (nbytes < minlen)
4835 minlen = nbytes;
4836 if (nbytes < maxlen)
4837 maxlen = nbytes;
4840 if (!si->full_string_p)
4841 maxlen = nbytes;
4843 if (minlen < lenrange[0])
4844 lenrange[0] = minlen;
4845 if (lenrange[1] < maxlen)
4846 lenrange[1] = maxlen;
4848 if (lenrange[2] < nbytes)
4849 lenrange[2] = nbytes;
4851 /* Since only the length of the string are known and not its contents,
4852 clear ALLNUL and ALLNONNUL purely on the basis of the length. */
4853 *allnul = false;
4854 if (minlen < nbytes)
4855 *allnonnul = false;
4857 return true;
4860 if (TREE_CODE (exp) == ADDR_EXPR)
4861 return count_nonzero_bytes (TREE_OPERAND (exp, 0), vuse, stmt,
4862 offset, nbytes,
4863 lenrange, nulterm, allnul, allnonnul, snlim);
4865 if (TREE_CODE (exp) == SSA_NAME)
4867 gimple *g = SSA_NAME_DEF_STMT (exp);
4868 if (gimple_code (g) == GIMPLE_PHI)
4870 /* Avoid processing an SSA_NAME that has already been visited
4871 or if an SSA_NAME limit has been reached. Indicate success
4872 if the former and failure if the latter. */
4873 if (int res = snlim.next_phi (exp))
4874 return res > 0;
4876 /* Determine the minimum and maximum from the PHI arguments. */
4877 unsigned int n = gimple_phi_num_args (g);
4878 for (unsigned i = 0; i != n; i++)
4880 tree def = gimple_phi_arg_def (g, i);
4881 if (!count_nonzero_bytes_addr (def, vuse, g,
4882 offset, nbytes, lenrange,
4883 nulterm, allnul, allnonnul,
4884 snlim))
4885 return false;
4888 return true;
4892 /* Otherwise we don't know anything. */
4893 lenrange[0] = 0;
4894 if (lenrange[1] < nbytes)
4895 lenrange[1] = nbytes;
4896 if (lenrange[2] < nbytes)
4897 lenrange[2] = nbytes;
4898 *nulterm = false;
4899 *allnul = false;
4900 *allnonnul = false;
4901 return true;
4904 /* Same as above except with an implicit SSA_NAME limit. When EXPR_OR_TYPE
4905 is a type rather than an expression use its size to compute the range.
4906 RVALS is used to determine ranges of dynamically computed string lengths
4907 (the results of strlen). */
4909 bool
4910 strlen_pass::count_nonzero_bytes (tree expr_or_type, gimple *stmt,
4911 unsigned lenrange[3], bool *nulterm,
4912 bool *allnul, bool *allnonnul)
4914 if (TYPE_P (expr_or_type))
4915 return nonzero_bytes_for_type (expr_or_type, lenrange,
4916 nulterm, allnul, allnonnul);
4918 /* Set to optimistic values so the caller doesn't have to worry about
4919 initializing these and to what. On success, the function will clear
4920 these if it determines their values are different but being recursive
4921 it never sets either to true. On failure, their values are
4922 unspecified. */
4923 *nulterm = true;
4924 *allnul = true;
4925 *allnonnul = true;
4927 ssa_name_limit_t snlim;
4928 tree expr = expr_or_type;
4929 return count_nonzero_bytes (expr, gimple_vuse (stmt), stmt,
4930 0, 0, lenrange, nulterm, allnul, allnonnul,
4931 snlim);
4934 /* Handle a single or multibyte store other than by a built-in function,
4935 either via a single character assignment or by multi-byte assignment
4936 either via MEM_REF or via a type other than char (such as in
4937 '*(int*)a = 12345'). Return true to let the caller advance *GSI to
4938 the next statement in the basic block and false otherwise. */
4940 bool
4941 strlen_pass::handle_store (bool *zero_write)
4943 gimple *stmt = gsi_stmt (m_gsi);
4944 /* The LHS and RHS of the store. The RHS is null if STMT is a function
4945 call. STORETYPE is the type of the store (determined from either
4946 the RHS of the assignment statement or the LHS of a function call. */
4947 tree lhs, rhs, storetype;
4948 if (is_gimple_assign (stmt))
4950 lhs = gimple_assign_lhs (stmt);
4951 rhs = gimple_assign_rhs1 (stmt);
4952 storetype = TREE_TYPE (rhs);
4954 else if (is_gimple_call (stmt))
4956 lhs = gimple_call_lhs (stmt);
4957 rhs = NULL_TREE;
4958 storetype = TREE_TYPE (lhs);
4960 else
4961 return true;
4963 tree ssaname = NULL_TREE;
4964 strinfo *si = NULL;
4965 int idx = -1;
4967 range_query *const rvals = ptr_qry.rvals;
4969 /* The offset of the first byte in LHS modified by the store. */
4970 unsigned HOST_WIDE_INT offset = 0;
4972 if (TREE_CODE (lhs) == MEM_REF
4973 && TREE_CODE (TREE_OPERAND (lhs, 0)) == SSA_NAME)
4975 tree mem_offset = TREE_OPERAND (lhs, 1);
4976 if (tree_fits_uhwi_p (mem_offset))
4978 /* Get the strinfo for the base, and use it if it starts with at
4979 least OFFSET nonzero characters. This is trivially true if
4980 OFFSET is zero. */
4981 offset = tree_to_uhwi (mem_offset);
4982 idx = get_stridx (TREE_OPERAND (lhs, 0), stmt);
4983 if (idx > 0)
4984 si = get_strinfo (idx);
4985 if (offset == 0)
4986 ssaname = TREE_OPERAND (lhs, 0);
4987 else if (si == NULL
4988 || compare_nonzero_chars (si, stmt, offset, rvals) < 0)
4990 *zero_write = rhs ? initializer_zerop (rhs) : false;
4992 bool dummy;
4993 unsigned lenrange[] = { UINT_MAX, 0, 0 };
4994 if (count_nonzero_bytes (rhs ? rhs : storetype, stmt, lenrange,
4995 &dummy, &dummy, &dummy))
4996 maybe_warn_overflow (stmt, true, lenrange[2]);
4998 return true;
5002 else
5004 idx = get_addr_stridx (lhs, stmt, NULL_TREE, &offset, rvals);
5005 if (idx > 0)
5006 si = get_strinfo (idx);
5009 /* Minimum and maximum leading non-zero bytes and the size of the store. */
5010 unsigned lenrange[] = { UINT_MAX, 0, 0 };
5012 /* Set to the minimum length of the string being assigned if known. */
5013 unsigned HOST_WIDE_INT rhs_minlen;
5015 /* STORING_NONZERO_P is true iff not all stored characters are zero.
5016 STORING_ALL_NONZERO_P is true if all stored characters are zero.
5017 STORING_ALL_ZEROS_P is true iff all stored characters are zero.
5018 Both are false when it's impossible to determine which is true. */
5019 bool storing_nonzero_p;
5020 bool storing_all_nonzero_p;
5021 bool storing_all_zeros_p;
5022 /* FULL_STRING_P is set when the stored sequence of characters form
5023 a nul-terminated string. */
5024 bool full_string_p;
5026 const bool ranges_valid
5027 = count_nonzero_bytes (rhs ? rhs : storetype, stmt,
5028 lenrange, &full_string_p,
5029 &storing_all_zeros_p, &storing_all_nonzero_p);
5031 if (ranges_valid)
5033 rhs_minlen = lenrange[0];
5034 storing_nonzero_p = lenrange[1] > 0;
5035 *zero_write = storing_all_zeros_p;
5037 maybe_warn_overflow (stmt, true, lenrange[2]);
5039 else
5041 rhs_minlen = HOST_WIDE_INT_M1U;
5042 full_string_p = false;
5043 storing_nonzero_p = false;
5044 storing_all_zeros_p = false;
5045 storing_all_nonzero_p = false;
5048 if (si != NULL)
5050 /* The count_nonzero_bytes call above might have unshared si.
5051 Fetch it again from the vector. */
5052 si = get_strinfo (idx);
5053 /* The corresponding element is set to 1 if the first and last
5054 element, respectively, of the sequence of characters being
5055 written over the string described by SI ends before
5056 the terminating nul (if it has one), to zero if the nul is
5057 being overwritten but not beyond, or negative otherwise. */
5058 int store_before_nul[2];
5059 if (ranges_valid)
5061 /* The offset of the last stored byte. */
5062 unsigned HOST_WIDE_INT endoff = offset + lenrange[2] - 1;
5063 store_before_nul[0]
5064 = compare_nonzero_chars (si, stmt, offset, rvals);
5065 if (endoff == offset)
5066 store_before_nul[1] = store_before_nul[0];
5067 else
5068 store_before_nul[1]
5069 = compare_nonzero_chars (si, stmt, endoff, rvals);
5071 else
5073 store_before_nul[0]
5074 = compare_nonzero_chars (si, stmt, offset, rvals);
5075 store_before_nul[1] = store_before_nul[0];
5076 gcc_assert (offset == 0 || store_before_nul[0] >= 0);
5079 if (storing_all_zeros_p
5080 && store_before_nul[0] == 0
5081 && store_before_nul[1] == 0
5082 && si->full_string_p)
5084 /* When overwriting a '\0' with a '\0', the store can be removed
5085 if we know it has been stored in the current function. */
5086 if (!stmt_could_throw_p (cfun, stmt) && si->writable)
5088 unlink_stmt_vdef (stmt);
5089 release_defs (stmt);
5090 gsi_remove (&m_gsi, true);
5091 return false;
5093 else
5095 si->writable = true;
5096 gsi_next (&m_gsi);
5097 return false;
5101 if (store_before_nul[1] > 0
5102 && storing_nonzero_p
5103 && lenrange[0] == lenrange[1]
5104 && lenrange[0] == lenrange[2]
5105 && TREE_CODE (storetype) == INTEGER_TYPE)
5107 /* Handle a store of one or more non-nul characters that ends
5108 before the terminating nul of the destination and so does
5109 not affect its length
5110 If si->nonzero_chars > OFFSET, we aren't overwriting '\0',
5111 and if we aren't storing '\0', we know that the length of
5112 the string and any other zero terminated string in memory
5113 remains the same. In that case we move to the next gimple
5114 statement and return to signal the caller that it shouldn't
5115 invalidate anything.
5117 This is beneficial for cases like:
5119 char p[20];
5120 void foo (char *q)
5122 strcpy (p, "foobar");
5123 size_t len = strlen (p); // can be folded to 6
5124 size_t len2 = strlen (q); // has to be computed
5125 p[0] = 'X';
5126 size_t len3 = strlen (p); // can be folded to 6
5127 size_t len4 = strlen (q); // can be folded to len2
5128 bar (len, len2, len3, len4);
5129 } */
5130 gsi_next (&m_gsi);
5131 return false;
5134 if (storing_nonzero_p
5135 || storing_all_zeros_p
5136 || (full_string_p && lenrange[1] == 0)
5137 || (offset != 0 && store_before_nul[1] > 0))
5139 /* When STORING_NONZERO_P, we know that the string will start
5140 with at least OFFSET + 1 nonzero characters. If storing
5141 a single character, set si->NONZERO_CHARS to the result.
5142 If storing multiple characters, try to determine the number
5143 of leading non-zero characters and set si->NONZERO_CHARS to
5144 the result instead.
5146 When STORING_ALL_ZEROS_P, or the first byte written is zero,
5147 i.e. FULL_STRING_P && LENRANGE[1] == 0, we know that the
5148 string is now OFFSET characters long.
5150 Otherwise, we're storing an unknown value at offset OFFSET,
5151 so need to clip the nonzero_chars to OFFSET.
5152 Use the minimum length of the string (or individual character)
5153 being stored if it's known. Otherwise, STORING_NONZERO_P
5154 guarantees it's at least 1. */
5155 HOST_WIDE_INT len
5156 = storing_nonzero_p && ranges_valid ? lenrange[0] : 1;
5157 location_t loc = gimple_location (stmt);
5158 tree oldlen = si->nonzero_chars;
5159 if (store_before_nul[1] == 0 && si->full_string_p)
5160 /* We're overwriting the nul terminator with a nonzero or
5161 unknown character. If the previous stmt was a memcpy,
5162 its length may be decreased. */
5163 adjust_last_stmt (si, stmt, false);
5164 si = unshare_strinfo (si);
5165 if (storing_nonzero_p)
5167 gcc_assert (len >= 0);
5168 si->nonzero_chars = build_int_cst (size_type_node, offset + len);
5170 else
5171 si->nonzero_chars = build_int_cst (size_type_node, offset);
5173 /* Set FULL_STRING_P only if the length of the strings being
5174 written is the same, and clear it if the strings have
5175 different lengths. In the latter case the length stored
5176 in si->NONZERO_CHARS becomes the lower bound.
5177 FIXME: Handle the upper bound of the length if possible. */
5178 si->full_string_p = full_string_p && lenrange[0] == lenrange[1];
5180 if (storing_all_zeros_p
5181 && ssaname
5182 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5183 si->endptr = ssaname;
5184 else
5185 si->endptr = NULL;
5186 si->next = 0;
5187 si->stmt = NULL;
5188 si->writable = true;
5189 si->dont_invalidate = true;
5190 if (oldlen)
5192 tree adj = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
5193 si->nonzero_chars, oldlen);
5194 adjust_related_strinfos (loc, si, adj);
5196 else
5197 si->prev = 0;
5200 else if (idx == 0 && (storing_all_zeros_p || storing_nonzero_p))
5202 if (ssaname)
5203 idx = new_stridx (ssaname);
5204 else
5205 idx = new_addr_stridx (lhs);
5206 if (idx != 0)
5208 tree ptr = (ssaname ? ssaname : build_fold_addr_expr (lhs));
5210 HOST_WIDE_INT slen;
5211 if (storing_all_zeros_p)
5212 slen = 0;
5213 else if (storing_nonzero_p && ranges_valid)
5215 /* FIXME: Handle the upper bound of the length when
5216 LENRANGE[0] != LENRANGE[1]. */
5217 slen = lenrange[0];
5218 if (lenrange[0] != lenrange[1])
5219 /* Set the minimum length but ignore the maximum
5220 for now. */
5221 full_string_p = false;
5223 else
5224 slen = -1;
5226 tree len = (slen <= 0
5227 ? size_zero_node
5228 : build_int_cst (size_type_node, slen));
5229 si = new_strinfo (ptr, idx, len, slen >= 0 && full_string_p);
5230 set_strinfo (idx, si);
5231 if (storing_all_zeros_p
5232 && ssaname
5233 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5234 si->endptr = ssaname;
5235 si->dont_invalidate = true;
5236 si->writable = true;
5239 else if (idx == 0
5240 && rhs_minlen < HOST_WIDE_INT_M1U
5241 && ssaname == NULL_TREE
5242 && TREE_CODE (TREE_TYPE (lhs)) == ARRAY_TYPE)
5244 HOST_WIDE_INT a = int_size_in_bytes (TREE_TYPE (lhs));
5245 if (a > 0 && (unsigned HOST_WIDE_INT) a > rhs_minlen)
5247 int idx = new_addr_stridx (lhs);
5248 if (idx != 0)
5250 si = new_strinfo (build_fold_addr_expr (lhs), idx,
5251 build_int_cst (size_type_node, rhs_minlen),
5252 full_string_p);
5253 set_strinfo (idx, si);
5254 si->dont_invalidate = true;
5259 if (si != NULL && offset == 0 && storing_all_zeros_p && lenrange[2] == 1)
5261 /* For single-byte stores only, allow adjust_last_stmt to remove
5262 the statement if the stored '\0' is immediately overwritten. */
5263 laststmt.stmt = stmt;
5264 laststmt.len = build_int_cst (size_type_node, 1);
5265 laststmt.stridx = si->idx;
5267 return true;
5270 /* Try to fold strstr (s, t) eq/ne s to strncmp (s, t, strlen (t)) eq/ne 0. */
5272 static void
5273 fold_strstr_to_strncmp (tree rhs1, tree rhs2, gimple *stmt)
5275 if (TREE_CODE (rhs1) != SSA_NAME
5276 || TREE_CODE (rhs2) != SSA_NAME)
5277 return;
5279 gimple *call_stmt = NULL;
5280 for (int pass = 0; pass < 2; pass++)
5282 gimple *g = SSA_NAME_DEF_STMT (rhs1);
5283 if (gimple_call_builtin_p (g, BUILT_IN_STRSTR)
5284 && has_single_use (rhs1)
5285 && gimple_call_arg (g, 0) == rhs2)
5287 call_stmt = g;
5288 break;
5290 std::swap (rhs1, rhs2);
5293 if (call_stmt)
5295 tree arg0 = gimple_call_arg (call_stmt, 0);
5297 if (arg0 == rhs2)
5299 tree arg1 = gimple_call_arg (call_stmt, 1);
5300 tree arg1_len = NULL_TREE;
5301 int idx = get_stridx (arg1, call_stmt);
5303 if (idx)
5305 if (idx < 0)
5306 arg1_len = build_int_cst (size_type_node, ~idx);
5307 else
5309 strinfo *si = get_strinfo (idx);
5310 if (si)
5311 arg1_len = get_string_length (si);
5315 if (arg1_len != NULL_TREE)
5317 gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
5318 tree strncmp_decl = builtin_decl_explicit (BUILT_IN_STRNCMP);
5320 if (!is_gimple_val (arg1_len))
5322 tree arg1_len_tmp = make_ssa_name (TREE_TYPE (arg1_len));
5323 gassign *arg1_stmt = gimple_build_assign (arg1_len_tmp,
5324 arg1_len);
5325 gsi_insert_before (&gsi, arg1_stmt, GSI_SAME_STMT);
5326 arg1_len = arg1_len_tmp;
5329 gcall *strncmp_call = gimple_build_call (strncmp_decl, 3,
5330 arg0, arg1, arg1_len);
5331 tree strncmp_lhs = make_ssa_name (integer_type_node);
5332 gimple_set_vuse (strncmp_call, gimple_vuse (call_stmt));
5333 gimple_call_set_lhs (strncmp_call, strncmp_lhs);
5334 gsi_remove (&gsi, true);
5335 gsi_insert_before (&gsi, strncmp_call, GSI_SAME_STMT);
5336 tree zero = build_zero_cst (TREE_TYPE (strncmp_lhs));
5338 if (is_gimple_assign (stmt))
5340 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
5342 tree cond = gimple_assign_rhs1 (stmt);
5343 TREE_OPERAND (cond, 0) = strncmp_lhs;
5344 TREE_OPERAND (cond, 1) = zero;
5346 else
5348 gimple_assign_set_rhs1 (stmt, strncmp_lhs);
5349 gimple_assign_set_rhs2 (stmt, zero);
5352 else
5354 gcond *cond = as_a<gcond *> (stmt);
5355 gimple_cond_set_lhs (cond, strncmp_lhs);
5356 gimple_cond_set_rhs (cond, zero);
5358 update_stmt (stmt);
5364 /* Return true if TYPE corresponds to a narrow character type. */
5366 static bool
5367 is_char_type (tree type)
5369 return (TREE_CODE (type) == INTEGER_TYPE
5370 && TYPE_MODE (type) == TYPE_MODE (char_type_node)
5371 && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node));
5374 /* Check the built-in call at GSI for validity and optimize it.
5375 Uses RVALS to determine range information.
5376 Return true to let the caller advance *GSI to the next statement
5377 in the basic block and false otherwise. */
5379 bool
5380 strlen_pass::check_and_optimize_call (bool *zero_write)
5382 gimple *stmt = gsi_stmt (m_gsi);
5384 if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
5386 tree fntype = gimple_call_fntype (stmt);
5387 if (!fntype)
5388 return true;
5390 if (lookup_attribute ("alloc_size", TYPE_ATTRIBUTES (fntype)))
5392 handle_alloc_call (BUILT_IN_NONE);
5393 return true;
5396 if (tree lhs = gimple_call_lhs (stmt))
5397 handle_assign (lhs, zero_write);
5399 /* Proceed to handle user-defined formatting functions. */
5402 /* When not optimizing we must be checking printf calls which
5403 we do even for user-defined functions when they are declared
5404 with attribute format. */
5405 if (!flag_optimize_strlen
5406 || !strlen_optimize
5407 || !valid_builtin_call (stmt))
5408 return !handle_printf_call (&m_gsi, ptr_qry);
5410 tree callee = gimple_call_fndecl (stmt);
5411 switch (DECL_FUNCTION_CODE (callee))
5413 case BUILT_IN_STRLEN:
5414 case BUILT_IN_STRNLEN:
5415 handle_builtin_strlen ();
5416 break;
5417 case BUILT_IN_STRCHR:
5418 handle_builtin_strchr ();
5419 break;
5420 case BUILT_IN_STRCPY:
5421 case BUILT_IN_STRCPY_CHK:
5422 case BUILT_IN_STPCPY:
5423 case BUILT_IN_STPCPY_CHK:
5424 handle_builtin_strcpy (DECL_FUNCTION_CODE (callee));
5425 break;
5427 case BUILT_IN_STRNCAT:
5428 case BUILT_IN_STRNCAT_CHK:
5429 handle_builtin_strncat (DECL_FUNCTION_CODE (callee));
5430 break;
5432 case BUILT_IN_STPNCPY:
5433 case BUILT_IN_STPNCPY_CHK:
5434 case BUILT_IN_STRNCPY:
5435 case BUILT_IN_STRNCPY_CHK:
5436 handle_builtin_stxncpy_strncat (false);
5437 break;
5439 case BUILT_IN_MEMCPY:
5440 case BUILT_IN_MEMCPY_CHK:
5441 case BUILT_IN_MEMPCPY:
5442 case BUILT_IN_MEMPCPY_CHK:
5443 handle_builtin_memcpy (DECL_FUNCTION_CODE (callee));
5444 break;
5445 case BUILT_IN_STRCAT:
5446 case BUILT_IN_STRCAT_CHK:
5447 handle_builtin_strcat (DECL_FUNCTION_CODE (callee));
5448 break;
5449 case BUILT_IN_ALLOCA:
5450 case BUILT_IN_ALLOCA_WITH_ALIGN:
5451 case BUILT_IN_MALLOC:
5452 case BUILT_IN_CALLOC:
5453 handle_alloc_call (DECL_FUNCTION_CODE (callee));
5454 break;
5455 case BUILT_IN_MEMSET:
5456 if (handle_builtin_memset (zero_write))
5457 return false;
5458 break;
5459 case BUILT_IN_MEMCMP:
5460 if (handle_builtin_memcmp ())
5461 return false;
5462 break;
5463 case BUILT_IN_STRCMP:
5464 case BUILT_IN_STRNCMP:
5465 if (handle_builtin_string_cmp ())
5466 return false;
5467 break;
5468 default:
5469 if (handle_printf_call (&m_gsi, ptr_qry))
5470 return false;
5471 break;
5474 return true;
5477 /* Handle an assignment statement at *GSI to a LHS of integral type.
5478 If GSI's basic block needs clean-up of EH, set *CLEANUP_EH to true. */
5480 void
5481 strlen_pass::handle_integral_assign (bool *cleanup_eh)
5483 gimple *stmt = gsi_stmt (m_gsi);
5484 tree lhs = gimple_assign_lhs (stmt);
5485 tree lhs_type = TREE_TYPE (lhs);
5487 enum tree_code code = gimple_assign_rhs_code (stmt);
5488 if (code == COND_EXPR)
5490 tree cond = gimple_assign_rhs1 (stmt);
5491 enum tree_code cond_code = TREE_CODE (cond);
5493 if (cond_code == EQ_EXPR || cond_code == NE_EXPR)
5494 fold_strstr_to_strncmp (TREE_OPERAND (cond, 0),
5495 TREE_OPERAND (cond, 1), stmt);
5497 else if (code == EQ_EXPR || code == NE_EXPR)
5498 fold_strstr_to_strncmp (gimple_assign_rhs1 (stmt),
5499 gimple_assign_rhs2 (stmt), stmt);
5500 else if (gimple_assign_load_p (stmt)
5501 && TREE_CODE (lhs_type) == INTEGER_TYPE
5502 && TYPE_MODE (lhs_type) == TYPE_MODE (char_type_node)
5503 && (TYPE_PRECISION (lhs_type)
5504 == TYPE_PRECISION (char_type_node))
5505 && !gimple_has_volatile_ops (stmt))
5507 tree off = integer_zero_node;
5508 unsigned HOST_WIDE_INT coff = 0;
5509 int idx = 0;
5510 tree rhs1 = gimple_assign_rhs1 (stmt);
5511 if (code == MEM_REF)
5513 idx = get_stridx (TREE_OPERAND (rhs1, 0), stmt);
5514 if (idx > 0)
5516 strinfo *si = get_strinfo (idx);
5517 if (si
5518 && si->nonzero_chars
5519 && TREE_CODE (si->nonzero_chars) == INTEGER_CST
5520 && (wi::to_widest (si->nonzero_chars)
5521 >= wi::to_widest (off)))
5522 off = TREE_OPERAND (rhs1, 1);
5523 else
5524 /* This case is not useful. See if get_addr_stridx
5525 returns something usable. */
5526 idx = 0;
5529 if (idx <= 0)
5530 idx = get_addr_stridx (rhs1, stmt, NULL_TREE, &coff);
5531 if (idx > 0)
5533 strinfo *si = get_strinfo (idx);
5534 if (si
5535 && si->nonzero_chars
5536 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
5538 widest_int w1 = wi::to_widest (si->nonzero_chars);
5539 widest_int w2 = wi::to_widest (off) + coff;
5540 if (w1 == w2
5541 && si->full_string_p)
5543 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5545 fprintf (dump_file, "Optimizing: ");
5546 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5549 /* Reading the final '\0' character. */
5550 tree zero = build_int_cst (lhs_type, 0);
5551 gimple_set_vuse (stmt, NULL_TREE);
5552 gimple_assign_set_rhs_from_tree (&m_gsi, zero);
5553 *cleanup_eh
5554 |= maybe_clean_or_replace_eh_stmt (stmt,
5555 gsi_stmt (m_gsi));
5556 stmt = gsi_stmt (m_gsi);
5557 update_stmt (stmt);
5559 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5561 fprintf (dump_file, "into: ");
5562 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5565 else if (w1 > w2)
5567 /* Reading a character before the final '\0'
5568 character. Just set the value range to ~[0, 0]
5569 if we don't have anything better. */
5570 int_range_max r;
5571 if (!get_range_query (cfun)->range_of_expr (r, lhs)
5572 || r.varying_p ())
5574 r.set_nonzero (lhs_type);
5575 set_range_info (lhs, r);
5581 else if (code == MEM_REF && TREE_CODE (lhs) == SSA_NAME)
5583 if (int idx = new_stridx (lhs))
5585 /* Record multi-byte assignments from MEM_REFs. */
5586 bool storing_all_nonzero_p;
5587 bool storing_all_zeros_p;
5588 bool full_string_p;
5589 unsigned lenrange[] = { UINT_MAX, 0, 0 };
5590 tree rhs = gimple_assign_rhs1 (stmt);
5591 const bool ranges_valid
5592 = count_nonzero_bytes (rhs, stmt,
5593 lenrange, &full_string_p,
5594 &storing_all_zeros_p,
5595 &storing_all_nonzero_p);
5596 if (ranges_valid)
5598 tree length = build_int_cst (sizetype, lenrange[0]);
5599 strinfo *si = new_strinfo (lhs, idx, length, full_string_p);
5600 set_strinfo (idx, si);
5601 si->writable = true;
5602 si->dont_invalidate = true;
5607 if (strlen_to_stridx)
5609 tree rhs1 = gimple_assign_rhs1 (stmt);
5610 if (stridx_strlenloc *ps = strlen_to_stridx->get (rhs1))
5611 strlen_to_stridx->put (lhs, stridx_strlenloc (*ps));
5615 /* Handle assignment statement at *GSI to LHS. Set *ZERO_WRITE if
5616 the assignment stores all zero bytes. */
5618 bool
5619 strlen_pass::handle_assign (tree lhs, bool *zero_write)
5621 tree type = TREE_TYPE (lhs);
5622 if (TREE_CODE (type) == ARRAY_TYPE)
5623 type = TREE_TYPE (type);
5625 bool is_char_store = is_char_type (type);
5626 if (!is_char_store && TREE_CODE (lhs) == MEM_REF)
5628 /* To consider stores into char objects via integer types other
5629 than char but not those to non-character objects, determine
5630 the type of the destination rather than just the type of
5631 the access. */
5632 for (int i = 0; i != 2; ++i)
5634 tree ref = TREE_OPERAND (lhs, i);
5635 type = TREE_TYPE (ref);
5636 if (TREE_CODE (type) == POINTER_TYPE)
5637 type = TREE_TYPE (type);
5638 if (TREE_CODE (type) == ARRAY_TYPE)
5639 type = TREE_TYPE (type);
5640 if (is_char_type (type))
5642 is_char_store = true;
5643 break;
5648 /* Handle a single or multibyte assignment. */
5649 if (is_char_store && !handle_store (zero_write))
5650 return false;
5652 return true;
5656 /* Attempt to check for validity of the performed access a single statement
5657 at *GSI using string length knowledge, and to optimize it.
5658 If the given basic block needs clean-up of EH, CLEANUP_EH is set to
5659 true. Return true to let the caller advance *GSI to the next statement
5660 in the basic block and false otherwise. */
5662 bool
5663 strlen_pass::check_and_optimize_stmt (bool *cleanup_eh)
5665 gimple *stmt = gsi_stmt (m_gsi);
5667 /* For statements that modify a string, set to true if the write
5668 is only zeros. */
5669 bool zero_write = false;
5671 if (is_gimple_call (stmt))
5673 if (!check_and_optimize_call (&zero_write))
5674 return false;
5676 else if (!flag_optimize_strlen || !strlen_optimize)
5677 return true;
5678 else if (is_gimple_assign (stmt) && !gimple_clobber_p (stmt))
5680 /* Handle non-clobbering assignment. */
5681 tree lhs = gimple_assign_lhs (stmt);
5682 tree lhs_type = TREE_TYPE (lhs);
5684 if (TREE_CODE (lhs) == SSA_NAME && POINTER_TYPE_P (lhs_type))
5686 if (gimple_assign_single_p (stmt)
5687 || (gimple_assign_cast_p (stmt)
5688 && POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt)))))
5690 int idx = get_stridx (gimple_assign_rhs1 (stmt), stmt);
5691 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = idx;
5693 else if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
5694 handle_pointer_plus ();
5696 else if (TREE_CODE (lhs) == SSA_NAME && INTEGRAL_TYPE_P (lhs_type))
5697 /* Handle assignment to a character. */
5698 handle_integral_assign (cleanup_eh);
5699 else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
5700 if (!handle_assign (lhs, &zero_write))
5701 return false;
5703 else if (gcond *cond = dyn_cast<gcond *> (stmt))
5705 enum tree_code code = gimple_cond_code (cond);
5706 if (code == EQ_EXPR || code == NE_EXPR)
5707 fold_strstr_to_strncmp (gimple_cond_lhs (stmt),
5708 gimple_cond_rhs (stmt), stmt);
5711 if (gimple_vdef (stmt))
5712 maybe_invalidate (stmt, zero_write);
5713 return true;
5716 /* Recursively call maybe_invalidate on stmts that might be executed
5717 in between dombb and current bb and that contain a vdef. Stop when
5718 *count stmts are inspected, or if the whole strinfo vector has
5719 been invalidated. */
5721 static void
5722 do_invalidate (basic_block dombb, gimple *phi, bitmap visited, int *count)
5724 unsigned int i, n = gimple_phi_num_args (phi);
5726 for (i = 0; i < n; i++)
5728 tree vuse = gimple_phi_arg_def (phi, i);
5729 gimple *stmt = SSA_NAME_DEF_STMT (vuse);
5730 basic_block bb = gimple_bb (stmt);
5731 if (bb == NULL
5732 || bb == dombb
5733 || !bitmap_set_bit (visited, bb->index)
5734 || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5735 continue;
5736 while (1)
5738 if (gimple_code (stmt) == GIMPLE_PHI)
5740 do_invalidate (dombb, stmt, visited, count);
5741 if (*count == 0)
5742 return;
5743 break;
5745 if (--*count == 0)
5746 return;
5747 if (!maybe_invalidate (stmt))
5749 *count = 0;
5750 return;
5752 vuse = gimple_vuse (stmt);
5753 stmt = SSA_NAME_DEF_STMT (vuse);
5754 if (gimple_bb (stmt) != bb)
5756 bb = gimple_bb (stmt);
5757 if (bb == NULL
5758 || bb == dombb
5759 || !bitmap_set_bit (visited, bb->index)
5760 || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5761 break;
5767 /* Release pointer_query cache. */
5769 strlen_pass::~strlen_pass ()
5771 ptr_qry.flush_cache ();
5774 /* Callback for walk_dominator_tree. Attempt to optimize various
5775 string ops by remembering string lengths pointed by pointer SSA_NAMEs. */
5777 edge
5778 strlen_pass::before_dom_children (basic_block bb)
5780 basic_block dombb = get_immediate_dominator (CDI_DOMINATORS, bb);
5782 if (dombb == NULL)
5783 stridx_to_strinfo = NULL;
5784 else
5786 stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) dombb->aux);
5787 if (stridx_to_strinfo)
5789 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5790 gsi_next (&gsi))
5792 gphi *phi = gsi.phi ();
5793 if (virtual_operand_p (gimple_phi_result (phi)))
5795 bitmap visited = BITMAP_ALLOC (NULL);
5796 int count_vdef = 100;
5797 do_invalidate (dombb, phi, visited, &count_vdef);
5798 BITMAP_FREE (visited);
5799 if (count_vdef == 0)
5801 /* If there were too many vdefs in between immediate
5802 dominator and current bb, invalidate everything.
5803 If stridx_to_strinfo has been unshared, we need
5804 to free it, otherwise just set it to NULL. */
5805 if (!strinfo_shared ())
5807 unsigned int i;
5808 strinfo *si;
5810 for (i = 1;
5811 vec_safe_iterate (stridx_to_strinfo, i, &si);
5812 ++i)
5814 free_strinfo (si);
5815 (*stridx_to_strinfo)[i] = NULL;
5818 else
5819 stridx_to_strinfo = NULL;
5821 break;
5827 /* If all PHI arguments have the same string index, the PHI result
5828 has it as well. */
5829 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5830 gsi_next (&gsi))
5832 gphi *phi = gsi.phi ();
5833 tree result = gimple_phi_result (phi);
5834 if (!virtual_operand_p (result) && POINTER_TYPE_P (TREE_TYPE (result)))
5836 int idx = get_stridx (gimple_phi_arg_def (phi, 0), phi);
5837 if (idx != 0)
5839 unsigned int i, n = gimple_phi_num_args (phi);
5840 for (i = 1; i < n; i++)
5841 if (idx != get_stridx (gimple_phi_arg_def (phi, i), phi))
5842 break;
5843 if (i == n)
5844 ssa_ver_to_stridx[SSA_NAME_VERSION (result)] = idx;
5849 bool cleanup_eh = false;
5851 /* Attempt to optimize individual statements. */
5852 for (m_gsi = gsi_start_bb (bb); !gsi_end_p (m_gsi); )
5854 /* Reset search depth performance counter. */
5855 ptr_qry.depth = 0;
5857 if (check_and_optimize_stmt (&cleanup_eh))
5858 gsi_next (&m_gsi);
5861 if (cleanup_eh && gimple_purge_dead_eh_edges (bb))
5862 m_cleanup_cfg = true;
5864 bb->aux = stridx_to_strinfo;
5865 if (vec_safe_length (stridx_to_strinfo) && !strinfo_shared ())
5866 (*stridx_to_strinfo)[0] = (strinfo *) bb;
5867 return NULL;
5870 /* Callback for walk_dominator_tree. Free strinfo vector if it is
5871 owned by the current bb, clear bb->aux. */
5873 void
5874 strlen_pass::after_dom_children (basic_block bb)
5876 if (bb->aux)
5878 stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) bb->aux);
5879 if (vec_safe_length (stridx_to_strinfo)
5880 && (*stridx_to_strinfo)[0] == (strinfo *) bb)
5882 unsigned int i;
5883 strinfo *si;
5885 for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
5886 free_strinfo (si);
5887 vec_free (stridx_to_strinfo);
5889 bb->aux = NULL;
5893 namespace {
5895 static unsigned int
5896 printf_strlen_execute (function *fun, bool warn_only)
5898 strlen_optimize = !warn_only;
5900 calculate_dominance_info (CDI_DOMINATORS);
5901 loop_optimizer_init (LOOPS_NORMAL);
5902 scev_initialize ();
5904 gcc_assert (!strlen_to_stridx);
5905 if (warn_stringop_overflow || warn_stringop_truncation)
5906 strlen_to_stridx = new hash_map<tree, stridx_strlenloc> ();
5908 /* This has to happen after initializing the loop optimizer
5909 and initializing SCEV as they create new SSA_NAMEs. */
5910 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
5911 max_stridx = 1;
5913 enable_ranger (fun);
5914 /* String length optimization is implemented as a walk of the dominator
5915 tree and a forward walk of statements within each block. */
5916 strlen_pass walker (fun, CDI_DOMINATORS);
5917 walker.walk (ENTRY_BLOCK_PTR_FOR_FN (fun));
5919 if (dump_file && (dump_flags & TDF_DETAILS))
5920 walker.ptr_qry.dump (dump_file, true);
5922 ssa_ver_to_stridx.release ();
5923 strinfo_pool.release ();
5924 if (decl_to_stridxlist_htab)
5926 obstack_free (&stridx_obstack, NULL);
5927 delete decl_to_stridxlist_htab;
5928 decl_to_stridxlist_htab = NULL;
5930 laststmt.stmt = NULL;
5931 laststmt.len = NULL_TREE;
5932 laststmt.stridx = 0;
5934 if (strlen_to_stridx)
5936 strlen_to_stridx->empty ();
5937 delete strlen_to_stridx;
5938 strlen_to_stridx = NULL;
5941 disable_ranger (fun);
5942 scev_finalize ();
5943 loop_optimizer_finalize ();
5945 return walker.m_cleanup_cfg ? TODO_cleanup_cfg : 0;
5948 /* This file defines two passes: one for warnings that runs only when
5949 optimization is disabled, and another that implements optimizations
5950 and also issues warnings. */
5952 const pass_data pass_data_warn_printf =
5954 GIMPLE_PASS, /* type */
5955 "warn-printf", /* name */
5956 OPTGROUP_NONE, /* optinfo_flags */
5957 TV_NONE, /* tv_id */
5958 /* Normally an optimization pass would require PROP_ssa but because
5959 this pass runs early, with no optimization, to do sprintf format
5960 checking, it only requires PROP_cfg. */
5961 PROP_cfg, /* properties_required */
5962 0, /* properties_provided */
5963 0, /* properties_destroyed */
5964 0, /* todo_flags_start */
5965 0, /* todo_flags_finish */
5968 class pass_warn_printf : public gimple_opt_pass
5970 public:
5971 pass_warn_printf (gcc::context *ctxt)
5972 : gimple_opt_pass (pass_data_warn_printf, ctxt)
5975 bool gate (function *) final override;
5976 unsigned int execute (function *fun) final override
5978 return printf_strlen_execute (fun, true);
5983 /* Return true to run the warning pass only when not optimizing and
5984 iff either -Wformat-overflow or -Wformat-truncation is specified. */
5986 bool
5987 pass_warn_printf::gate (function *)
5989 return !optimize && (warn_format_overflow > 0 || warn_format_trunc > 0);
5992 const pass_data pass_data_strlen =
5994 GIMPLE_PASS, /* type */
5995 "strlen", /* name */
5996 OPTGROUP_NONE, /* optinfo_flags */
5997 TV_TREE_STRLEN, /* tv_id */
5998 PROP_cfg | PROP_ssa, /* properties_required */
5999 0, /* properties_provided */
6000 0, /* properties_destroyed */
6001 0, /* todo_flags_start */
6002 0, /* todo_flags_finish */
6005 class pass_strlen : public gimple_opt_pass
6007 public:
6008 pass_strlen (gcc::context *ctxt)
6009 : gimple_opt_pass (pass_data_strlen, ctxt)
6012 opt_pass * clone () final override { return new pass_strlen (m_ctxt); }
6014 bool gate (function *) final override;
6015 unsigned int execute (function *fun) final override
6017 return printf_strlen_execute (fun, false);
6021 /* Return true to run the pass only when the sprintf and/or strlen
6022 optimizations are enabled and -Wformat-overflow or -Wformat-truncation
6023 are specified. */
6025 bool
6026 pass_strlen::gate (function *)
6028 return ((warn_format_overflow > 0
6029 || warn_format_trunc > 0
6030 || warn_restrict > 0
6031 || flag_optimize_strlen > 0
6032 || flag_printf_return_value)
6033 && optimize > 0);
6036 } // anon namespace
6038 gimple_opt_pass *
6039 make_pass_warn_printf (gcc::context *ctxt)
6041 return new pass_warn_printf (ctxt);
6044 gimple_opt_pass *
6045 make_pass_strlen (gcc::context *ctxt)
6047 return new pass_strlen (ctxt);