gettext: Sync with gettext 0.23.
[gnulib.git] / lib / fts.c
blob92306bc72f628f696ed49396181ef4a534cf6389
1 /* Traverse a file hierarchy.
3 Copyright (C) 2004-2024 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 /*-
19 * Copyright (c) 1990, 1993, 1994
20 * The Regents of the University of California. All rights reserved.
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions
24 * are met:
25 * 1. Redistributions of source code must retain the above copyright
26 * notice, this list of conditions and the following disclaimer.
27 * 2. Redistributions in binary form must reproduce the above copyright
28 * notice, this list of conditions and the following disclaimer in the
29 * documentation and/or other materials provided with the distribution.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44 * SUCH DAMAGE.
47 #include <config.h>
49 #if defined LIBC_SCCS && !defined GCC_LINT && !defined lint
50 static char sccsid[] = "@(#)fts.c 8.6 (Berkeley) 8/14/94";
51 #endif
53 #include "fts_.h"
55 #if HAVE_SYS_PARAM_H || defined _LIBC
56 # include <sys/param.h>
57 #endif
58 #ifdef _LIBC
59 # include <include/sys/stat.h>
60 #else
61 # include <sys/stat.h>
62 #endif
63 #include <fcntl.h>
64 #include <errno.h>
65 #include <stddef.h>
66 #include <stdint.h>
67 #include <stdlib.h>
68 #include <string.h>
69 #include <unistd.h>
71 #if ! _LIBC
72 # include "attribute.h"
73 # include "fcntl--.h"
74 # include "flexmember.h"
75 # include "openat.h"
76 # include "opendirat.h"
77 # include "same-inode.h"
78 #endif
80 #include <dirent.h>
/* Length of a directory entry's name.  Fall back to strlen when the
   platform headers have not already defined _D_EXACT_NAMLEN.  */
#if !defined _D_EXACT_NAMLEN
# define _D_EXACT_NAMLEN(dirent) strlen ((dirent)->d_name)
#endif

#if HAVE_STRUCT_DIRENT_D_TYPE
/* Nonzero if directory entry D reports a definite file type.  */
# define DT_IS_KNOWN(d) ((d)->d_type != DT_UNKNOWN)
/* Nonzero if directory entry D is known to be of type T.  */
# define DT_MUST_BE(d, t) ((d)->d_type == (t))
/* The file type recorded in directory entry D.  */
# define D_TYPE(d) ((d)->d_type)
#else
/* struct dirent has no d_type member: every type query answers
   "unknown".  */
# define DT_IS_KNOWN(d) false
# define DT_MUST_BE(d, t) false
# define D_TYPE(d) DT_UNKNOWN

# undef DT_UNKNOWN
# define DT_UNKNOWN 0

/* The DT_* values only need to be nonzero and mutually distinct.
   Discard any definitions that might already exist first.  */
# undef DT_BLK
# undef DT_CHR
# undef DT_DIR
# undef DT_FIFO
# undef DT_LNK
# undef DT_REG
# undef DT_SOCK
# define DT_BLK 1
# define DT_CHR 2
# define DT_DIR 3
# define DT_FIFO 4
# define DT_LNK 5
# define DT_REG 6
# define DT_SOCK 7
#endif
/* File-type bits that some platforms lack; a value of 0 can never
   match a real file type.  */
#if !defined S_IFBLK
# define S_IFBLK 0
#endif
#if !defined S_IFLNK
# define S_IFLNK 0
#endif
#if !defined S_IFSOCK
# define S_IFSOCK 0
#endif

/* Placeholder inode value used when a directory entry carries none.  */
enum { NOT_AN_INODE_NUMBER = 0 };

#ifdef D_INO_IN_DIRENT
# define D_INO(dp) (dp)->d_ino
#else
/* Some systems don't have inodes, so fake them to avoid lots of ifdefs.  */
# define D_INO(dp) NOT_AN_INODE_NUMBER
#endif
/* Upper bound on the number of directory entries read in one batch,
   when batching is possible (see max_entries, below).  Without this
   limit (i.e., when using non-NULL fts_compar), processing a directory
   with 4,000,000 entries requires ~1GiB of memory, and handling 64M
   entries would require 16GiB of memory.  */
#if !defined FTS_MAX_READDIR_ENTRIES
# define FTS_MAX_READDIR_ENTRIES 100000
#endif

/* A directory with more entries than this has them sorted on inode
   number before further processing, provided the conditions mentioned
   below are satisfied.  */
#if !defined FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD
# define FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD 10000
#endif

/* The same threshold, available as an enumeration constant.  */
enum
{
  _FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD = FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD
};

/* Values stored in an entry's st_size member while its fts_info is
   FTS_NSOK, telling fts_read whether it still has to stat the entry.  */
enum Fts_stat
{
  FTS_NO_STAT_REQUIRED = 1,
  FTS_STAT_REQUIRED = 2
};
#ifdef _LIBC
/* Inside libc, route these calls to the double-underscore names.  */
# undef close
# define close __close
# undef closedir
# define closedir __closedir
# undef fchdir
# define fchdir __fchdir
# undef open
# define open __open
# undef readdir
# define readdir __readdir
#else
/* Outside libc the internal_function decoration expands to nothing.  */
# undef internal_function
# define internal_function /* empty */
#endif

/* Assign VAL to errno unless a definition is already in scope.  */
#if !defined __set_errno
# define __set_errno(Val) errno = (Val)
#endif

/* If this host provides the openat function, then we can avoid
   attempting to open "." in some initialization code below.  */
#ifdef HAVE_OPENAT
# define HAVE_OPENAT_SUPPORT 1
#else
# define HAVE_OPENAT_SUPPORT 0
#endif
/* fts_assert (EXPR): abort when EXPR is false.  With NDEBUG defined,
   EXPR is still parsed but never evaluated.  */
#ifdef NDEBUG
# define fts_assert(expr) ((void) (0 && (expr)))
#else
# define fts_assert(expr)       \
    do                          \
      {                         \
        if (!(expr))            \
          abort ();             \
      }                         \
    while (false)
#endif

/* Inside libc, supply FALLTHROUGH for marking deliberate switch
   fall-through; it is a no-op when the attribute is unsupported.  */
#ifdef _LIBC
# if __glibc_has_attribute (__fallthrough__)
#  define FALLTHROUGH __attribute__ ((__fallthrough__))
# else
#  define FALLTHROUGH ((void) 0)
# endif
#endif
213 static FTSENT *fts_alloc (FTS *, const char *, size_t) internal_function;
214 static FTSENT *fts_build (FTS *, int) internal_function;
215 static void fts_lfree (FTSENT *) internal_function;
216 static void fts_load (FTS *, FTSENT *) internal_function;
217 static size_t fts_maxarglen (char * const *) internal_function;
218 static void fts_padjust (FTS *, FTSENT *) internal_function;
219 static bool fts_palloc (FTS *, size_t) internal_function;
220 static FTSENT *fts_sort (FTS *, FTSENT *, size_t) internal_function;
221 static unsigned short int fts_stat (FTS *, FTSENT *, bool) internal_function;
222 static int fts_safe_changedir (FTS *, FTSENT *, int, const char *)
223 internal_function;
225 #include "fts-cycle.c"
/* Larger of A and B.  Each argument may be evaluated twice, so do not
   pass expressions with side effects.  */
#ifndef MAX
# define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif

#ifndef SIZE_MAX
# define SIZE_MAX ((size_t) -1)
#endif

/* True if the string A is exactly "." or "..".  The argument is
   parenthesized so that pointer expressions such as NAME + 1 expand
   correctly.  */
#define ISDOT(a) \
  ((a)[0] == '.' && (!(a)[1] || ((a)[1] == '.' && !(a)[2])))
/* True if the strings A and B have identical contents.  */
#define STREQ(a, b) (strcmp ((a), (b)) == 0)

/* Clear, test and set option bits.  All three operate on the
   fts_options member of a variable named sp that must be in scope
   where the macro is used.  */
#define CLR(opt)	(sp->fts_options &= ~(opt))
#define ISSET(opt)	((sp->fts_options & (opt)) != 0)
#define SET(opt)	(sp->fts_options |= (opt))
/* FIXME: FTS_NOCHDIR is now misnamed.
   Call it FTS_USE_FULL_RELATIVE_FILE_NAMES instead. */
/* Make FD the working directory of the stream SP.  Evaluates to 0
   without doing anything when FTS_NOCHDIR is set.  In FTS_CWDFD mode
   the change is virtual (cwd_advance_fd) and the result is 0;
   otherwise the result is that of fchdir.  Relies on a variable named
   sp being in scope, as ISSET does.  */
#define FCHDIR(sp, fd)                                          \
  (!ISSET(FTS_NOCHDIR)                                          \
   && (ISSET(FTS_CWDFD)                                         \
       ? (cwd_advance_fd ((sp), (fd), true), 0)                 \
       : fchdir (fd)))
/* Request codes passed as the second argument of fts_build.  */
/* FIXME: make this an enum */
#define BCHILD 1	/* called from fts_children */
#define BNAMES 2	/* called from fts_children, names only */
#define BREAD 3		/* called from fts_read */
/* Debugging aids.  When GNULIB_FTS_DEBUG is nonzero, Dprintf prints
   through printf if fts_debug is set, and the fd_ring checkers are
   real functions.  Otherwise all three expand to nothing.  */
#if GNULIB_FTS_DEBUG
# include <inttypes.h>
# include <stdio.h>
bool fts_debug = false;
# define Dprintf(x) do { if (fts_debug) printf x; } while (false)
static void fd_ring_check (FTS const *);
static void fd_ring_print (FTS const *, FILE *, char const *);
#else
# define Dprintf(x)
# define fd_ring_check(x)
# define fd_ring_print(a, b, c)
#endif
/* Emit a debug trace labelled TAG, call leave_dir for entry ENT of
   stream FTS, then run the fd_ring consistency check.  */
#define LEAVE_DIR(Fts, Ent, Tag)                                \
  do                                                            \
    {                                                           \
      Dprintf ((" %s-leaving: %s\n", Tag, (Ent)->fts_path));    \
      leave_dir (Fts, Ent);                                     \
      fd_ring_check (Fts);                                      \
    }                                                           \
  while (false)
278 static void
279 fd_ring_clear (I_ring *fd_ring)
281 while ( ! i_ring_empty (fd_ring))
283 int fd = i_ring_pop (fd_ring);
284 if (0 <= fd)
285 close (fd);
289 /* Overload the fts_statp->st_size member (otherwise unused, when
290 fts_info is FTS_NSOK) to indicate whether fts_read should stat
291 this entry or not. */
292 static void
293 fts_set_stat_required (FTSENT *p, bool required)
295 fts_assert (p->fts_info == FTS_NSOK);
296 p->fts_statp->st_size = (required
297 ? FTS_STAT_REQUIRED
298 : FTS_NO_STAT_REQUIRED);
301 /* Virtual fchdir. Advance SP's working directory file descriptor,
302 SP->fts_cwd_fd, to FD, and push the previous value onto the fd_ring.
303 CHDIR_DOWN_ONE is true if FD corresponds to an entry in the directory
304 open on sp->fts_cwd_fd; i.e., to move the working directory one level
305 down. */
306 static void
307 internal_function
308 cwd_advance_fd (FTS *sp, int fd, bool chdir_down_one)
310 int old = sp->fts_cwd_fd;
311 fts_assert (old != fd || old == AT_FDCWD);
313 if (chdir_down_one)
315 /* Push "old" onto the ring.
316 If the displaced file descriptor is non-negative, close it. */
317 int prev_fd_in_slot = i_ring_push (&sp->fts_fd_ring, old);
318 fd_ring_print (sp, stderr, "post-push");
319 if (0 <= prev_fd_in_slot)
320 close (prev_fd_in_slot); /* ignore any close failure */
322 else if ( ! ISSET (FTS_NOCHDIR))
324 if (0 <= old)
325 close (old); /* ignore any close failure */
328 sp->fts_cwd_fd = fd;
331 /* Restore the initial, pre-traversal, "working directory".
332 In FTS_CWDFD mode, we merely call cwd_advance_fd, otherwise,
333 we may actually change the working directory.
334 Return 0 upon success. Upon failure, set errno and return nonzero. */
335 static int
336 restore_initial_cwd (FTS *sp)
338 int fail = FCHDIR (sp, ISSET (FTS_CWDFD) ? AT_FDCWD : sp->fts_rfd);
339 fd_ring_clear (&(sp->fts_fd_ring));
340 return fail;
343 /* Open the directory DIR if possible, and return a file
344 descriptor. Return -1 and set errno on failure. It doesn't matter
345 whether the file descriptor has read or write access. */
347 static int
348 internal_function
349 diropen (FTS const *sp, char const *dir)
351 int open_flags = (O_SEARCH | O_CLOEXEC | O_DIRECTORY | O_NOCTTY | O_NONBLOCK
352 | (ISSET (FTS_PHYSICAL) ? O_NOFOLLOW : 0));
354 int fd = (ISSET (FTS_CWDFD)
355 ? openat (sp->fts_cwd_fd, dir, open_flags)
356 : open (dir, open_flags));
357 return fd;
360 FTS *
361 fts_open (char * const *argv,
362 register int options,
363 int (*compar) (FTSENT const **, FTSENT const **))
365 register FTS *sp;
366 register FTSENT *p, *root;
367 register size_t nitems;
368 FTSENT *parent = NULL;
369 FTSENT *tmp = NULL; /* pacify gcc */
370 bool defer_stat;
372 /* Options check. */
373 if (options & ~FTS_OPTIONMASK) {
374 __set_errno (EINVAL);
375 return (NULL);
377 if ((options & FTS_NOCHDIR) && (options & FTS_CWDFD)) {
378 __set_errno (EINVAL);
379 return (NULL);
381 if ( ! (options & (FTS_LOGICAL | FTS_PHYSICAL))) {
382 __set_errno (EINVAL);
383 return (NULL);
386 /* Allocate/initialize the stream */
387 sp = calloc (1, sizeof *sp);
388 if (sp == NULL)
389 return (NULL);
390 sp->fts_compar = compar;
391 sp->fts_options = options;
393 /* Logical walks turn on NOCHDIR; symbolic links are too hard. */
394 if (ISSET(FTS_LOGICAL)) {
395 SET(FTS_NOCHDIR);
396 CLR(FTS_CWDFD);
399 /* Initialize fts_cwd_fd. */
400 sp->fts_cwd_fd = AT_FDCWD;
401 if ( ISSET(FTS_CWDFD) && ! HAVE_OPENAT_SUPPORT)
403 /* While it isn't technically necessary to open "." this
404 early, doing it here saves us the trouble of ensuring
405 later (where it'd be messier) that "." can in fact
406 be opened. If not, revert to FTS_NOCHDIR mode. */
407 int fd = open (".", O_SEARCH | O_CLOEXEC);
408 if (fd < 0)
410 /* Even if "." is unreadable, don't revert to FTS_NOCHDIR mode
411 on systems like Linux+PROC_FS, where our openat emulation
412 is good enough. Note: on a system that emulates
413 openat via /proc, this technique can still fail, but
414 only in extreme conditions, e.g., when the working
415 directory cannot be saved (i.e. save_cwd fails) --
416 and that happens on Linux only when "." is unreadable
417 and the CWD would be longer than PATH_MAX.
418 FIXME: once Linux kernel openat support is well established,
419 replace the above open call and this entire if/else block
420 with the body of the if-block below. */
421 if ( openat_needs_fchdir ())
423 SET(FTS_NOCHDIR);
424 CLR(FTS_CWDFD);
427 else
429 close (fd);
434 * Start out with 1K of file name space, and enough, in any case,
435 * to hold the user's file names.
437 #ifndef MAXPATHLEN
438 # define MAXPATHLEN 1024
439 #endif
441 size_t maxarglen = fts_maxarglen(argv);
442 if (! fts_palloc(sp, MAX(maxarglen, MAXPATHLEN)))
443 goto mem1;
446 /* Allocate/initialize root's parent. */
447 if (*argv != NULL) {
448 if ((parent = fts_alloc(sp, "", 0)) == NULL)
449 goto mem2;
450 parent->fts_level = FTS_ROOTPARENTLEVEL;
453 /* The classic fts implementation would call fts_stat with
454 a new entry for each iteration of the loop below.
455 If the comparison function is not specified or if the
456 FTS_DEFER_STAT option is in effect, don't stat any entry
457 in this loop. This is an attempt to minimize the interval
458 between the initial stat/lstat/fstatat and the point at which
459 a directory argument is first opened. This matters for any
460 directory command line argument that resides on a file system
461 without genuine i-nodes. If you specify FTS_DEFER_STAT along
462 with a comparison function, that function must not access any
463 data via the fts_statp pointer. */
464 defer_stat = (compar == NULL || ISSET(FTS_DEFER_STAT));
466 /* Allocate/initialize root(s). */
467 for (root = NULL, nitems = 0; *argv != NULL; ++argv, ++nitems) {
468 /* *Do* allow zero-length file names. */
469 size_t len = strlen(*argv);
471 if ( ! (options & FTS_VERBATIM))
473 /* If there are two or more trailing slashes, trim all but one,
474 but don't change "//" to "/", and do map "///" to "/". */
475 char const *v = *argv;
476 if (2 < len && v[len - 1] == '/')
477 while (1 < len && v[len - 2] == '/')
478 --len;
481 if ((p = fts_alloc(sp, *argv, len)) == NULL)
482 goto mem3;
483 p->fts_level = FTS_ROOTLEVEL;
484 p->fts_parent = parent;
485 p->fts_accpath = p->fts_name;
486 /* Even when defer_stat is true, be sure to stat the first
487 command line argument, since fts_read (at least with
488 FTS_XDEV) requires that. */
489 if (defer_stat && root != NULL) {
490 p->fts_info = FTS_NSOK;
491 fts_set_stat_required(p, true);
492 } else {
493 p->fts_info = fts_stat(sp, p, false);
497 * If comparison routine supplied, traverse in sorted
498 * order; otherwise traverse in the order specified.
500 if (compar) {
501 p->fts_link = root;
502 root = p;
503 } else {
504 p->fts_link = NULL;
505 if (root == NULL)
506 tmp = root = p;
507 else {
508 tmp->fts_link = p;
509 tmp = p;
513 if (compar && nitems > 1)
514 root = fts_sort(sp, root, nitems);
517 * Allocate a dummy pointer and make fts_read think that we've just
518 * finished the node before the root(s); set p->fts_info to FTS_INIT
519 * so that everything about the "current" node is ignored.
521 if ((sp->fts_cur = fts_alloc(sp, "", 0)) == NULL)
522 goto mem3;
523 sp->fts_cur->fts_link = root;
524 sp->fts_cur->fts_info = FTS_INIT;
525 sp->fts_cur->fts_level = 1;
526 if (! setup_dir (sp))
527 goto mem3;
530 * If using chdir(2), grab a file descriptor pointing to dot to ensure
531 * that we can get back here; this could be avoided for some file names,
532 * but almost certainly not worth the effort. Slashes, symbolic links,
533 * and ".." are all fairly nasty problems. Note, if we can't get the
534 * descriptor we run anyway, just more slowly.
536 if (!ISSET(FTS_NOCHDIR) && !ISSET(FTS_CWDFD)
537 && (sp->fts_rfd = diropen (sp, ".")) < 0)
538 SET(FTS_NOCHDIR);
540 i_ring_init (&sp->fts_fd_ring, -1);
541 return (sp);
543 mem3: fts_lfree(root);
544 free(parent);
545 mem2: free(sp->fts_path);
546 mem1: free(sp);
547 return (NULL);
550 static void
551 internal_function
552 fts_load (FTS *sp, register FTSENT *p)
554 register size_t len;
555 register char *cp;
558 * Load the stream structure for the next traversal. Since we don't
559 * actually enter the directory until after the preorder visit, set
560 * the fts_accpath field specially so the chdir gets done to the right
561 * place and the user can access the first node. From fts_open it's
562 * known that the file name will fit.
564 len = p->fts_pathlen = p->fts_namelen;
565 memmove(sp->fts_path, p->fts_name, len + 1);
566 if ((cp = strrchr(p->fts_name, '/')) && (cp != p->fts_name || cp[1])) {
567 len = strlen(++cp);
568 memmove(p->fts_name, cp, len + 1);
569 p->fts_namelen = len;
571 p->fts_accpath = p->fts_path = sp->fts_path;
575 fts_close (FTS *sp)
577 register FTSENT *freep, *p;
578 int saved_errno = 0;
581 * This still works if we haven't read anything -- the dummy structure
582 * points to the root list, so we step through to the end of the root
583 * list which has a valid parent pointer.
585 if (sp->fts_cur) {
586 for (p = sp->fts_cur; p->fts_level >= FTS_ROOTLEVEL;) {
587 freep = p;
588 p = p->fts_link != NULL ? p->fts_link : p->fts_parent;
589 free(freep);
591 free(p);
594 /* Free up child linked list, sort array, file name buffer. */
595 if (sp->fts_child)
596 fts_lfree(sp->fts_child);
597 free(sp->fts_array);
598 free(sp->fts_path);
600 if (ISSET(FTS_CWDFD))
602 if (0 <= sp->fts_cwd_fd)
603 if (close (sp->fts_cwd_fd))
604 saved_errno = errno;
606 else if (!ISSET(FTS_NOCHDIR))
608 /* Return to original directory, save errno if necessary. */
609 if (fchdir(sp->fts_rfd))
610 saved_errno = errno;
612 /* If close fails, record errno only if saved_errno is zero,
613 so that we report the probably-more-meaningful fchdir errno. */
614 if (close (sp->fts_rfd))
615 if (saved_errno == 0)
616 saved_errno = errno;
619 fd_ring_clear (&sp->fts_fd_ring);
621 if (sp->fts_leaf_optimization_works_ht)
622 hash_free (sp->fts_leaf_optimization_works_ht);
624 free_dir (sp);
626 /* Free up the stream pointer. */
627 free(sp);
629 /* Set errno and return. */
630 if (saved_errno) {
631 __set_errno (saved_errno);
632 return (-1);
635 return (0);
/* Minimum link count of a traditional Unix directory.  When leaf
   optimization is OK and a directory's st_nlink == MIN_DIR_NLINK,
   then the directory has no subdirectories.  */
enum
{
  MIN_DIR_NLINK = 2
};

/* Whether leaf optimization is OK for a directory.  */
enum leaf_optimization
{
  /* st_nlink is not reliable for this directory's subdirectories.  */
  NO_LEAF_OPTIMIZATION,

  /* st_nlink == 2 means the directory lacks subdirectories.  */
  OK_LEAF_OPTIMIZATION
};
653 #if (defined __linux__ || defined __ANDROID__) \
654 && HAVE_SYS_VFS_H && HAVE_FSTATFS && HAVE_STRUCT_STATFS_F_TYPE
656 # include <sys/vfs.h>
/* File system type numbers (statfs f_type values).
   Linux-specific constants from coreutils' src/fs.h */
# define S_MAGIC_AFS   0x5346414F
# define S_MAGIC_CIFS  0xFF534D42
# define S_MAGIC_NFS   0x6969
# define S_MAGIC_PROC  0x9FA0
# define S_MAGIC_TMPFS 0x1021994
/* Integer type used here for a file system type number.  */
# ifdef HAVE___FSWORD_T
typedef __fsword_t fsword;
# else
typedef long int fsword;
# endif

/* One cache entry: the file system type number F_TYPE observed for
   the device number ST_DEV.  */
struct dev_type
{
  dev_t st_dev;
  fsword f_type;
};

/* Use a tiny initial size.  If a traversal encounters more than
   a few devices, the cost of growing/rehashing this table will be
   rendered negligible by the number of inodes processed.  */
enum
{
  DEV_TYPE_HT_INITIAL_SIZE = 13
};

/* Hash callback: bucket index of the struct dev_type X, computed from
   its device number modulo TABLE_SIZE.  */
static size_t
dev_type_hash (void const *x, size_t table_size)
{
  struct dev_type const *entry = x;
  return (uintmax_t) entry->st_dev % table_size;
}

/* Comparison callback: true if the struct dev_type objects X and Y
   have the same device number.  */
static bool
dev_type_compare (void const *x, void const *y)
{
  struct dev_type const *lhs = x;
  struct dev_type const *rhs = y;

  if (lhs->st_dev != rhs->st_dev)
    return false;
  return true;
}
699 /* Return the file system type of P with file descriptor FD, or 0 if not known.
700 If FD is negative, P's file descriptor is unavailable.
701 Try to cache known values. */
703 static fsword
704 filesystem_type (FTSENT const *p, int fd)
706 FTS *sp = p->fts_fts;
707 Hash_table *h = sp->fts_leaf_optimization_works_ht;
708 struct dev_type *ent;
709 struct statfs fs_buf;
711 /* If we're not in CWDFD mode, don't bother with this optimization,
712 since the caller is not serious about performance. */
713 if (!ISSET (FTS_CWDFD))
714 return 0;
716 if (! h)
717 h = sp->fts_leaf_optimization_works_ht
718 = hash_initialize (DEV_TYPE_HT_INITIAL_SIZE, NULL, dev_type_hash,
719 dev_type_compare, free);
720 if (h)
722 struct dev_type tmp;
723 tmp.st_dev = p->fts_statp->st_dev;
724 ent = hash_lookup (h, &tmp);
725 if (ent)
726 return ent->f_type;
729 /* Look-up failed. Query directly and cache the result. */
730 if (fd < 0 || fstatfs (fd, &fs_buf) != 0)
731 return 0;
733 if (h)
735 struct dev_type *t2 = malloc (sizeof *t2);
736 if (t2)
738 t2->st_dev = p->fts_statp->st_dev;
739 t2->f_type = fs_buf.f_type;
741 ent = hash_insert (h, t2);
742 if (ent)
743 fts_assert (ent == t2);
744 else
745 free (t2);
749 return fs_buf.f_type;
752 /* Return true if sorting dirents on inode numbers is known to improve
753 traversal performance for the directory P with descriptor DIR_FD.
754 Return false otherwise. When in doubt, return true.
755 DIR_FD is negative if unavailable. */
756 static bool
757 dirent_inode_sort_may_be_useful (FTSENT const *p, int dir_fd)
759 /* Skip the sort only if we can determine efficiently
760 that skipping it is the right thing to do.
761 The cost of performing an unnecessary sort is negligible,
762 while the cost of *not* performing it can be O(N^2) with
763 a very large constant. */
765 switch (filesystem_type (p, dir_fd))
767 case S_MAGIC_CIFS:
768 case S_MAGIC_NFS:
769 case S_MAGIC_TMPFS:
770 /* On a file system of any of these types, sorting
771 is unnecessary, and hence wasteful. */
772 return false;
774 default:
775 return true;
779 /* Given an FTS entry P for a directory with descriptor DIR_FD,
780 return whether it is valid to apply leaf optimization.
781 The optimization is valid if a directory's st_nlink value equal
782 to MIN_DIR_NLINK means the directory has no subdirectories.
783 DIR_FD is negative if unavailable. */
784 static enum leaf_optimization
785 leaf_optimization (FTSENT const *p, int dir_fd)
787 switch (filesystem_type (p, dir_fd))
789 case 0:
790 /* Leaf optimization is unsafe if the file system type is unknown. */
791 FALLTHROUGH;
792 case S_MAGIC_AFS:
793 /* Although AFS mount points are not counted in st_nlink, they
794 act like directories. See <https://bugs.debian.org/143111>. */
795 FALLTHROUGH;
796 case S_MAGIC_CIFS:
797 /* Leaf optimization causes 'find' to abort. See
798 <https://lists.gnu.org/r/bug-gnulib/2018-04/msg00015.html>. */
799 FALLTHROUGH;
800 case S_MAGIC_NFS:
801 /* NFS provides usable dirent.d_type but not necessarily for all entries
802 of large directories, so as per <https://bugzilla.redhat.com/1252549>
803 NFS should return true. However st_nlink values are not accurate on
804 all implementations as per <https://bugzilla.redhat.com/1299169>. */
805 FALLTHROUGH;
806 case S_MAGIC_PROC:
807 /* Per <https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=143111> /proc
808 may have bogus stat.st_nlink values. */
809 return NO_LEAF_OPTIMIZATION;
811 default:
812 return OK_LEAF_OPTIMIZATION;
816 #else
817 static bool
818 dirent_inode_sort_may_be_useful (_GL_UNUSED FTSENT const *p,
819 _GL_UNUSED int dir_fd)
821 return true;
823 static enum leaf_optimization
824 leaf_optimization (_GL_UNUSED FTSENT const *p, _GL_UNUSED int dir_fd)
826 return NO_LEAF_OPTIMIZATION;
828 #endif
/*
 * Offset in P's path at which a '/' plus a child name should be
 * written.  Special case of "/" at the end of the file name so that
 * slashes aren't appended which would cause file names to be written
 * as "....//foo".  P is evaluated more than once and is parenthesized
 * so that any pointer expression may be passed.
 */
#define NAPPEND(p)                                              \
        ((p)->fts_path[(p)->fts_pathlen - 1] == '/'             \
         ? (p)->fts_pathlen - 1 : (p)->fts_pathlen)
838 FTSENT *
839 fts_read (register FTS *sp)
841 register FTSENT *p, *tmp;
842 register unsigned short int instr;
843 register char *t;
845 /* If finished or unrecoverable error, return NULL. */
846 if (sp->fts_cur == NULL || ISSET(FTS_STOP))
847 return (NULL);
849 /* Set current node pointer. */
850 p = sp->fts_cur;
852 /* Save and zero out user instructions. */
853 instr = p->fts_instr;
854 p->fts_instr = FTS_NOINSTR;
856 /* Any type of file may be re-visited; re-stat and re-turn. */
857 if (instr == FTS_AGAIN) {
858 p->fts_info = fts_stat(sp, p, false);
859 return (p);
861 Dprintf (("fts_read: p=%s\n",
862 p->fts_info == FTS_INIT ? "" : p->fts_path));
865 * Following a symlink -- SLNONE test allows application to see
866 * SLNONE and recover. If indirecting through a symlink, have
867 * keep a pointer to current location. If unable to get that
868 * pointer, follow fails.
870 if (instr == FTS_FOLLOW &&
871 (p->fts_info == FTS_SL || p->fts_info == FTS_SLNONE)) {
872 p->fts_info = fts_stat(sp, p, true);
873 if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) {
874 if ((p->fts_symfd = diropen (sp, ".")) < 0) {
875 p->fts_errno = errno;
876 p->fts_info = FTS_ERR;
877 } else
878 p->fts_flags |= FTS_SYMFOLLOW;
880 goto check_for_dir;
883 /* Directory in pre-order. */
884 if (p->fts_info == FTS_D) {
885 /* If skipped or crossed mount point, do post-order visit. */
886 if (instr == FTS_SKIP ||
887 (ISSET(FTS_XDEV) && p->fts_statp->st_dev != sp->fts_dev)) {
888 if (p->fts_flags & FTS_SYMFOLLOW)
889 (void)close(p->fts_symfd);
890 if (sp->fts_child) {
891 fts_lfree(sp->fts_child);
892 sp->fts_child = NULL;
894 p->fts_info = FTS_DP;
895 LEAVE_DIR (sp, p, "1");
896 return (p);
899 /* Rebuild if only read the names and now traversing. */
900 if (sp->fts_child != NULL && ISSET(FTS_NAMEONLY)) {
901 CLR(FTS_NAMEONLY);
902 fts_lfree(sp->fts_child);
903 sp->fts_child = NULL;
907 * Cd to the subdirectory.
909 * If have already read and now fail to chdir, whack the list
910 * to make the names come out right, and set the parent errno
911 * so the application will eventually get an error condition.
912 * Set the FTS_DONTCHDIR flag so that when we logically change
913 * directories back to the parent we don't do a chdir.
915 * If haven't read do so. If the read fails, fts_build sets
916 * FTS_STOP or the fts_info field of the node.
918 if (sp->fts_child != NULL) {
919 if (fts_safe_changedir(sp, p, -1, p->fts_accpath)) {
920 p->fts_errno = errno;
921 p->fts_flags |= FTS_DONTCHDIR;
922 for (p = sp->fts_child; p != NULL;
923 p = p->fts_link)
924 p->fts_accpath =
925 p->fts_parent->fts_accpath;
927 } else if ((sp->fts_child = fts_build(sp, BREAD)) == NULL) {
928 if (ISSET(FTS_STOP))
929 return (NULL);
930 /* If fts_build's call to fts_safe_changedir failed
931 because it was not able to fchdir into a
932 subdirectory, tell the caller. */
933 if (p->fts_errno && p->fts_info != FTS_DNR)
934 p->fts_info = FTS_ERR;
935 LEAVE_DIR (sp, p, "2");
936 return (p);
938 p = sp->fts_child;
939 sp->fts_child = NULL;
940 goto name;
943 /* Move to the next node on this level. */
944 next: tmp = p;
946 /* If we have so many directory entries that we're reading them
947 in batches, and we've reached the end of the current batch,
948 read in a new batch. */
949 if (p->fts_link == NULL && p->fts_parent->fts_dirp)
951 p = tmp->fts_parent;
952 sp->fts_cur = p;
953 sp->fts_path[p->fts_pathlen] = '\0';
955 if ((p = fts_build (sp, BREAD)) == NULL)
957 if (ISSET(FTS_STOP))
958 return NULL;
959 goto cd_dot_dot;
962 free(tmp);
963 goto name;
966 if ((p = p->fts_link) != NULL) {
967 sp->fts_cur = p;
968 free(tmp);
971 * If reached the top, return to the original directory (or
972 * the root of the tree), and load the file names for the next
973 * root.
975 if (p->fts_level == FTS_ROOTLEVEL) {
976 if (restore_initial_cwd(sp)) {
977 SET(FTS_STOP);
978 return (NULL);
980 free_dir(sp);
981 fts_load(sp, p);
982 if (! setup_dir(sp)) {
983 free_dir(sp);
984 return (NULL);
986 goto check_for_dir;
990 * User may have called fts_set on the node. If skipped,
991 * ignore. If followed, get a file descriptor so we can
992 * get back if necessary.
994 if (p->fts_instr == FTS_SKIP)
995 goto next;
996 if (p->fts_instr == FTS_FOLLOW) {
997 p->fts_info = fts_stat(sp, p, true);
998 if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) {
999 if ((p->fts_symfd = diropen (sp, ".")) < 0) {
1000 p->fts_errno = errno;
1001 p->fts_info = FTS_ERR;
1002 } else
1003 p->fts_flags |= FTS_SYMFOLLOW;
1005 p->fts_instr = FTS_NOINSTR;
1008 name: t = sp->fts_path + NAPPEND(p->fts_parent);
1009 *t++ = '/';
1010 memmove(t, p->fts_name, p->fts_namelen + 1);
1011 check_for_dir:
1012 sp->fts_cur = p;
1013 if (p->fts_info == FTS_NSOK)
1015 if (p->fts_statp->st_size == FTS_STAT_REQUIRED)
1016 p->fts_info = fts_stat(sp, p, false);
1017 else
1018 fts_assert (p->fts_statp->st_size == FTS_NO_STAT_REQUIRED);
1021 if (p->fts_info == FTS_D)
1023 /* Now that P->fts_statp is guaranteed to be valid,
1024 if this is a command-line directory, record its
1025 device number, to be used for FTS_XDEV. */
1026 if (p->fts_level == FTS_ROOTLEVEL)
1027 sp->fts_dev = p->fts_statp->st_dev;
1028 Dprintf ((" entering: %s\n", p->fts_path));
1029 if (! enter_dir (sp, p))
1030 return NULL;
1032 return p;
1034 cd_dot_dot:
1036 /* Move up to the parent node. */
1037 p = tmp->fts_parent;
1038 sp->fts_cur = p;
1039 free(tmp);
1041 if (p->fts_level == FTS_ROOTPARENTLEVEL) {
1043 * Done; free everything up and set errno to 0 so the user
1044 * can distinguish between error and EOF.
1046 free(p);
1047 __set_errno (0);
1048 return (sp->fts_cur = NULL);
1051 fts_assert (p->fts_info != FTS_NSOK);
1053 /* NUL terminate the file name. */
1054 sp->fts_path[p->fts_pathlen] = '\0';
1057 * Return to the parent directory. If at a root node, restore
1058 * the initial working directory. If we came through a symlink,
1059 * go back through the file descriptor. Otherwise, move up
1060 * one level, via "..".
1062 if (p->fts_level == FTS_ROOTLEVEL) {
1063 if (restore_initial_cwd(sp)) {
1064 p->fts_errno = errno;
1065 SET(FTS_STOP);
1067 } else if (p->fts_flags & FTS_SYMFOLLOW) {
1068 if (FCHDIR(sp, p->fts_symfd)) {
1069 p->fts_errno = errno;
1070 SET(FTS_STOP);
1072 (void)close(p->fts_symfd);
1073 } else if (!(p->fts_flags & FTS_DONTCHDIR) &&
1074 fts_safe_changedir(sp, p->fts_parent, -1, "..")) {
1075 p->fts_errno = errno;
1076 SET(FTS_STOP);
1079 /* If the directory causes a cycle, preserve the FTS_DC flag and keep
1080 the corresponding dev/ino pair in the hash table. It is going to be
1081 removed when leaving the original directory. */
1082 if (p->fts_info != FTS_DC) {
1083 p->fts_info = p->fts_errno ? FTS_ERR : FTS_DP;
1084 if (p->fts_errno == 0)
1085 LEAVE_DIR (sp, p, "3");
1087 return ISSET(FTS_STOP) ? NULL : p;
1091 * Fts_set takes the stream as an argument although it's not used in this
1092 * implementation; it would be necessary if anyone wanted to add global
1093 * semantics to fts using fts_set. An error return is allowed for similar
1094 * reasons.
1096 /* ARGSUSED */
1098 fts_set(_GL_UNUSED FTS *sp, FTSENT *p, int instr)
1100 if (instr != 0 && instr != FTS_AGAIN && instr != FTS_FOLLOW &&
1101 instr != FTS_NOINSTR && instr != FTS_SKIP) {
1102 __set_errno (EINVAL);
1103 return (1);
1105 p->fts_instr = instr;
1106 return (0);
1109 FTSENT *
1110 fts_children (register FTS *sp, int instr)
1112 register FTSENT *p;
1113 int fd;
1115 if (instr != 0 && instr != FTS_NAMEONLY) {
1116 __set_errno (EINVAL);
1117 return (NULL);
1120 /* Set current node pointer. */
1121 p = sp->fts_cur;
1124 * Errno set to 0 so user can distinguish empty directory from
1125 * an error.
1127 __set_errno (0);
1129 /* Fatal errors stop here. */
1130 if (ISSET(FTS_STOP))
1131 return (NULL);
1133 /* Return logical hierarchy of user's arguments. */
1134 if (p->fts_info == FTS_INIT)
1135 return (p->fts_link);
1138 * If not a directory being visited in pre-order, stop here. Could
1139 * allow FTS_DNR, assuming the user has fixed the problem, but the
1140 * same effect is available with FTS_AGAIN.
1142 if (p->fts_info != FTS_D /* && p->fts_info != FTS_DNR */)
1143 return (NULL);
1145 /* Free up any previous child list. */
1146 if (sp->fts_child != NULL)
1147 fts_lfree(sp->fts_child);
1149 if (instr == FTS_NAMEONLY) {
1150 SET(FTS_NAMEONLY);
1151 instr = BNAMES;
1152 } else
1153 instr = BCHILD;
1156 * If using chdir on a relative file name and called BEFORE fts_read
1157 * does its chdir to the root of a traversal, we can lose -- we need to
1158 * chdir into the subdirectory, and we don't know where the current
1159 * directory is, so we can't get back so that the upcoming chdir by
1160 * fts_read will work.
1162 if (p->fts_level != FTS_ROOTLEVEL || p->fts_accpath[0] == '/' ||
1163 ISSET(FTS_NOCHDIR))
1164 return (sp->fts_child = fts_build(sp, instr));
1166 if ((fd = diropen (sp, ".")) < 0)
1167 return (sp->fts_child = NULL);
1168 sp->fts_child = fts_build(sp, instr);
1169 if (ISSET(FTS_CWDFD))
1171 cwd_advance_fd (sp, fd, true);
1173 else
1175 if (fchdir(fd))
1177 int saved_errno = errno;
1178 close (fd);
1179 __set_errno (saved_errno);
1180 return NULL;
1182 close (fd);
1184 return (sp->fts_child);
1187 /* A comparison function to sort on increasing inode number.
1188 For some file system types, sorting either way makes a huge
1189 performance difference for a directory with very many entries,
1190 but sorting on increasing values is slightly better than sorting
1191 on decreasing values. The difference is in the 5% range. */
1192 static int
1193 fts_compare_ino (struct _ftsent const **a, struct _ftsent const **b)
1195 return _GL_CMP (a[0]->fts_statp->st_ino, b[0]->fts_statp->st_ino);
/* Translate the dirent.d_type value DTYPE into the matching S_IF* file
   type bit and store it in ST->st_mode.  The whole field is overwritten,
   so every other bit of st_mode is cleared.  A DTYPE that matches none of
   the DT_* values below results in st_mode == 0.  */
static void
set_stat_type (struct stat *st, unsigned int dtype)
{
  mode_t fmt_bits = 0;

  if (dtype == DT_BLK)
    fmt_bits = S_IFBLK;
  else if (dtype == DT_CHR)
    fmt_bits = S_IFCHR;
  else if (dtype == DT_DIR)
    fmt_bits = S_IFDIR;
  else if (dtype == DT_FIFO)
    fmt_bits = S_IFIFO;
  else if (dtype == DT_LNK)
    fmt_bits = S_IFLNK;
  else if (dtype == DT_REG)
    fmt_bits = S_IFREG;
  else if (dtype == DT_SOCK)
    fmt_bits = S_IFSOCK;

  st->st_mode = fmt_bits;
}
/* Close the directory stream DIRP with closedir, then assign NULL to DIRP.
   DIRP is assigned to, so the argument must be a modifiable lvalue; it is
   expanded twice.  The return value of closedir is not examined.  */
1233 #define closedir_and_clear(dirp) \
1234 do \
1236 closedir (dirp); \
1237 dirp = NULL; \
1239 while (0)
/* Open directory FILE through opendirat, passing PDIR_FD along as the last
   argument.  The expansion refers to variables named `sp' (directly, and
   presumably through ISSET -- confirm against its definition) and `cur',
   so both must be in scope where the macro is used.
   - Directory descriptor argument: sp->fts_cwd_fd when FTS_CWDFD is set
     and FTS_NOCHDIR is not; AT_FDCWD otherwise.
   - Extra flags argument: O_NOFOLLOW when FTS_PHYSICAL is set, except
     when FTS_COMFOLLOW is also set and cur is at FTS_ROOTLEVEL; 0
     otherwise.  */
1241 #define fts_opendir(file, Pdir_fd) \
1242 opendirat((! ISSET(FTS_NOCHDIR) && ISSET(FTS_CWDFD) \
1243 ? sp->fts_cwd_fd : AT_FDCWD), \
1244 file, \
1245 (((ISSET(FTS_PHYSICAL) \
1246 && ! (ISSET(FTS_COMFOLLOW) \
1247 && cur->fts_level == FTS_ROOTLEVEL)) \
1248 ? O_NOFOLLOW : 0)), \
1249 Pdir_fd)
/* fts_build: read entries of the directory at sp->fts_cur and return them
   as a list linked through fts_link (NULL when nothing is returned).

   TYPE is compared against three values in this function:
     BREAD  - failures to open, fstat or chdir into the directory are
              recorded in cur->fts_info and/or cur->fts_errno;
     BCHILD - after descending, the function changes back to the previous
              directory before returning;
     BNAMES - no attempt is made to change into the directory.

   If cur->fts_dirp is already non-NULL on entry, reading resumes on that
   stream instead of reopening the directory.  When sp->fts_compar is NULL,
   at most FTS_MAX_READDIR_ENTRIES entries are read per call and the stream
   is left open in cur->fts_dirp for a later call.

   On allocation failure or an over-long name, the partial list is freed,
   cur->fts_info is set to FTS_ERR, FTS_STOP is set and NULL is returned
   with errno set.  */
1252 * This is the tricky part -- do not casually change *anything* in here. The
1253 * idea is to build the linked list of entries that are used by fts_children
1254 * and fts_read. There are lots of special cases.
1256 * The real slowdown in walking the tree is the stat calls. If FTS_NOSTAT is
1257 * set and it's a physical walk (so that symbolic links can't be directories),
1258 * we can do things quickly. First, if it's a 4.4BSD file system, the type
1259 * of the file is in the directory entry. Otherwise, we assume that the number
1260 * of subdirectories in a node is equal to the number of links to the parent.
1261 * The former skips all stat calls. The latter skips stat calls in any leaf
1262 * directories and for any files after the subdirectories in the directory have
1263 * been found, cutting the stat calls by about 2/3.
1265 static FTSENT *
1266 internal_function
1267 fts_build (register FTS *sp, int type)
1269 register FTSENT *p, *head;
1270 register size_t nitems;
1271 FTSENT *tail;
1272 int saved_errno;
1273 bool descend;
1274 bool doadjust;
1275 ptrdiff_t level;
1276 size_t len, maxlen, new_len;
1277 char *cp;
1278 int dir_fd;
1279 FTSENT *cur = sp->fts_cur;
1280 bool continue_readdir = !!cur->fts_dirp;
1281 bool sort_by_inode = false;
1282 size_t max_entries;
1284 /* When cur->fts_dirp is non-NULL, that means we should
1285 continue calling readdir on that existing DIR* pointer
1286 rather than opening a new one. */
1287 if (continue_readdir)
1289 DIR *dp = cur->fts_dirp;
1290 dir_fd = dirfd (dp);
1291 if (dir_fd < 0)
1293 int dirfd_errno = errno;
1294 closedir_and_clear (cur->fts_dirp);
1295 if (type == BREAD)
1297 cur->fts_info = FTS_DNR;
1298 cur->fts_errno = dirfd_errno;
1300 return NULL;
1303 else
1305 /* Open the directory for reading. If this fails, we're done.
1306 If being called from fts_read, set the fts_info field. */
1307 if ((cur->fts_dirp = fts_opendir(cur->fts_accpath, &dir_fd)) == NULL)
1309 if (type == BREAD)
1311 cur->fts_info = FTS_DNR;
1312 cur->fts_errno = errno;
1314 return NULL;
1316 /* Rather than calling fts_stat for each and every entry encountered
1317 in the readdir loop (below), stat each directory only right after
1318 opening it. */
1319 bool stat_optimization = cur->fts_info == FTS_NSOK;
1321 if (stat_optimization
1322 /* Also read the stat info again after opening a directory to
1323 reveal eventual changes caused by a submount triggered by
1324 the traversal. But do it only for utilities which use
1325 FTS_TIGHT_CYCLE_CHECK. Therefore, only find and du
1326 benefit/suffer from this feature for now. */
1327 || ISSET (FTS_TIGHT_CYCLE_CHECK))
1329 if (!stat_optimization)
1330 LEAVE_DIR (sp, cur, "4");
1331 if (fstat (dir_fd, cur->fts_statp) != 0)
1333 int fstat_errno = errno;
1334 closedir_and_clear (cur->fts_dirp);
1335 if (type == BREAD)
1337 cur->fts_errno = fstat_errno;
1338 cur->fts_info = FTS_NS;
1340 __set_errno (fstat_errno);
1341 return NULL;
1343 if (stat_optimization)
1344 cur->fts_info = FTS_D;
1345 else if (! enter_dir (sp, cur))
1347 int err = errno;
1348 closedir_and_clear (cur->fts_dirp);
1349 __set_errno (err);
1350 return NULL;
1355 /* Maximum number of readdir entries to read at one time. This
1356 limitation is to avoid reading millions of entries into memory
1357 at once. When an fts_compar function is specified, we have no
1358 choice: we must read all entries into memory before calling that
1359 function. But when no such function is specified, we can read
1360 entries in batches that are large enough to help us with inode-
1361 sorting, yet not so large that we risk exhausting memory. */
1362 max_entries = sp->fts_compar ? SIZE_MAX : FTS_MAX_READDIR_ENTRIES;
1365 * If we're going to need to stat anything or we want to descend
1366 * and stay in the directory, chdir. If this fails we keep going,
1367 * but set a flag so we don't chdir after the post-order visit.
1368 * We won't be able to stat anything, but we can still return the
1369 * names themselves. Note, that since fts_read won't be able to
1370 * chdir into the directory, it will have to return different file
1371 * names than before, i.e. "a/b" instead of "b". Since the node
1372 * has already been visited in pre-order, have to wait until the
1373 * post-order visit to return the error. There is a special case
1374 * here, if there was nothing to stat then it's not an error to
1375 * not be able to stat. This is all fairly nasty. If a program
1376 * needed sorted entries or stat information, they had better be
1377 * checking FTS_NS on the returned nodes.
1379 if (continue_readdir)
1381 /* When resuming a short readdir run, we already have
1382 the required dirp and dir_fd. */
1383 descend = true;
1385 else
1387 /* Try to descend unless it is a names-only fts_children,
1388 or the directory is known to lack subdirectories. */
1389 descend = (type != BNAMES
1390 && ! (ISSET (FTS_NOSTAT) && ISSET (FTS_PHYSICAL)
1391 && ! ISSET (FTS_SEEDOT)
1392 && cur->fts_statp->st_nlink == MIN_DIR_NLINK
1393 && (leaf_optimization (cur, dir_fd)
1394 != NO_LEAF_OPTIMIZATION)));
1395 if (descend || type == BREAD)
1397 if (ISSET(FTS_CWDFD))
/* Work on a duplicate (numbered above STDERR_FILENO, close-on-exec) rather
   than on the descriptor obtained together with cur->fts_dirp; the comment
   on fts_safe_changedir warns that a descriptor passed to it must not be
   one that a later closedir would invalidate.  */
1398 dir_fd = fcntl (dir_fd, F_DUPFD_CLOEXEC, STDERR_FILENO + 1);
1399 if (dir_fd < 0 || fts_safe_changedir(sp, cur, dir_fd, NULL)) {
1400 if (descend && type == BREAD)
1401 cur->fts_errno = errno;
1402 cur->fts_flags |= FTS_DONTCHDIR;
1403 descend = false;
1404 closedir_and_clear(cur->fts_dirp);
1405 if (ISSET(FTS_CWDFD) && 0 <= dir_fd)
1406 close (dir_fd);
1407 cur->fts_dirp = NULL;
1408 } else
1409 descend = true;
1414 * Figure out the max file name length that can be stored in the
1415 * current buffer -- the inner loop allocates more space as necessary.
1416 * We really wouldn't have to do the maxlen calculations here, we
1417 * could do them in fts_read before returning the name, but it's a
1418 * lot easier here since the length is part of the dirent structure.
1420 * If not changing directories set a pointer so that can just append
1421 * each new component into the file name.
1423 len = NAPPEND(cur);
1424 if (ISSET(FTS_NOCHDIR)) {
1425 cp = sp->fts_path + len;
1426 *cp++ = '/';
1427 } else {
1428 /* GCC, you're too verbose. */
1429 cp = NULL;
1431 len++;
1432 maxlen = sp->fts_pathlen - len;
1434 level = cur->fts_level + 1;
1436 /* Read the directory, attaching each entry to the "link" pointer. */
1437 doadjust = false;
1438 head = NULL;
1439 tail = NULL;
1440 nitems = 0;
1441 while (cur->fts_dirp) {
1442 size_t d_namelen;
1443 __set_errno (0);
1444 struct dirent *dp = readdir(cur->fts_dirp);
1445 if (dp == NULL) {
1446 if (errno) {
1447 cur->fts_errno = errno;
1448 /* If we've not read any items yet, treat
1449 the error as if we can't access the dir. */
1450 cur->fts_info = (continue_readdir || nitems)
1451 ? FTS_ERR : FTS_DNR;
1453 closedir_and_clear(cur->fts_dirp);
1454 break;
1456 if (!ISSET(FTS_SEEDOT) && ISDOT(dp->d_name))
1457 continue;
1459 d_namelen = _D_EXACT_NAMLEN (dp);
1460 p = fts_alloc (sp, dp->d_name, d_namelen);
1461 if (!p)
/* The mem1 label sits inside the fts_palloc failure branch below, so both
   allocation failures share one cleanup path; p is NULL on this jump, which
   makes the free(p) there a no-op.  */
1462 goto mem1;
1463 if (d_namelen >= maxlen) {
1464 /* include space for NUL */
1465 uintptr_t oldaddr = (uintptr_t) sp->fts_path;
1466 if (! fts_palloc(sp, d_namelen + len + 1)) {
1468 * No more memory. Save
1469 * errno, free up the current structure and the
1470 * structures already allocated.
1472 mem1: saved_errno = errno;
1473 free(p);
1474 fts_lfree(head);
1475 closedir_and_clear(cur->fts_dirp);
1476 cur->fts_info = FTS_ERR;
1477 SET(FTS_STOP);
1478 __set_errno (saved_errno);
1479 return (NULL);
1481 /* Did realloc() change the pointer? */
1482 if (oldaddr != (uintptr_t) sp->fts_path) {
1483 doadjust = true;
1484 if (ISSET(FTS_NOCHDIR))
1485 cp = sp->fts_path + len;
1487 maxlen = sp->fts_pathlen - len;
1490 new_len = len + d_namelen;
1491 if (new_len < len) {
1493 * In the unlikely event that we would end up
1494 * with a file name longer than SIZE_MAX, free up
1495 * the current structure and the structures already
1496 * allocated, then error out with ENAMETOOLONG.
1498 free(p);
1499 fts_lfree(head);
1500 closedir_and_clear(cur->fts_dirp);
1501 cur->fts_info = FTS_ERR;
1502 SET(FTS_STOP);
1503 __set_errno (ENAMETOOLONG);
1504 return (NULL);
1506 p->fts_level = level;
1507 p->fts_parent = sp->fts_cur;
1508 p->fts_pathlen = new_len;
1510 /* Store dirent.d_ino, in case we need to sort
1511 entries before processing them. */
1512 p->fts_statp->st_ino = D_INO (dp);
1514 /* Build a file name for fts_stat to stat. */
1515 if (ISSET(FTS_NOCHDIR)) {
1516 p->fts_accpath = p->fts_path;
1517 memmove(cp, p->fts_name, p->fts_namelen + 1);
1518 } else
1519 p->fts_accpath = p->fts_name;
1521 if (sp->fts_compar == NULL || ISSET(FTS_DEFER_STAT)) {
1522 /* Record what fts_read will have to do with this
1523 entry. In many cases, it will simply fts_stat it,
1524 but we can take advantage of any d_type information
1525 to optimize away the unnecessary stat calls. I.e.,
1526 if FTS_NOSTAT is in effect and we're not following
1527 symlinks (FTS_PHYSICAL) and d_type indicates this
1528 is *not* a directory, then we won't have to stat it
1529 at all. If it *is* a directory, then (currently)
1530 we stat it regardless, in order to get device and
1531 inode numbers. Some day we might optimize that
1532 away, too, for directories where d_ino is known to
1533 be valid. */
1534 bool skip_stat = (ISSET(FTS_NOSTAT)
1535 && DT_IS_KNOWN(dp)
1536 && ! DT_MUST_BE(dp, DT_DIR)
1537 && (ISSET(FTS_PHYSICAL)
1538 || ! DT_MUST_BE(dp, DT_LNK)));
1539 p->fts_info = FTS_NSOK;
1540 /* Propagate dirent.d_type information back
1541 to caller, when possible. */
1542 set_stat_type (p->fts_statp, D_TYPE (dp));
1543 fts_set_stat_required(p, !skip_stat);
1544 } else {
1545 p->fts_info = fts_stat(sp, p, false);
1548 /* We walk in directory order so "ls -f" doesn't get upset. */
1549 p->fts_link = NULL;
1550 if (head == NULL)
1551 head = tail = p;
1552 else {
1553 tail->fts_link = p;
1554 tail = p;
1557 /* If there are many entries, no sorting function has been
1558 specified, and this file system is of a type that may be
1559 slow with a large number of entries, arrange to sort the
1560 directory entries on increasing inode numbers.
1562 The NITEMS comparison uses ==, not >, because the test
1563 needs to be tried at most once, and NITEMS will exceed
1564 the threshold after it is incremented below. */
1565 if (nitems == _FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD
1566 && !sp->fts_compar)
1567 sort_by_inode = dirent_inode_sort_may_be_useful (cur, dir_fd);
1569 ++nitems;
1570 if (max_entries <= nitems) {
1571 /* When there are too many dir entries, leave
1572 fts_dirp open, so that a subsequent fts_read
1573 can take up where we leave off. */
1574 break;
1579 * If realloc() changed the address of the file name, adjust the
1580 * addresses for the rest of the tree and the dir list.
1582 if (doadjust)
1583 fts_padjust(sp, head);
1586 * If not changing directories, reset the file name back to original
1587 * state.
1589 if (ISSET(FTS_NOCHDIR)) {
1590 if (len == sp->fts_pathlen || nitems == 0)
1591 --cp;
1592 *cp = '\0';
1596 * If descended after called from fts_children or after called from
1597 * fts_read and nothing found, get back. At the root level we use
1598 * the saved fd; if one of fts_open()'s arguments is a relative name
1599 * to an empty directory, we wind up here with no other way back. If
1600 * can't get back, we're done.
1602 if (!continue_readdir && descend && (type == BCHILD || !nitems) &&
1603 (cur->fts_level == FTS_ROOTLEVEL
1604 ? restore_initial_cwd(sp)
1605 : fts_safe_changedir(sp, cur->fts_parent, -1, ".."))) {
1606 cur->fts_info = FTS_ERR;
1607 SET(FTS_STOP);
1608 fts_lfree(head);
1609 return (NULL);
1612 /* If didn't find anything, return NULL. */
1613 if (!nitems) {
1614 if (type == BREAD
1615 && cur->fts_info != FTS_DNR && cur->fts_info != FTS_ERR)
1616 cur->fts_info = FTS_DP;
1617 fts_lfree(head);
1618 return (NULL);
/* sort_by_inode is only ever set while sp->fts_compar is NULL (see the
   threshold test in the loop above), so installing fts_compare_ino for the
   duration of the sort and then storing NULL restores the previous state.  */
1621 if (sort_by_inode) {
1622 sp->fts_compar = fts_compare_ino;
1623 head = fts_sort (sp, head, nitems);
1624 sp->fts_compar = NULL;
1627 /* Sort the entries. */
1628 if (sp->fts_compar && nitems > 1)
1629 head = fts_sort(sp, head, nitems);
1630 return (head);
1633 #if GNULIB_FTS_DEBUG
/* A device/inode number pair, held as intmax_t so that both members can
   be printed with PRINT_DEVINO.  Member order matters to positional
   initializers: dev first, then ino.  */
struct devino {
  intmax_t dev;
  intmax_t ino;
};

/* printf conversion for one struct devino: pass .dev, then .ino.  */
#define PRINT_DEVINO "(%jd,%jd)"

/* Return the device and inode numbers of the file open on FD.
   For AT_FDCWD the result is (-1, 0) and no system call is made.
   If fstat fails, the result is (-1, errno).  */
static struct devino
getdevino (int fd)
{
  struct devino result;
  struct stat st;

  if (fd == AT_FDCWD)
    {
      result.dev = -1;
      result.ino = 0;
    }
  else if (fstat (fd, &st) == 0)
    {
      result.dev = st.st_dev;
      result.ino = st.st_ino;
    }
  else
    {
      result.dev = -1;
      result.ino = errno;
    }

  return result;
}
1651 /* Walk ->fts_parent links starting at E_CURR, until the root of the
1652 current hierarchy. There should be a directory with dev/inode
1653 matching those of AD. If not, print a lot of diagnostics. */
1654 static void
1655 find_matching_ancestor (FTSENT const *e_curr, struct Active_dir const *ad)
1657 FTSENT const *ent;
1658 for (ent = e_curr; ent->fts_level >= FTS_ROOTLEVEL; ent = ent->fts_parent)
1660 if (ad->ino == ent->fts_statp->st_ino
1661 && ad->dev == ent->fts_statp->st_dev)
1662 return;
1664 printf ("ERROR: tree dir, %s, not active\n", ad->fts_ent->fts_accpath);
1665 printf ("active dirs:\n");
1666 for (ent = e_curr;
1667 ent->fts_level >= FTS_ROOTLEVEL; ent = ent->fts_parent)
1668 printf (" %s(%"PRIuMAX"/%"PRIuMAX") to %s(%"PRIuMAX"/%"PRIuMAX")...\n",
1669 ad->fts_ent->fts_accpath,
1670 (uintmax_t) ad->dev,
1671 (uintmax_t) ad->ino,
1672 ent->fts_accpath,
1673 (uintmax_t) ent->fts_statp->st_dev,
1674 (uintmax_t) ent->fts_statp->st_ino);
1677 void
1678 fts_cross_check (FTS const *sp)
1680 FTSENT const *ent = sp->fts_cur;
1681 FTSENT const *t;
1682 if ( ! ISSET (FTS_TIGHT_CYCLE_CHECK))
1683 return;
1685 Dprintf (("fts-cross-check cur=%s\n", ent->fts_path));
1686 /* Make sure every parent dir is in the tree. */
1687 for (t = ent->fts_parent; t->fts_level >= FTS_ROOTLEVEL; t = t->fts_parent)
1689 struct Active_dir ad;
1690 ad.ino = t->fts_statp->st_ino;
1691 ad.dev = t->fts_statp->st_dev;
1692 if ( ! hash_lookup (sp->fts_cycle.ht, &ad))
1693 printf ("ERROR: active dir, %s, not in tree\n", t->fts_path);
1696 /* Make sure every dir in the tree is an active dir.
1697 But ENT is not necessarily a directory. If so, just skip this part. */
1698 if (ent->fts_parent->fts_level >= FTS_ROOTLEVEL
1699 && (ent->fts_info == FTS_DP
1700 || ent->fts_info == FTS_D))
1702 struct Active_dir *ad;
1703 for (ad = hash_get_first (sp->fts_cycle.ht); ad != NULL;
1704 ad = hash_get_next (sp->fts_cycle.ht, ad))
1706 find_matching_ancestor (ent, ad);
/* Return true if fstat succeeds on both FD1 and FD2 and psame_inode
   reports that the two results describe the same file; false otherwise.
   FD2 is not examined when fstat on FD1 fails.  */
static bool
same_fd (int fd1, int fd2)
{
  struct stat first;
  struct stat second;

  if (fstat (fd1, &first) != 0)
    return false;
  if (fstat (fd2, &second) != 0)
    return false;
  return psame_inode (&first, &second);
}
1720 static void
1721 fd_ring_print (FTS const *sp, FILE *stream, char const *msg)
1723 if (!fts_debug)
1724 return;
1725 I_ring const *fd_ring = &sp->fts_fd_ring;
1726 unsigned int i = fd_ring->ir_front;
1727 struct devino cwd = getdevino (sp->fts_cwd_fd);
1728 fprintf (stream, "=== %s ========== "PRINT_DEVINO"\n", msg, cwd.dev, cwd.ino);
1729 if (i_ring_empty (fd_ring))
1730 return;
1732 while (true)
1734 int fd = fd_ring->ir_data[i];
1735 if (fd < 0)
1736 fprintf (stream, "%u: %d:\n", i, fd);
1737 else
1739 struct devino wd = getdevino (fd);
1740 fprintf (stream, "%u: %d: "PRINT_DEVINO"\n", i, fd, wd.dev, wd.ino);
1742 if (i == fd_ring->ir_back)
1743 break;
1744 i = (i + I_RING_SIZE - 1) % I_RING_SIZE;
1748 /* Ensure that each file descriptor on the fd_ring matches a
1749 parent, grandparent, etc. of the current working directory. */
1750 static void
1751 fd_ring_check (FTS const *sp)
1753 if (!fts_debug)
1754 return;
1756 /* Make a writable copy. */
1757 I_ring fd_w = sp->fts_fd_ring;
1759 int cwd_fd = sp->fts_cwd_fd;
1760 cwd_fd = fcntl (cwd_fd, F_DUPFD_CLOEXEC, STDERR_FILENO + 1);
1761 struct devino dot = getdevino (cwd_fd);
1762 fprintf (stderr, "===== check ===== cwd: "PRINT_DEVINO"\n",
1763 dot.dev, dot.ino);
1764 while ( ! i_ring_empty (&fd_w))
1766 int fd = i_ring_pop (&fd_w);
1767 if (0 <= fd)
1769 int open_flags = O_SEARCH | O_CLOEXEC;
1770 int parent_fd = openat (cwd_fd, "..", open_flags);
1771 if (parent_fd < 0)
1773 // Warn?
1774 break;
1776 if (!same_fd (fd, parent_fd))
1778 struct devino cwd = getdevino (fd);
1779 fprintf (stderr, "ring : "PRINT_DEVINO"\n", cwd.dev, cwd.ino);
1780 struct devino c2 = getdevino (parent_fd);
1781 fprintf (stderr, "parent: "PRINT_DEVINO"\n", c2.dev, c2.ino);
1782 fts_assert (0);
1784 close (cwd_fd);
1785 cwd_fd = parent_fd;
1788 close (cwd_fd);
1790 #endif
1792 static unsigned short int
1793 internal_function
1794 fts_stat(FTS *sp, register FTSENT *p, bool follow)
1796 struct stat *sbp = p->fts_statp;
1798 if (ISSET (FTS_LOGICAL)
1799 || (ISSET (FTS_COMFOLLOW) && p->fts_level == FTS_ROOTLEVEL))
1800 follow = true;
1803 * If doing a logical walk, or application requested FTS_FOLLOW, do
1804 * a stat(2). If that fails, check for a nonexistent symlink. If
1805 * fail, set the errno from the stat call.
1807 int flags = follow ? 0 : AT_SYMLINK_NOFOLLOW;
1808 if (fstatat (sp->fts_cwd_fd, p->fts_accpath, sbp, flags) < 0)
1810 if (follow && errno == ENOENT
1811 && 0 <= fstatat (sp->fts_cwd_fd, p->fts_accpath, sbp,
1812 AT_SYMLINK_NOFOLLOW))
1814 __set_errno (0);
1815 return FTS_SLNONE;
1818 p->fts_errno = errno;
1819 memset (sbp, 0, sizeof *sbp);
1820 return FTS_NS;
1823 if (S_ISDIR(sbp->st_mode)) {
1824 if (ISDOT(p->fts_name)) {
1825 /* Command-line "." and ".." are real directories. */
1826 return (p->fts_level == FTS_ROOTLEVEL ? FTS_D : FTS_DOT);
1829 return (FTS_D);
1831 if (S_ISLNK(sbp->st_mode))
1832 return (FTS_SL);
1833 if (S_ISREG(sbp->st_mode))
1834 return (FTS_F);
1835 return (FTS_DEFAULT);
1838 static int
1839 fts_compar (void const *a, void const *b)
1841 /* Convert A and B to the correct types, to pacify the compiler, and
1842 for portability to bizarre hosts where "void const *" and "FTSENT
1843 const **" differ in runtime representation. The comparison
1844 function cannot modify *a and *b, but there is no compile-time
1845 check for this. */
1846 FTSENT const **pa = (FTSENT const **) a;
1847 FTSENT const **pb = (FTSENT const **) b;
1848 return pa[0]->fts_fts->fts_compar (pa, pb);
1851 static FTSENT *
1852 internal_function
1853 fts_sort (FTS *sp, FTSENT *head, register size_t nitems)
1855 register FTSENT **ap, *p;
1857 /* On most modern hosts, void * and FTSENT ** have the same
1858 run-time representation, and one can convert sp->fts_compar to
1859 the type qsort expects without problem. Use the heuristic that
1860 this is OK if the two pointer types are the same size, and if
1861 converting FTSENT ** to uintptr_t is the same as converting
1862 FTSENT ** to void * and then to uintptr_t. This heuristic isn't
1863 valid in general but we don't know of any counterexamples. */
1864 FTSENT *dummy;
1865 int (*compare) (void const *, void const *) =
1866 ((sizeof &dummy == sizeof (void *)
1867 && (uintptr_t) &dummy == (uintptr_t) (void *) &dummy)
1868 ? (int (*) (void const *, void const *)) sp->fts_compar
1869 : fts_compar);
1872 * Construct an array of pointers to the structures and call qsort(3).
1873 * Reassemble the array in the order returned by qsort. If unable to
1874 * sort for memory reasons, return the directory entries in their
1875 * current order. Allocate enough space for the current needs plus
1876 * 40 so don't realloc one entry at a time.
1878 if (nitems > sp->fts_nitems) {
1879 FTSENT **a;
1881 sp->fts_nitems = nitems + 40;
1882 if (SIZE_MAX / sizeof *a < sp->fts_nitems
1883 || ! (a = realloc (sp->fts_array,
1884 sp->fts_nitems * sizeof *a))) {
1885 free(sp->fts_array);
1886 sp->fts_array = NULL;
1887 sp->fts_nitems = 0;
1888 return (head);
1890 sp->fts_array = a;
1892 for (ap = sp->fts_array, p = head; p; p = p->fts_link)
1893 *ap++ = p;
1894 qsort((void *)sp->fts_array, nitems, sizeof(FTSENT *), compare);
1895 for (head = *(ap = sp->fts_array); --nitems; ++ap)
1896 ap[0]->fts_link = ap[1];
1897 ap[0]->fts_link = NULL;
1898 return (head);
1901 static FTSENT *
1902 internal_function
1903 fts_alloc (FTS *sp, const char *name, register size_t namelen)
1905 register FTSENT *p;
1906 size_t len;
1909 * The file name is a variable length array. Allocate the FTSENT
1910 * structure and the file name in one chunk.
1912 len = FLEXSIZEOF(FTSENT, fts_name, namelen + 1);
1913 if ((p = malloc(len)) == NULL)
1914 return (NULL);
1916 /* Copy the name and guarantee NUL termination. */
1917 memcpy(p->fts_name, name, namelen);
1918 p->fts_name[namelen] = '\0';
1920 p->fts_namelen = namelen;
1921 p->fts_fts = sp;
1922 p->fts_path = sp->fts_path;
1923 p->fts_errno = 0;
1924 p->fts_dirp = NULL;
1925 p->fts_flags = 0;
1926 p->fts_instr = FTS_NOINSTR;
1927 p->fts_number = 0;
1928 p->fts_pointer = NULL;
1929 return (p);
1932 static void
1933 internal_function
1934 fts_lfree (register FTSENT *head)
1936 register FTSENT *p;
1937 int err = errno;
1939 /* Free a linked list of structures. */
1940 while ((p = head)) {
1941 head = head->fts_link;
1942 if (p->fts_dirp)
1943 closedir (p->fts_dirp);
1944 free(p);
1947 __set_errno (err);
1951 * Allow essentially unlimited file name lengths; find, rm, ls should
1952 * all work on any tree. Most systems will allow creation of file
1953 * names much longer than MAXPATHLEN, even though the kernel won't
1954 * resolve them. Add the size (not just what's needed) plus 256 bytes
1955 * so don't realloc the file name 2 bytes at a time.
1957 static bool
1958 internal_function
1959 fts_palloc (FTS *sp, size_t more)
1961 char *p;
1962 size_t new_len = sp->fts_pathlen + more + 256;
1965 * See if fts_pathlen would overflow.
1967 if (new_len < sp->fts_pathlen) {
1968 free(sp->fts_path);
1969 sp->fts_path = NULL;
1970 __set_errno (ENAMETOOLONG);
1971 return false;
1973 sp->fts_pathlen = new_len;
1974 p = realloc(sp->fts_path, sp->fts_pathlen);
1975 if (p == NULL) {
1976 free(sp->fts_path);
1977 sp->fts_path = NULL;
1978 return false;
1980 sp->fts_path = p;
1981 return true;
/* fts_padjust: after SP->fts_path has been moved by a reallocation, repoint
   the fts_path (and, where needed, fts_accpath) members of existing entries
   at the new buffer.  HEAD is the list being built by the caller; it is
   dereferenced unconditionally, so it must not be NULL.  */
1985 * When the file name is realloc'd, have to fix all of the pointers in
1986 * structures already returned.
1988 static void
1989 internal_function
1990 fts_padjust (FTS *sp, FTSENT *head)
1992 FTSENT *p;
1993 char *addr = sp->fts_path;
1995 /* This code looks at bit-patterns of freed pointers to
1996 relocate them, so it relies on undefined behavior. If this
1997 trick does not work on your platform, please report a bug. */
/* ADJUST(p): fts_accpath is rebased onto the new buffer, keeping its offset
   from the old fts_path, unless it points at the entry's own fts_name, which
   did not move.  fts_path is then set to the new buffer in every case.  The
   macro is not #undef'd in the lines shown here.  */
1999 #define ADJUST(p) do { \
2000 uintptr_t old_accpath = (uintptr_t) (p)->fts_accpath; \
2001 if (old_accpath != (uintptr_t) (p)->fts_name) { \
2002 (p)->fts_accpath = \
2003 addr + (old_accpath - (uintptr_t) (p)->fts_path); \
2005 (p)->fts_path = addr; \
2006 } while (0)
2007 /* Adjust the current set of children. */
2008 for (p = sp->fts_child; p; p = p->fts_link)
2009 ADJUST(p);
2011 /* Adjust the rest of the tree, including the current level. */
/* The walk follows fts_link while there is one and otherwise climbs to
   fts_parent; it ends at the first entry whose level is below
   FTS_ROOTLEVEL.  */
2012 for (p = head; p->fts_level >= FTS_ROOTLEVEL;) {
2013 ADJUST(p);
2014 p = p->fts_link ? p->fts_link : p->fts_parent;
/*
 * Return one more than the length of the longest string in the
 * NULL-terminated vector ARGV; the result is 1 for an empty vector.
 */
static size_t
internal_function _GL_ATTRIBUTE_PURE
fts_maxarglen (char * const *argv)
{
        size_t longest = 0;

        while (*argv != NULL) {
                size_t cur_len = strlen(*argv);

                if (longest < cur_len)
                        longest = cur_len;
                argv++;
        }
        return (longest + 1);
}
/* fts_safe_changedir: make the directory given by FD (when non-negative) or
   by the name DIR the stream's working directory.
     SP  - the stream; its FTS_NOCHDIR, FTS_CWDFD and FTS_LOGICAL options
           are consulted.
     P   - entry whose fts_statp dev/inode pair the target is compared with
           when the check below applies.
     FD  - descriptor of the target, or negative to open DIR instead.
     DIR - name to open when FD is negative; may be NULL when FD is given.
   Returns 0 on success (always, when FTS_NOCHDIR is set).  Returns -1 if
   diropen or fstat fails, or if the dev/inode pair differs (errno is then
   ENOENT); otherwise returns the result of fchdir.
   NOTE(review): the bail path closes NEWFD only when FD is negative, i.e.
   only a descriptor opened here by diropen.  A non-negative FD (passed in,
   or popped from the ring) is not closed on failure in the lines shown --
   confirm against the "Upon failure, close FD" sentence below and against
   the callers.  */
2031 * Change to dir specified by fd or file name without getting
2032 * tricked by someone changing the world out from underneath us.
2033 * Assumes p->fts_statp->st_dev and p->fts_statp->st_ino are filled in.
2034 * If FD is non-negative, expect it to be used after this function returns,
2035 * and to be closed eventually. So don't pass e.g., 'dirfd(dirp)' and then
2036 * do closedir(dirp), because that would invalidate the saved FD.
2037 * Upon failure, close FD immediately and return nonzero.
2039 static int
2040 internal_function
2041 fts_safe_changedir (FTS *sp, FTSENT *p, int fd, char const *dir)
2043 int ret;
2044 bool is_dotdot = dir && STREQ (dir, "..");
2045 int newfd;
2047 /* This clause handles the unusual case in which FTS_NOCHDIR
2048 is specified, along with FTS_CWDFD. In that case, there is
2049 no need to change even the virtual cwd file descriptor.
2050 However, if FD is non-negative, we do close it here. */
2051 if (ISSET (FTS_NOCHDIR))
2053 if (ISSET (FTS_CWDFD) && 0 <= fd)
2054 close (fd);
2055 return 0;
2058 if (fd < 0 && is_dotdot && ISSET (FTS_CWDFD))
2060 /* When possible, skip the diropen and subsequent fstat+dev/ino
2061 comparison. I.e., when changing to parent directory
2062 (chdir ("..")), use a file descriptor from the ring and
2063 save the overhead of diropen+fstat, as well as avoiding
2064 failure when we lack "x" access to the virtual cwd. */
2065 if ( ! i_ring_empty (&sp->fts_fd_ring))
2067 int parent_fd;
2068 fd_ring_print (sp, stderr, "pre-pop");
2069 parent_fd = i_ring_pop (&sp->fts_fd_ring);
2070 if (0 <= parent_fd)
2072 fd = parent_fd;
2073 dir = NULL;
2078 newfd = fd;
2079 if (fd < 0 && (newfd = diropen (sp, dir)) < 0)
2080 return -1;
2082 /* The following dev/inode check is necessary if we're doing a
2083 "logical" traversal (through symlinks, a la chown -L), if the
2084 system lacks O_NOFOLLOW support, or if we're changing to ".."
2085 (but not via a popped file descriptor). When changing to the
2086 name "..", O_NOFOLLOW can't help. In general, when the target is
2087 not "..", diropen's use of O_NOFOLLOW ensures we don't mistakenly
2088 follow a symlink, so we can avoid the expense of this fstat. */
2089 if (ISSET(FTS_LOGICAL) || ! HAVE_WORKING_O_NOFOLLOW
2090 || (dir && STREQ (dir, "..")))
2092 struct stat sb;
2093 if (fstat(newfd, &sb))
2095 ret = -1;
2096 goto bail;
2098 if (p->fts_statp->st_dev != sb.st_dev
2099 || p->fts_statp->st_ino != sb.st_ino)
2101 __set_errno (ENOENT); /* disinformation */
2102 ret = -1;
2103 goto bail;
2107 if (ISSET(FTS_CWDFD))
2109 cwd_advance_fd (sp, newfd, ! is_dotdot);
2110 return 0;
2113 ret = fchdir(newfd);
2114 bail:
/* Reached both after fchdir and from the failed checks above.  Only a
   descriptor opened in this function (FD negative at this point) is closed;
   errno is preserved across the close.  */
2115 if (fd < 0)
2117 int oerrno = errno;
2118 (void)close(newfd);
2119 __set_errno (oerrno);
2121 return ret;