maint: post-release administrivia
[diffutils.git] / src / dir.c
blobc94ef00565fb7436210e9fc0e45e9a8a49dd06c3
1 /* Read, sort and compare two directories. Used for GNU DIFF.
3 Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006-2007,
4 2009-2013, 2015-2025 Free Software Foundation, Inc.
6 This file is part of GNU DIFF.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "diff.h"
23 #include <diagnose.h>
24 #include <dirname.h>
25 #include <error.h>
26 #include <exclude.h>
27 #include <filenamecat.h>
28 #include <mcel.h>
29 #include <quote.h>
30 #include <setjmp.h>
31 #include <xalloc.h>
33 #ifndef HAVE_STRUCT_DIRENT_D_TYPE
34 # define HAVE_STRUCT_DIRENT_D_TYPE false
35 #endif
37 /* A sorted vector of file names obtained by reading a directory.
38 If HAVE_STRUCT_DIRENT_D_TYPE, each name is preceded by a byte
39 giving the file type as an enum detype value. */
41 struct dirdata
43 idx_t nnames; /* Number of names. */
44 char const **names; /* Sorted names of files in dir, followed by 0. */
45 char *data; /* Allocated storage for file names. */
/* True if file names within directories should be ordered with the
   locale's collation rules.  */
static bool locale_specific_sorting;

/* Jump target taken when a locale-specific comparison fails.  */
static jmp_buf failed_locale_specific_sorting;

/* Forward declarations for functions defined later in this file.  */
static int compare_names (char const *name1, char const *name2);
static bool dir_loop (struct comparison const *cmp, int i);
59 /* Given the parent directory PARENTDIRFD (negative for current dir),
60 read the directory named by DIR and store into DIRDATA a sorted
61 vector of filenames for its contents.
62 Use DIR's basename if PARENTDIRFD is nonnegative, for efficiency.
63 If DIR->desc == NONEXISTENT, this directory is known to be
64 nonexistent so set DIRDATA to an empty vector;
65 otherwise, update DIR->desc and DIR->dirstream as needed.
66 If STARTFILE, ignore directory entries less than STARTFILE, and if
67 STARTFILE_ONLY, also ignore directory entries greater than STARTFILE.
68 Return true if successful, false (setting errno) otherwise. */
70 static bool
71 dir_read (int parentdirfd, struct file_data *dir, struct dirdata *dirdata,
72 char const *startfile, bool startfile_only)
74 /* Number of files in directory. */
75 idx_t nnames = 0;
77 /* Allocated and used storage for file name data. */
78 char *data;
80 dirdata->names = nullptr;
81 dirdata->data = nullptr;
83 if (dir->desc != NONEXISTENT)
85 /* Open the directory and check for errors. */
86 int dirfd = dir->desc;
87 if (dirfd < 0)
89 dirfd = openat (parentdirfd,
90 (parentdirfd < 0 ? dir->name
91 : last_component (dir->name)),
92 (O_RDONLY | O_CLOEXEC | O_DIRECTORY
93 | (no_dereference_symlinks ? O_NOFOLLOW : 0)));
94 if (dirfd < 0)
95 return false;
96 dir->desc = dirfd;
98 DIR *reading = fdopendir (dirfd);
99 if (!reading)
100 return false;
101 dir->dirstream = reading;
103 /* Initialize the table of filenames. */
105 idx_t data_alloc = 512;
106 idx_t data_used = 0;
107 dirdata->data = data = ximalloc (data_alloc);
109 /* Read the directory entries, and insert the subfiles
110 into the 'data' table. */
112 while (true)
114 errno = 0;
115 struct dirent *next = readdir (reading);
116 if (!next)
117 break;
119 char *d_name = next->d_name;
121 /* Ignore "." and "..". */
122 if (d_name[0] == '.'
123 && (d_name[1] == 0 || (d_name[1] == '.' && d_name[2] == 0)))
124 continue;
126 if (startfile)
128 int cmp = compare_names (d_name, startfile);
129 if (cmp < 0 || (startfile_only && !!cmp))
130 continue;
133 if (excluded_file_name (excluded, d_name))
134 continue;
136 idx_t d_size = HAVE_STRUCT_DIRENT_D_TYPE + _D_EXACT_NAMLEN (next) + 1;
137 if (data_alloc - data_used < d_size)
138 dirdata->data = data
139 = xpalloc (data, &data_alloc,
140 d_size - (data_alloc - data_used), -1, 1);
141 #if HAVE_STRUCT_DIRENT_D_TYPE
142 char detype;
143 switch (next->d_type)
145 case DT_BLK: detype = DE_BLK; break;
146 case DT_CHR: detype = DE_CHR; break;
147 case DT_DIR: detype = DE_DIR; break;
148 case DT_FIFO: detype = DE_FIFO; break;
149 case DT_LNK: detype = DE_LNK; break;
150 case DT_REG: detype = DE_REG; break;
151 case DT_SOCK: detype = DE_SOCK; break;
152 # ifdef DT_WHT
153 case DT_WHT: detype = DE_WHT; break;
154 # endif
155 case DT_UNKNOWN: detype = DE_UNKNOWN; break;
156 default: detype = DE_OTHER; break;
158 data[data_used++] = detype;
159 d_size--;
160 #endif
161 memcpy (data + data_used, d_name, d_size);
162 data_used += d_size;
163 nnames++;
166 if (errno)
167 return false;
170 /* Create the 'names' table from the 'data' table. */
171 char const **names = xinmalloc (nnames + 1, sizeof *names);
172 dirdata->names = names;
173 dirdata->nnames = nnames;
174 for (idx_t i = 0; i < nnames; i++)
176 data += HAVE_STRUCT_DIRENT_D_TYPE;
177 names[i] = data;
178 data += strlen (data) + 1;
180 names[nnames] = nullptr;
181 return true;
184 /* Compare strings in a locale-specific way, returning a value
185 compatible with strcmp. */
187 static int
188 compare_collated (char const *name1, char const *name2)
190 errno = 0;
191 int r = strcoll (name1, name2);
192 if (errno)
194 error (0, errno, _("cannot compare file names %s and %s"),
195 quote_n (0, name1), quote_n (1, name2));
196 longjmp (failed_locale_specific_sorting, 1);
198 return r;
201 /* Compare file names, returning a value compatible with strcmp. */
203 static int
204 compare_names (char const *name1, char const *name2)
206 if (ignore_file_name_case)
207 return mbscasecmp (name1, name2); /* Best we can do. */
209 if (locale_specific_sorting)
211 int diff = compare_collated (name1, name2);
212 if (diff)
213 return diff;
215 return file_name_cmp (name1, name2);
/* qsort comparison function for a vector of file names.
   FILE1 and FILE2 each point to an element of the vector, that is,
   to a 'char const *'.  The ordering is that of compare_names.  */

static int
compare_names_for_qsort (void const *file1, void const *file2)
{
  char const *lhs = *(char const *const *) file1;
  char const *rhs = *(char const *const *) file2;
  return compare_names (lhs, rhs);
}
229 /* Compare the contents of two directories named in CMP.
230 This is a top-level routine; it does everything necessary for diff
231 on two directories.
233 If CMP->file[0].desc == NONEXISTENT, directory CMP->file[0] doesn't exist
234 and pretend it is empty. Otherwise, update CMP->file[0].desc and
235 CMP->file[0].dirstream as needed. Likewise for CMP->file[1].
237 Returns the maximum of all the values returned by compare_files,
238 or EXIT_TROUBLE if trouble is encountered in opening files. */
241 diff_dirs (struct comparison *cmp)
243 if ((cmp->file[0].desc == NONEXISTENT || dir_loop (cmp, 0))
244 && (cmp->file[1].desc == NONEXISTENT || dir_loop (cmp, 1)))
246 error (0, 0, _("%s: recursive directory loop"),
247 squote (0, cmp->file[cmp->file[0].desc == NONEXISTENT].name));
248 return EXIT_TROUBLE;
251 /* Get contents of both dirs. */
252 struct dirdata dirdata[2];
253 int val = EXIT_SUCCESS;
254 for (int i = 0; i < 2; i++)
255 if (! dir_read (cmp->parent->file[i].desc, &cmp->file[i], &dirdata[i],
256 cmp->parent == &noparent ? starting_file : nullptr, false))
258 perror_with_name (cmp->file[i].name);
259 val = EXIT_TROUBLE;
262 if (val == EXIT_SUCCESS)
264 /* Use locale-specific sorting if possible, else native byte order. */
265 locale_specific_sorting = true;
266 if (! ignore_file_name_case)
267 if (setjmp (failed_locale_specific_sorting))
268 locale_specific_sorting = false;
270 /* Sort the directories. */
271 for (int i = 0; i < 2; i++)
272 qsort (dirdata[i].names, dirdata[i].nnames, sizeof *dirdata[i].names,
273 compare_names_for_qsort);
275 /* Loop while files remain in one or both dirs. */
276 char const **n0 = dirdata[0].names;
277 char const **n1 = dirdata[1].names;
278 while (*n0 || *n1)
280 /* Compare next name in dir 0 with next name in dir 1.
281 At the end of a dir,
282 pretend the "next name" in that dir is very large. */
283 int nameorder = !*n0 ? 1 : !*n1 ? -1 : compare_names (*n0, *n1);
285 /* Prefer a file_name_cmp match if available. This algorithm is
286 O(N**2), where N is the number of names in a directory
287 that compare_names says are all equal, but in practice N
288 is so small it's not worth tuning. */
289 if (nameorder == 0 && ignore_file_name_case)
291 int raw_order = file_name_cmp (*n0, *n1);
292 if (raw_order != 0)
294 char const **lesser = raw_order < 0 ? n0 : n1;
295 char const *greater_name = *(raw_order < 0 ? n1 : n0);
297 for (char const **p = lesser + 1;
298 *p && compare_names (*p, greater_name) == 0;
299 p++)
301 int c = file_name_cmp (*p, greater_name);
302 if (0 <= c)
304 if (c == 0)
306 memmove (lesser + 1, lesser,
307 (char *) p - (char *) lesser);
308 *lesser = greater_name;
310 break;
316 enum detype detypes[]
317 = { HAVE_STRUCT_DIRENT_D_TYPE && *n0 ? (*n0)[-1] : DE_UNKNOWN,
318 HAVE_STRUCT_DIRENT_D_TYPE && *n1 ? (*n1)[-1] : DE_UNKNOWN };
319 int v1 = compare_files (cmp, detypes,
320 0 < nameorder ? nullptr : *n0++,
321 nameorder < 0 ? nullptr : *n1++);
322 if (val < v1)
323 val = v1;
327 for (int i = 0; i < 2; i++)
329 free (dirdata[i].names);
330 free (dirdata[i].data);
333 return val;
336 /* Return nonzero if CMP is looping recursively in argument I. */
338 static bool ATTRIBUTE_PURE
339 dir_loop (struct comparison const *cmp, int i)
341 for (struct comparison const *p = cmp; (p = p->parent) != &noparent; )
342 if (same_file (&p->file[i].stat, &cmp->file[i].stat))
343 return true;
344 return false;
347 /* Find a matching filename in a directory. */
349 char *
350 find_dir_file_pathname (struct file_data *dir, char const *file,
351 enum detype *detype)
353 char const *match = file;
355 struct dirdata dirdata;
356 dirdata.names = nullptr;
357 dirdata.data = nullptr;
359 if (ignore_file_name_case && dir_read (AT_FDCWD, dir, &dirdata, file, true))
360 for (char const **p = dirdata.names; *p; p++)
362 if (file_name_cmp (*p, file) == 0)
364 match = *p;
365 break;
367 if (match == file)
368 match = *p;
371 *detype = HAVE_STRUCT_DIRENT_D_TYPE && match != file ? match[-1] : DE_UNKNOWN;
372 char *val = file_name_concat (dir->name, match, nullptr);
373 free (dirdata.names);
374 free (dirdata.data);
375 return val;