1 /* Read, sort and compare two directories. Used for GNU DIFF.
3 Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006-2007,
4 2009-2013, 2015-2025 Free Software Foundation, Inc.
6 This file is part of GNU DIFF.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include <filenamecat.h>
/* Fall back to false when HAVE_STRUCT_DIRENT_D_TYPE has not been
   defined before this point (presumably by the build configuration --
   confirm), so that the name can be used below both in '#if' tests
   and as a 0-or-1 term in ordinary expressions.  */
33 #ifndef HAVE_STRUCT_DIRENT_D_TYPE
34 # define HAVE_STRUCT_DIRENT_D_TYPE false
37 /* A sorted vector of file names obtained by reading a directory.
   If HAVE_STRUCT_DIRENT_D_TYPE, each name is preceded by a byte
   giving the file type as an enum detype value.
   dir_read fills in all three members; the code that calls dir_read
   releases NAMES and DATA with free.  */
43 idx_t nnames
; /* Number of names, not counting the terminating null pointer. */
44 char const **names
; /* Names of files in dir, followed by a null pointer.  diff_dirs
     sorts this array with qsort. */
45 char *data
; /* Allocated storage for file names: NUL-terminated strings stored
     one after another, each preceded by its type byte when
     HAVE_STRUCT_DIRENT_D_TYPE. */
48 /* Whether file names in directories should be compared with
   locale-specific sorting.  diff_dirs sets this to true before
   sorting and resets it to false if a collation failure is reported;
   compare_names reads it to decide whether to call
   compare_collated.  */
50 static bool locale_specific_sorting
;
52 /* Where to go if locale-specific sorting fails.  diff_dirs calls
   setjmp on this buffer, and compare_collated calls longjmp on it
   after printing its diagnostic.  */
53 static jmp_buf failed_locale_specific_sorting
;
/* Forward declarations.  Both functions are called earlier in this
   file than they are defined: compare_names by dir_read, and dir_loop
   by diff_dirs.  */
55 static int compare_names (char const *, char const *);
56 static bool dir_loop (struct comparison
const *, int);
59 /* Given the parent directory PARENTDIRFD (negative for current dir),
   read the directory named by DIR and store into DIRDATA a sorted
   vector of filenames for its contents.
   Use DIR's basename if PARENTDIRFD is nonnegative, for efficiency.
   If DIR->desc == NONEXISTENT, this directory is known to be
   nonexistent so set DIRDATA to an empty vector;
   otherwise, update DIR->desc and DIR->dirstream as needed.
   If STARTFILE, ignore directory entries less than STARTFILE, and if
   STARTFILE_ONLY, also ignore directory entries greater than STARTFILE.
   Return true if successful, false (setting errno) otherwise.

   The statements visible here leave DIRDATA->names as a newly
   allocated array of DIRDATA->nnames entries followed by a null
   pointer, and DIRDATA->data as the buffer holding the name bytes.
   No sorting call is visible in this function; diff_dirs sorts the
   vector with qsort after reading it.

   NOTE(review): this listing appears to omit some source lines (for
   example braces, the read loop header and the return statements), so
   the comments below describe only the statements that are visible.  */
71 dir_read (int parentdirfd
, struct file_data
*dir
, struct dirdata
*dirdata
,
72 char const *startfile
, bool startfile_only
)
74 /* Number of files in directory. */
77 /* Allocated and used storage for file name data. */
/* NOTE(review): the declarations that the two comments above describe
   are not visible in this listing; NNAMES, DATA and DATA_USED are
   used further down.  */
/* Start with no tables, so that DIRDATA is in a known state before
   anything is allocated.  */
80 dirdata
->names
= nullptr;
81 dirdata
->data
= nullptr;
83 if (dir
->desc
!= NONEXISTENT
)
85 /* Open the directory and check for errors. */
86 int dirfd
= dir
->desc
;
/* Open the directory relative to PARENTDIRFD: by its full name when
   PARENTDIRFD is negative, otherwise by its last component only.
   O_DIRECTORY makes the open fail for a non-directory, O_CLOEXEC
   marks the descriptor close-on-exec, and O_NOFOLLOW is requested
   only when no_dereference_symlinks is set.  */
89 dirfd
= openat (parentdirfd
,
90 (parentdirfd
< 0 ? dir
->name
91 : last_component (dir
->name
)),
92 (O_RDONLY
| O_CLOEXEC
| O_DIRECTORY
93 | (no_dereference_symlinks
? O_NOFOLLOW
: 0)));
/* Turn the descriptor into a directory stream and record the stream
   in DIR.  */
98 DIR *reading
= fdopendir (dirfd
);
101 dir
->dirstream
= reading
;
103 /* Initialize the table of filenames. */
/* DATA starts with room for 512 bytes and is enlarged on demand
   below.  */
105 idx_t data_alloc
= 512;
107 dirdata
->data
= data
= ximalloc (data_alloc
);
109 /* Read the directory entries, and insert the subfiles
   into the 'data' table. */
115 struct dirent
*next
= readdir (reading
);
119 char *d_name
= next
->d_name
;
121 /* Ignore "." and "..". */
123 && (d_name
[1] == 0 || (d_name
[1] == '.' && d_name
[2] == 0)))
/* This is the test behind the STARTFILE filtering described in the
   header comment: it is true for a name that compare_names orders
   before STARTFILE and, when STARTFILE_ONLY, also for any name that
   does not compare equal to STARTFILE.  */
128 int cmp
= compare_names (d_name
, startfile
);
129 if (cmp
< 0 || (startfile_only
&& !!cmp
))
/* Test the name against the EXCLUDED set.  */
133 if (excluded_file_name (excluded
, d_name
))
/* Bytes this entry needs: one type byte if
   HAVE_STRUCT_DIRENT_D_TYPE, the name itself, and the terminating
   NUL.  */
136 idx_t d_size
= HAVE_STRUCT_DIRENT_D_TYPE
+ _D_EXACT_NAMLEN (next
) + 1;
/* Enlarge DATA when the unused tail is smaller than D_SIZE; the
   growth requested from xpalloc is at least the shortfall.  */
137 if (data_alloc
- data_used
< d_size
)
139 = xpalloc (data
, &data_alloc
,
140 d_size
- (data_alloc
- data_used
), -1, 1);
141 #if HAVE_STRUCT_DIRENT_D_TYPE
/* Translate the dirent d_type code into the matching enum detype
   value; any code not listed becomes DE_OTHER.  */
143 switch (next
->d_type
)
145 case DT_BLK
: detype
= DE_BLK
; break;
146 case DT_CHR
: detype
= DE_CHR
; break;
147 case DT_DIR
: detype
= DE_DIR
; break;
148 case DT_FIFO
: detype
= DE_FIFO
; break;
149 case DT_LNK
: detype
= DE_LNK
; break;
150 case DT_REG
: detype
= DE_REG
; break;
151 case DT_SOCK
: detype
= DE_SOCK
; break;
153 case DT_WHT
: detype
= DE_WHT
; break;
155 case DT_UNKNOWN
: detype
= DE_UNKNOWN
; break;
156 default: detype
= DE_OTHER
; break;
/* Store the type byte at the current fill position, so that it ends
   up immediately before the name copied below.  */
158 data
[data_used
++] = detype
;
/* Copy D_SIZE bytes starting at D_NAME to the current fill position
   of DATA.  */
161 memcpy (data
+ data_used
, d_name
, d_size
);
170 /* Create the 'names' table from the 'data' table. */
/* One extra slot is allocated for the terminating null pointer.  */
171 char const **names
= xinmalloc (nnames
+ 1, sizeof *names
);
172 dirdata
->names
= names
;
173 dirdata
->nnames
= nnames
;
174 for (idx_t i
= 0; i
< nnames
; i
++)
/* Step over the type byte, if any, that precedes the name.  */
176 data
+= HAVE_STRUCT_DIRENT_D_TYPE
;
/* Advance past the name and its terminating NUL to the next entry.  */
178 data
+= strlen (data
) + 1;
/* Terminate the vector so that it can be walked without NNAMES.  */
180 names
[nnames
] = nullptr;
184 /* Compare strings in a locale-specific way, returning a value
   compatible with strcmp.
   NAME1 and NAME2 are compared with strcoll.  The error path visible
   below prints a diagnostic naming both strings and then jumps to
   failed_locale_specific_sorting, the buffer on which diff_dirs
   calls setjmp.
   NOTE(review): the condition that guards the error path and the
   return statement are not visible in this listing.  */
188 compare_collated (char const *name1
, char const *name2
)
191 int r
= strcoll (name1
, name2
);
/* Report the failure using errno.  The status argument is 0, so
   error returns and the longjmp that follows is reached.  */
194 error (0, errno
, _("cannot compare file names %s and %s"),
195 quote_n (0, name1
), quote_n (1, name2
));
/* Give up on locale-specific comparison: control resumes at the
   setjmp in diff_dirs, which then sees a nonzero value.  */
196 longjmp (failed_locale_specific_sorting
, 1);
201 /* Compare file names, returning a value compatible with strcmp.
   When ignore_file_name_case is set, the result is that of
   mbscasecmp.  Otherwise, when locale_specific_sorting is set,
   compare_collated is consulted; the last visible statement returns
   the result of file_name_cmp.
   NOTE(review): how DIFF is used after the compare_collated call is
   not visible in this listing.  */
204 compare_names (char const *name1
, char const *name2
)
206 if (ignore_file_name_case
)
207 return mbscasecmp (name1
, name2
); /* Best we can do. */
209 if (locale_specific_sorting
)
/* May not return: compare_collated jumps to
   failed_locale_specific_sorting on its error path.  */
211 int diff
= compare_collated (name1
, name2
);
215 return file_name_cmp (name1
, name2
);
218 /* Compare names FILE1 and FILE2 when sorting a directory.
   Prefer filtered comparison, breaking ties with file_name_cmp.
   This is the comparison function that diff_dirs passes to qsort.
   FILE1 and FILE2 are treated as pointers to 'char const *' elements
   of a names vector, and the result is that of compare_names applied
   to the two strings they point to.  */
222 compare_names_for_qsort (void const *file1
, void const *file2
)
/* Recover the element type from the untyped pointers.  */
224 char const *const *f1
= file1
;
225 char const *const *f2
= file2
;
226 return compare_names (*f1
, *f2
);
229 /* Compare the contents of two directories named in CMP.
   This is a top-level routine; it does everything necessary for diff
   to compare the two directories.

   If CMP->file[0].desc == NONEXISTENT, directory CMP->file[0] doesn't exist
   and pretend it is empty. Otherwise, update CMP->file[0].desc and
   CMP->file[0].dirstream as needed. Likewise for CMP->file[1].

   Returns the maximum of all the values returned by compare_files,
   or EXIT_TROUBLE if trouble is encountered in opening files.

   NOTE(review): this listing appears to omit some source lines (for
   example braces, the merge loop header and the return statements), so
   the comments below describe only the statements that are visible.  */
241 diff_dirs (struct comparison
*cmp
)
/* Loop check.  A side that does not exist is skipped, so the test
   succeeds only when every existing side is found by dir_loop to
   repeat one of its ancestors.  */
243 if ((cmp
->file
[0].desc
== NONEXISTENT
|| dir_loop (cmp
, 0))
244 && (cmp
->file
[1].desc
== NONEXISTENT
|| dir_loop (cmp
, 1)))
/* The index expression is 1 when file 0 is nonexistent and 0
   otherwise, so the message names file 0 unless it does not
   exist.  */
246 error (0, 0, _("%s: recursive directory loop"),
247 squote (0, cmp
->file
[cmp
->file
[0].desc
== NONEXISTENT
].name
));
251 /* Get contents of both dirs. */
252 struct dirdata dirdata
[2];
253 int val
= EXIT_SUCCESS
;
/* Each directory is read relative to the parent comparison's
   descriptor for the same side.  starting_file is applied only when
   CMP has no parent, and entries ordering after it are kept
   (STARTFILE_ONLY is false).  */
254 for (int i
= 0; i
< 2; i
++)
255 if (! dir_read (cmp
->parent
->file
[i
].desc
, &cmp
->file
[i
], &dirdata
[i
],
256 cmp
->parent
== &noparent
? starting_file
: nullptr, false))
/* Report the read failure against the directory's name.  */
258 perror_with_name (cmp
->file
[i
].name
);
262 if (val
== EXIT_SUCCESS
)
264 /* Use locale-specific sorting if possible, else native byte order. */
265 locale_specific_sorting
= true;
/* setjmp yields nonzero only after compare_collated has jumped back
   here on a failed comparison; locale-specific sorting is then
   switched off before the sort below runs.  With
   ignore_file_name_case set, compare_names returns before it would
   call compare_collated, so no jump target is set up.  */
266 if (! ignore_file_name_case
)
267 if (setjmp (failed_locale_specific_sorting
))
268 locale_specific_sorting
= false;
270 /* Sort the directories. */
271 for (int i
= 0; i
< 2; i
++)
272 qsort (dirdata
[i
].names
, dirdata
[i
].nnames
, sizeof *dirdata
[i
].names
,
273 compare_names_for_qsort
);
275 /* Loop while files remain in one or both dirs. */
/* N0 and N1 walk the two sorted, null-terminated name vectors.  */
276 char const **n0
= dirdata
[0].names
;
277 char const **n1
= dirdata
[1].names
;
280 /* Compare next name in dir 0 with next name in dir 1.
   When a dir has no names left (its pointer is at the terminating
   null), pretend the "next name" in that dir is very large. */
283 int nameorder
= !*n0
? 1 : !*n1
? -1 : compare_names (*n0
, *n1
);
285 /* Prefer a file_name_cmp match if available. This algorithm is
   O(N**2), where N is the number of names in a directory
   that compare_names says are all equal, but in practice N
   is so small it's not worth tuning. */
289 if (nameorder
== 0 && ignore_file_name_case
)
291 int raw_order
= file_name_cmp (*n0
, *n1
);
/* LESSER is the position in the vector whose current name orders
   first under file_name_cmp; GREATER_NAME is the current name on the
   other side.  */
294 char const **lesser
= raw_order
< 0 ? n0
: n1
;
295 char const *greater_name
= *(raw_order
< 0 ? n1
: n0
);
/* Scan the names after *LESSER for as long as compare_names still
   considers them equal to GREATER_NAME.  */
297 for (char const **p
= lesser
+ 1;
298 *p
&& compare_names (*p
, greater_name
) == 0;
301 int c
= file_name_cmp (*p
, greater_name
);
/* Shift the entries from LESSER up to, but not including, P one slot
   toward the end, overwriting *P, and store GREATER_NAME in the
   vacated first slot.
   NOTE(review): GREATER_NAME was taken from the other directory's
   vector, so *LESSER now refers to that directory's copy of the
   name; the [-1] type-byte lookup below would then read the other
   directory's type byte for this side -- confirm this is
   intended.  */
306 memmove (lesser
+ 1, lesser
,
307 (char *) p
- (char *) lesser
);
308 *lesser
= greater_name
;
/* File type of each side's current name: the byte stored just before
   the name when HAVE_STRUCT_DIRENT_D_TYPE and the side still has a
   name, otherwise DE_UNKNOWN.  */
316 enum detype detypes
[]
317 = { HAVE_STRUCT_DIRENT_D_TYPE
&& *n0
? (*n0
)[-1] : DE_UNKNOWN
,
318 HAVE_STRUCT_DIRENT_D_TYPE
&& *n1
? (*n1
)[-1] : DE_UNKNOWN
};
/* Pass a null name for the side whose current name orders later, and
   advance only the pointers whose names are passed.  When the names
   compare equal, both are passed and both pointers advance.  */
319 int v1
= compare_files (cmp
, detypes
,
320 0 < nameorder
? nullptr : *n0
++,
321 nameorder
< 0 ? nullptr : *n1
++);
/* Release the tables allocated by dir_read.  */
327 for (int i
= 0; i
< 2; i
++)
329 free (dirdata
[i
].names
);
330 free (dirdata
[i
].data
);
336 /* Return nonzero if CMP is looping recursively in argument I.
   I selects which side of the comparison is examined (diff_dirs
   passes 0 and 1).  CMP's chain of parents is walked, starting at
   CMP's own parent and stopping at the noparent sentinel, and
   same_file is applied to each ancestor's file I and CMP's file I.
   NOTE(review): the return statements are not visible in this
   listing.  */
338 static bool ATTRIBUTE_PURE
339 dir_loop (struct comparison
const *cmp
, int i
)
/* P is advanced to its parent before each test, so CMP itself is
   never compared with itself.  */
341 for (struct comparison
const *p
= cmp
; (p
= p
->parent
) != &noparent
; )
342 if (same_file (&p
->file
[i
].stat
, &cmp
->file
[i
].stat
))
347 /* Find a matching filename in a directory. */
350 find_dir_file_pathname (struct file_data
*dir
, char const *file
,
353 char const *match
= file
;
355 struct dirdata dirdata
;
356 dirdata
.names
= nullptr;
357 dirdata
.data
= nullptr;
359 if (ignore_file_name_case
&& dir_read (AT_FDCWD
, dir
, &dirdata
, file
, true))
360 for (char const **p
= dirdata
.names
; *p
; p
++)
362 if (file_name_cmp (*p
, file
) == 0)
371 *detype
= HAVE_STRUCT_DIRENT_D_TYPE
&& match
!= file
? match
[-1] : DE_UNKNOWN
;
372 char *val
= file_name_concat (dir
->name
, match
, nullptr);
373 free (dirdata
.names
);