doc: rewrite the "Unusual File Names" section
[diffutils.git] / src / diff.c
blobc465d4019da3156f5fec829b7229b3fb8cb5eee0
1 /* GNU diff - compare files line by line
3 Copyright (C) 1988-1989, 1992-1994, 1996, 1998, 2001-2002, 2004, 2006-2007,
4 2009-2013, 2015-2025 Free Software Foundation, Inc.
6 This file is part of GNU DIFF.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #define DIFF_INLINE _GL_EXTERN_INLINE
22 #include "diff.h"
23 #include "paths.h"
25 #include <binary-io.h>
26 #include <c-ctype.h>
27 #include <c-stack.h>
28 #include <careadlinkat.h>
29 #include <diagnose.h>
30 #include <dirname.h>
31 #include <error.h>
32 #include <exclude.h>
33 #include <exitfail.h>
34 #include <file-type.h>
35 #include <filenamecat.h>
36 #include <fnmatch.h>
37 #include <getopt.h>
38 #include <hard-locale.h>
39 #include <progname.h>
40 #include <quote.h>
41 #include <sh-quote.h>
42 #include <stat-time.h>
43 #include <version-etc.h>
44 #include <xalloc.h>
45 #include <xstdopen.h>
47 #ifdef MAJOR_IN_MKDEV
48 # include <sys/mkdev.h>
49 #elif defined MAJOR_IN_SYSMACROS
50 # include <sys/sysmacros.h>
51 #elif !defined major /* Might be defined in sys/types.h. */
52 # define major(dev) (((dev) >> 8) & 0xff)
53 # define minor(dev) ((dev) & 0xff)
54 #endif
56 /* The official name of this program (e.g., no 'g' prefix).
   main passes it to version_etc when handling -v. */
57 static char const PROGRAM_NAME[] = "diff";
/* Comma-separated list of translated author names; it expands into the
   trailing arguments of the version_etc call in main. */
59 #define AUTHORS \
60 _("Paul Eggert"), \
61 _("Mike Haertel"), \
62 _("David Hayes"), \
63 _("Richard Stallman"), \
64 _("Len Tower")
/* Narrowest gutter allowed between the two side-by-side half lines.
   main uses it when computing the side-by-side geometry and when
   bounding the --tabsize argument.  A build may predefine it to
   override the default. */
66 #ifndef GUTTER_WIDTH_MINIMUM
67 # define GUTTER_WIDTH_MINIMUM 3
68 #endif
/* Accumulates the patterns given by repeated regexp options.
   add_regexp appends each pattern's text to REGEXPS, and
   summarize_regexp_list later compiles the combined text into BUF
   when more than one pattern was given. */
70 struct regexp_list
72 char *regexps; /* chars representing disjunction of the regexps */
73 idx_t len; /* chars used in 'regexps' */
74 idx_t size; /* size malloc'ed for 'regexps'; 0 if not malloc'ed */
75 bool multiple_regexps;/* Does 'regexps' represent a disjunction? */
76 struct re_pattern_buffer *buf; /* compiled pattern; main points this at function_regexp or ignore_regexp */
/* Forward declarations of helpers that main uses before their
   definitions appear.  Parameter names mirror the definitions.  */
static void add_regexp (struct regexp_list *reglist, char const *pattern);
static void summarize_regexp_list (struct regexp_list *reglist);
static void specify_style (enum output_style style);
static void specify_value (char const **var, char const *value,
                           char const *option);
static void specify_colors_style (char const *value);
static void check_stdout (void);
static void usage (void);
87 /* If comparing directories, compare their common subdirectories
   recursively.  Set by -r. */
89 static bool recursive;
91 /* In context diffs, show previous lines that match these regexps.
   Patterns are added by -F and by -p. */
92 static struct regexp_list function_regexp_list;
94 /* Ignore changes affecting only lines that match these regexps.
   Patterns are added by -I. */
95 static struct regexp_list ignore_regexp_list;
97 #if O_BINARY
98 /* Use binary I/O when reading and writing data (--binary).
   On POSIX hosts, this has no effect. */
100 static bool binary;
101 #else
/* When O_BINARY is zero, 'binary' is an enumeration constant instead of
   a variable, so expressions that test it still compile. */
102 enum { binary = true };
103 #endif
105 /* Use Linux-style O_PATH if available and supported by fstat(),
   POSIX-style O_SEARCH otherwise.  O_PATH_DEFINED records which of
   the two was chosen. */
107 #if O_PATH_SUPPORTS_FSTAT
108 enum { O_PATH_DEFINED = true };
109 enum { O_PATHSEARCH = O_PATH };
110 #else
111 enum { O_PATH_DEFINED = false };
112 enum { O_PATHSEARCH = O_SEARCH };
113 #endif
115 /* If one file is missing, treat it as present but empty (-N). */
116 static bool new_file;
118 /* If the first file is missing, treat it as present but empty
   (--unidirectional-new-file, -P). */
120 static bool unidirectional_new_file;
122 /* Report files compared that are the same (-s).
   Normally nothing is output when that happens. */
124 static bool report_identical_files;
126 /* Do not treat directories specially.  Set only by the undocumented
   NO_DIRECTORY_OPTION, which exists for diff3. */
127 static bool no_directory;
129 /* Values for long options that do not have single-letter equivalents.
   Numbering starts at CHAR_MAX + 1, above every value a 'char' option
   letter can take, so these codes cannot collide with the letters
   listed in shortopts. */
130 enum
132 BINARY_OPTION = CHAR_MAX + 1,
133 FROM_FILE_OPTION,
134 HELP_OPTION,
135 HORIZON_LINES_OPTION,
136 IGNORE_FILE_NAME_CASE_OPTION,
137 INHIBIT_HUNK_MERGE_OPTION,
138 LEFT_COLUMN_OPTION,
139 LINE_FORMAT_OPTION,
140 NO_DEREFERENCE_OPTION,
141 NO_IGNORE_FILE_NAME_CASE_OPTION,
142 NORMAL_OPTION,
143 SDIFF_MERGE_ASSIST_OPTION,
144 STRIP_TRAILING_CR_OPTION,
145 SUPPRESS_BLANK_EMPTY_OPTION,
146 SUPPRESS_COMMON_LINES_OPTION,
147 TABSIZE_OPTION,
148 TO_FILE_OPTION,
150 /* These options must be in sequence: main subtracts
   UNCHANGED_LINE_FORMAT_OPTION from the option code and uses the
   difference to index line_format and line_format_option. */
151 UNCHANGED_LINE_FORMAT_OPTION,
152 OLD_LINE_FORMAT_OPTION,
153 NEW_LINE_FORMAT_OPTION,
155 /* These options must be in sequence: main subtracts
   UNCHANGED_GROUP_FORMAT_OPTION from the option code and uses the
   difference to index group_format and group_format_option. */
156 UNCHANGED_GROUP_FORMAT_OPTION,
157 OLD_GROUP_FORMAT_OPTION,
158 NEW_GROUP_FORMAT_OPTION,
159 CHANGED_GROUP_FORMAT_OPTION,
161 COLOR_OPTION,
162 COLOR_PALETTE_OPTION,
/* Codes for the two undocumented options: one used by diff3 and one
   used for testing. */
164 NO_DIRECTORY_OPTION,
165 PRESUME_OUTPUT_TTY_OPTION,
/* Spellings of the group-format options, listed in the same order as
   the *_GROUP_FORMAT_OPTION enumerators.  main passes the matching row
   to specify_value as the option name to report on a conflict.  Every
   row is sized for the longest spelling, "--unchanged-group-format". */
168 static char const group_format_option[][sizeof "--unchanged-group-format"] =
170 "--unchanged-group-format",
171 "--old-group-format",
172 "--new-group-format",
173 "--changed-group-format"
/* Spellings of the line-format options, listed in the same order as
   the *_LINE_FORMAT_OPTION enumerators and used the same way. */
176 static char const line_format_option[][sizeof "--unchanged-line-format"] =
178 "--unchanged-line-format",
179 "--old-line-format",
180 "--new-line-format"
183 static char const shortopts[] =
184 "0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:yZ";
185 static struct option const longopts[] =
187 {"binary", 0, 0, BINARY_OPTION},
188 {"brief", 0, 0, 'q'},
189 {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION},
190 {"color", 2, 0, COLOR_OPTION},
191 {"context", 2, 0, 'C'},
192 {"ed", 0, 0, 'e'},
193 {"exclude", 1, 0, 'x'},
194 {"exclude-from", 1, 0, 'X'},
195 {"expand-tabs", 0, 0, 't'},
196 {"forward-ed", 0, 0, 'f'},
197 {"from-file", 1, 0, FROM_FILE_OPTION},
198 {"help", 0, 0, HELP_OPTION},
199 {"horizon-lines", 1, 0, HORIZON_LINES_OPTION},
200 {"ifdef", 1, 0, 'D'},
201 {"ignore-all-space", 0, 0, 'w'},
202 {"ignore-blank-lines", 0, 0, 'B'},
203 {"ignore-case", 0, 0, 'i'},
204 {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION},
205 {"ignore-matching-lines", 1, 0, 'I'},
206 {"ignore-space-change", 0, 0, 'b'},
207 {"ignore-tab-expansion", 0, 0, 'E'},
208 {"ignore-trailing-space", 0, 0, 'Z'},
209 {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION},
210 {"initial-tab", 0, 0, 'T'},
211 {"label", 1, 0, 'L'},
212 {"left-column", 0, 0, LEFT_COLUMN_OPTION},
213 {"line-format", 1, 0, LINE_FORMAT_OPTION},
214 {"minimal", 0, 0, 'd'},
215 {"new-file", 0, 0, 'N'},
216 {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION},
217 {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION},
218 {"no-dereference", 0, 0, NO_DEREFERENCE_OPTION},
219 {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION},
220 {"normal", 0, 0, NORMAL_OPTION},
221 {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION},
222 {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION},
223 {"paginate", 0, 0, 'l'},
224 {"palette", 1, 0, COLOR_PALETTE_OPTION},
225 {"rcs", 0, 0, 'n'},
226 {"recursive", 0, 0, 'r'},
227 {"report-identical-files", 0, 0, 's'},
228 {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION},
229 {"show-c-function", 0, 0, 'p'},
230 {"show-function-line", 1, 0, 'F'},
231 {"side-by-side", 0, 0, 'y'},
232 {"speed-large-files", 0, 0, 'H'},
233 {"starting-file", 1, 0, 'S'},
234 {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION},
235 {"suppress-blank-empty", 0, 0, SUPPRESS_BLANK_EMPTY_OPTION},
236 {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION},
237 {"tabsize", 1, 0, TABSIZE_OPTION},
238 {"text", 0, 0, 'a'},
239 {"to-file", 1, 0, TO_FILE_OPTION},
240 {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION},
241 {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION},
242 {"unidirectional-new-file", 0, 0, 'P'},
243 {"unified", 2, 0, 'U'},
244 {"version", 0, 0, 'v'},
245 {"width", 1, 0, 'W'},
247 /* This is solely for diff3. Do not document. */
248 {"-no-directory", no_argument, nullptr, NO_DIRECTORY_OPTION},
250 /* This is solely for testing. Do not document. */
251 {"-presume-output-tty", no_argument, nullptr, PRESUME_OUTPUT_TTY_OPTION},
252 {0, 0, 0, 0}
255 /* Return a string containing the command options with which diff was invoked.
256 Spaces appear between what were separate ARGV-elements.
257 There is a space at the beginning but none at the end.
258 If there were no options, the result is an empty string.
260 Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT,
261 the length of that vector. */
263 static char *
264 option_list (char **optionvec, int count)
266 idx_t size = 1;
268 for (int i = 0; i < count; i++)
270 size_t optsize = 1 + shell_quote_length (optionvec[i]);
271 if (ckd_add (&size, size, optsize))
272 xalloc_die ();
275 char *result = ximalloc (size);
276 char *p = result;
278 for (int i = 0; i < count; i++)
280 *p++ = ' ';
281 p = shell_quote_copy (p, optionvec[i]);
284 *p = '\0';
285 return result;
289 /* Return an option value suitable for add_exclude. */
291 static int
292 exclude_options (void)
294 return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0);
298 main (int argc, char **argv)
300 /* Do our initializations. */
301 exit_failure = EXIT_TROUBLE;
302 initialize_main (&argc, &argv);
303 set_program_name (argv[0]);
304 setlocale (LC_ALL, "");
305 bindtextdomain (PACKAGE, LOCALEDIR);
306 textdomain (PACKAGE);
307 c_stack_action (nullptr);
308 function_regexp_list.buf = &function_regexp;
309 ignore_regexp_list.buf = &ignore_regexp;
310 re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING);
311 excluded = new_exclude ();
312 presume_output_tty = false;
313 xstdopen ();
315 /* Parse command line options. */
317 lin ocontext = -1;
318 bool explicit_context = false;
319 intmax_t width = 0;
320 bool show_c_function = false;
321 char const *from_file = nullptr;
322 char const *to_file = nullptr;
324 for (int prev = -1, c;
325 0 <= (c = getopt_long (argc, argv, shortopts, longopts, nullptr));
326 prev = c)
327 switch (c)
329 case 0:
330 break;
332 case '0':
333 case '1':
334 case '2':
335 case '3':
336 case '4':
337 case '5':
338 case '6':
339 case '7':
340 case '8':
341 case '9':
342 if (! c_isdigit (prev))
343 ocontext = 0;
344 if (ckd_mul (&ocontext, ocontext, 10)
345 || ckd_add (&ocontext, ocontext, c - '0'))
346 ocontext = LIN_MAX;
347 break;
349 case 'a':
350 text = true;
351 break;
353 case 'b':
354 if (ignore_white_space < IGNORE_SPACE_CHANGE)
355 ignore_white_space = IGNORE_SPACE_CHANGE;
356 break;
358 case 'Z':
359 if (ignore_white_space < IGNORE_SPACE_CHANGE)
360 ignore_white_space |= IGNORE_TRAILING_SPACE;
361 break;
363 case 'B':
364 ignore_blank_lines = true;
365 break;
367 case 'C':
368 case 'U':
370 intmax_t numval;
372 if (optarg)
374 char *numend;
375 numval = strtoimax (optarg, &numend, 10);
376 if (*numend || numval < 0)
377 try_help ("invalid context length %s", quote (optarg));
379 else
380 numval = 3;
382 specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
383 if (context < numval)
384 context = MIN (numval, LIN_MAX);
385 explicit_context = true;
387 break;
389 case 'c':
390 specify_style (OUTPUT_CONTEXT);
391 if (context < 3)
392 context = 3;
393 break;
395 case 'd':
396 minimal = true;
397 break;
399 case 'D':
400 specify_style (OUTPUT_IFDEF);
402 static char const C_ifdef_group_formats[]
403 = (/* UNCHANGED */
404 "%="
405 "\0"
407 /* OLD */
408 "#ifndef @\n"
409 "%<"
410 "#endif /* ! @ */\n"
411 "\0"
413 /* NEW */
414 "#ifdef @\n"
415 "%>"
416 "#endif /* @ */\n"
417 "\0"
419 /* CHANGED */
420 "#ifndef @\n"
421 "%<"
422 "#else /* @ */\n"
423 "%>"
424 "#endif /* @ */\n");
425 int nats = 7; /* 7 "@"s are in C_ifdef_group_formats. */
427 char *b = xinmalloc (((sizeof C_ifdef_group_formats + 1) / nats
428 + strlen (optarg)),
429 nats);
430 char *base = b;
431 int changes = 0;
433 for (int i = 0; i < sizeof C_ifdef_group_formats; i++)
435 char ch = C_ifdef_group_formats[i];
436 switch (ch)
438 default:
439 *b++ = ch;
440 break;
442 case '@':
443 b = stpcpy (b, optarg);
444 break;
446 case '\0':
447 *b++ = ch;
448 specify_value (&group_format[changes++], base, "-D");
449 base = b;
450 break;
454 break;
456 case 'e':
457 specify_style (OUTPUT_ED);
458 break;
460 case 'E':
461 if (ignore_white_space < IGNORE_SPACE_CHANGE)
462 ignore_white_space |= IGNORE_TAB_EXPANSION;
463 break;
465 case 'f':
466 specify_style (OUTPUT_FORWARD_ED);
467 break;
469 case 'F':
470 add_regexp (&function_regexp_list, optarg);
471 break;
473 case 'h':
474 /* Split the files into chunks for faster processing.
475 Usually does not change the result.
477 This currently has no effect. */
478 break;
480 case 'H':
481 speed_large_files = true;
482 break;
484 case 'i':
485 ignore_case = true;
486 break;
488 case 'I':
489 add_regexp (&ignore_regexp_list, optarg);
490 break;
492 case 'l':
493 if (!pr_program[0])
494 try_help ("pagination not supported on this host", nullptr);
495 paginate = true;
496 #ifdef SIGCHLD
497 /* Pagination requires forking and waiting, and
498 System V fork+wait does not work if SIGCHLD is ignored. */
499 signal (SIGCHLD, SIG_DFL);
500 #endif
501 break;
503 case 'L':
504 if (!file_label[0])
505 file_label[0] = optarg;
506 else if (!file_label[1])
507 file_label[1] = optarg;
508 else
509 fatal ("too many file label options");
510 break;
512 case 'n':
513 specify_style (OUTPUT_RCS);
514 break;
516 case 'N':
517 new_file = true;
518 break;
520 case 'p':
521 show_c_function = true;
522 add_regexp (&function_regexp_list, "^[[:alpha:]$_]");
523 break;
525 case 'P':
526 unidirectional_new_file = true;
527 break;
529 case 'q':
530 brief = true;
531 break;
533 case 'r':
534 recursive = true;
535 break;
537 case 's':
538 report_identical_files = true;
539 break;
541 case 'S':
542 specify_value (&starting_file, optarg, "-S");
543 break;
545 case 't':
546 expand_tabs = true;
547 break;
549 case 'T':
550 initial_tab = true;
551 break;
553 case 'u':
554 specify_style (OUTPUT_UNIFIED);
555 if (context < 3)
556 context = 3;
557 break;
559 case 'v':
560 version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version,
561 AUTHORS, nullptr);
562 check_stdout ();
563 return EXIT_SUCCESS;
565 case 'w':
566 ignore_white_space = IGNORE_ALL_SPACE;
567 break;
569 case 'x':
570 add_exclude (excluded, optarg, exclude_options ());
571 break;
573 case 'X':
574 if (add_exclude_file (add_exclude, excluded, optarg,
575 exclude_options (), '\n'))
576 pfatal_with_name (optarg);
577 break;
579 case 'y':
580 specify_style (OUTPUT_SDIFF);
581 break;
583 case 'W':
585 char *numend;
586 intmax_t numval = strtoimax (optarg, &numend, 10);
587 if (numval <= 0 || *numend)
588 try_help ("invalid width %s", quote (optarg));
589 if (width != numval)
591 if (width)
592 fatal ("conflicting width options");
593 width = numval;
596 break;
598 case BINARY_OPTION:
599 #if O_BINARY
600 binary = true;
601 if (! isatty (STDOUT_FILENO))
602 set_binary_mode (STDOUT_FILENO, O_BINARY);
603 #endif
604 break;
606 case FROM_FILE_OPTION:
607 specify_value (&from_file, optarg, "--from-file");
608 break;
610 case HELP_OPTION:
611 usage ();
612 check_stdout ();
613 return EXIT_SUCCESS;
615 case HORIZON_LINES_OPTION:
617 char *numend;
618 intmax_t numval = strtoimax (optarg, &numend, 10);
619 if (*numend || numval < 0)
620 try_help ("invalid horizon length %s", quote (optarg));
621 horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX));
623 break;
625 case IGNORE_FILE_NAME_CASE_OPTION:
626 ignore_file_name_case = true;
627 break;
629 case INHIBIT_HUNK_MERGE_OPTION:
630 /* This option is obsolete, but accept it for backward
631 compatibility. */
632 break;
634 case LEFT_COLUMN_OPTION:
635 left_column = true;
636 break;
638 case LINE_FORMAT_OPTION:
639 specify_style (OUTPUT_IFDEF);
640 for (int i = 0; i < sizeof line_format / sizeof line_format[0]; i++)
641 specify_value (&line_format[i], optarg, "--line-format");
642 break;
644 case NO_DEREFERENCE_OPTION:
645 no_dereference_symlinks = true;
646 break;
648 case NO_IGNORE_FILE_NAME_CASE_OPTION:
649 ignore_file_name_case = false;
650 break;
652 case NORMAL_OPTION:
653 specify_style (OUTPUT_NORMAL);
654 break;
656 case SDIFF_MERGE_ASSIST_OPTION:
657 specify_style (OUTPUT_SDIFF);
658 sdiff_merge_assist = true;
659 break;
661 case STRIP_TRAILING_CR_OPTION:
662 strip_trailing_cr = true;
663 break;
665 case SUPPRESS_BLANK_EMPTY_OPTION:
666 suppress_blank_empty = true;
667 break;
669 case SUPPRESS_COMMON_LINES_OPTION:
670 suppress_common_lines = true;
671 break;
673 case TABSIZE_OPTION:
675 char *numend;
676 intmax_t numval = strtoimax (optarg, &numend, 10);
677 if (! (0 < numval && numval <= INTMAX_MAX - GUTTER_WIDTH_MINIMUM)
678 || *numend)
679 try_help ("invalid tabsize %s", quote (optarg));
680 if (tabsize != numval)
682 if (tabsize)
683 fatal ("conflicting tabsize options");
684 tabsize = numval;
687 break;
689 case TO_FILE_OPTION:
690 specify_value (&to_file, optarg, "--to-file");
691 break;
693 case UNCHANGED_LINE_FORMAT_OPTION:
694 case OLD_LINE_FORMAT_OPTION:
695 case NEW_LINE_FORMAT_OPTION:
696 specify_style (OUTPUT_IFDEF);
697 c -= UNCHANGED_LINE_FORMAT_OPTION;
698 specify_value (&line_format[c], optarg, line_format_option[c]);
699 break;
701 case UNCHANGED_GROUP_FORMAT_OPTION:
702 case OLD_GROUP_FORMAT_OPTION:
703 case NEW_GROUP_FORMAT_OPTION:
704 case CHANGED_GROUP_FORMAT_OPTION:
705 specify_style (OUTPUT_IFDEF);
706 c -= UNCHANGED_GROUP_FORMAT_OPTION;
707 specify_value (&group_format[c], optarg, group_format_option[c]);
708 break;
710 case COLOR_OPTION:
711 specify_colors_style (optarg);
712 break;
714 case COLOR_PALETTE_OPTION:
715 set_color_palette (optarg);
716 break;
718 case NO_DIRECTORY_OPTION:
719 no_directory = true;
720 break;
722 case PRESUME_OUTPUT_TTY_OPTION:
723 presume_output_tty = true;
724 break;
726 default:
727 try_help (nullptr, nullptr);
730 if (colors_style == AUTO)
732 char const *t = getenv ("TERM");
733 if (t && STREQ (t, "dumb"))
734 colors_style = NEVER;
737 if (output_style == OUTPUT_UNSPECIFIED)
739 if (show_c_function)
741 specify_style (OUTPUT_CONTEXT);
742 if (ocontext < 0)
743 context = 3;
745 else
746 specify_style (OUTPUT_NORMAL);
749 if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME))
751 #if defined STAT_TIMESPEC || defined STAT_TIMESPEC_NS
752 time_format = "%Y-%m-%d %H:%M:%S.%N %z";
753 #else
754 time_format = "%Y-%m-%d %H:%M:%S %z";
755 #endif
756 #if !HAVE_TM_GMTOFF
757 localtz = tzalloc (getenv ("TZ"));
758 #endif
760 else
762 /* See POSIX 1003.1-2017 for this format. */
763 time_format = "%a %b %e %T %Y";
766 if (0 <= ocontext
767 && (output_style == OUTPUT_CONTEXT
768 || output_style == OUTPUT_UNIFIED)
769 && (context < ocontext
770 || (ocontext < context && ! explicit_context)))
771 context = ocontext;
773 if (! tabsize)
774 tabsize = 8;
775 if (! width)
776 width = 130;
779 /* Maximize first the half line width, and then the gutter width,
780 according to the following constraints:
782 1. Two half lines plus a gutter must fit in a line.
783 2. If the half line width is nonzero:
784 a. The gutter width is at least GUTTER_WIDTH_MINIMUM.
785 b. If tabs are not expanded to spaces,
786 a half line plus a gutter is an integral number of tabs,
787 so that tabs in the right column line up. */
789 intmax_t t = expand_tabs ? 1 : tabsize;
790 intmax_t w = width;
791 intmax_t t_plus_g = t + GUTTER_WIDTH_MINIMUM;
792 intmax_t unaligned_off = (w >> 1) + (t_plus_g >> 1) + (w & t_plus_g & 1);
793 intmax_t off = unaligned_off - unaligned_off % t;
794 sdiff_half_width = MAX (0, MIN (off - GUTTER_WIDTH_MINIMUM, w - off));
795 sdiff_column2_offset = sdiff_half_width ? off : w;
798 /* Make the horizon at least as large as the context, so that
799 shift_boundaries has more freedom to shift the first and last hunks. */
800 if (horizon_lines < context)
801 horizon_lines = context;
803 summarize_regexp_list (&function_regexp_list);
804 summarize_regexp_list (&ignore_regexp_list);
806 if (output_style == OUTPUT_IFDEF)
808 for (int i = 0; i < sizeof line_format / sizeof line_format[0]; i++)
809 if (!line_format[i])
810 line_format[i] = "%l\n";
811 if (!group_format[OLD])
812 group_format[OLD]
813 = group_format[CHANGED] ? group_format[CHANGED] : "%<";
814 if (!group_format[NEW])
815 group_format[NEW]
816 = group_format[CHANGED] ? group_format[CHANGED] : "%>";
817 if (!group_format[UNCHANGED])
818 group_format[UNCHANGED] = "%=";
819 if (!group_format[CHANGED])
821 char *p = xmalloc (strlen (group_format[OLD])
822 + strlen (group_format[NEW]) + 1);
823 group_format[CHANGED] = p;
824 strcpy (stpcpy (p, group_format[OLD]), group_format[NEW]);
828 no_diff_means_no_output =
829 (output_style == OUTPUT_IFDEF ?
830 (!*group_format[UNCHANGED]
831 || (STREQ (group_format[UNCHANGED], "%=")
832 && !*line_format[UNCHANGED]))
833 : (output_style != OUTPUT_SDIFF) | suppress_common_lines);
835 files_can_be_treated_as_binary =
836 (brief & binary
837 & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr
838 | (ignore_regexp_list.regexps || ignore_white_space)));
840 switch_string = option_list (argv + 1, optind - 1);
842 int exit_status = EXIT_SUCCESS;
844 noparent.file[0].desc = AT_FDCWD;
845 noparent.file[1].desc = AT_FDCWD;
846 static enum detype const de_unknowns[] = {DE_UNKNOWN, DE_UNKNOWN};
848 if (from_file)
850 if (to_file)
851 fatal ("--from-file and --to-file both specified");
852 else
853 for (; optind < argc; optind++)
855 int status = compare_files (&noparent, de_unknowns,
856 from_file, argv[optind]);
857 if (exit_status < status)
858 exit_status = status;
861 else
863 if (to_file)
864 for (; optind < argc; optind++)
866 int status = compare_files (&noparent, de_unknowns,
867 argv[optind], to_file);
868 if (exit_status < status)
869 exit_status = status;
871 else
873 if (argc - optind != 2)
875 if (argc - optind < 2)
876 try_help ("missing operand after %s", quote (argv[argc - 1]));
877 else
878 try_help ("extra operand %s", quote (argv[optind + 2]));
881 exit_status = compare_files (&noparent, de_unknowns,
882 argv[optind], argv[optind + 1]);
886 /* Print any messages that were saved up for last. */
887 print_message_queue ();
889 check_stdout ();
890 cleanup_signal_handlers ();
891 return exit_status;
894 /* Append to REGLIST the regexp PATTERN. */
896 static void
897 add_regexp (struct regexp_list *reglist, char const *pattern)
899 idx_t patlen = strlen (pattern);
900 char const *m = re_compile_pattern (pattern, patlen, reglist->buf);
902 if (m != 0)
903 error (EXIT_TROUBLE, 0, "%s: %s", squote (0, pattern), m);
904 else
906 char *regexps = reglist->regexps;
907 idx_t len = reglist->len;
908 bool multiple_regexps = reglist->multiple_regexps = regexps != 0;
909 idx_t newlen = reglist->len = len + 2 * multiple_regexps + patlen;
910 idx_t size = reglist->size;
912 if (size <= newlen)
913 regexps = reglist->regexps = xpalloc (regexps, &reglist->size,
914 newlen - size + 1, -1, 1);
915 if (multiple_regexps)
917 regexps[len++] = '\\';
918 regexps[len++] = '|';
920 memcpy (regexps + len, pattern, patlen + 1);
924 /* Ensure that REGLIST represents the disjunction of its regexps.
925 This is done here, rather than earlier, to avoid O(N^2) behavior. */
927 static void
928 summarize_regexp_list (struct regexp_list *reglist)
930 if (reglist->regexps)
932 /* At least one regexp was specified. Allocate a fastmap for it. */
933 reglist->buf->fastmap = xmalloc (1 << CHAR_BIT);
934 if (reglist->multiple_regexps)
936 /* Compile the disjunction of the regexps.
937 (If just one regexp was specified, it is already compiled.) */
938 char const *m = re_compile_pattern (reglist->regexps, reglist->len,
939 reglist->buf);
940 if (m)
941 error (EXIT_TROUBLE, 0, "%s: %s",
942 squote (0, reglist->regexps), m);
/* Return errno as left by a system call that has just failed.  The
   assertion tells the compiler that the value is positive.  */
static int
get_errno (void)
{
  int saved = errno;
  dassert (saved > 0);
  return saved;
}
/* Check that output to stdout succeeded.  Report a fatal error if the
   stream's error indicator is set; otherwise close the stream and
   report a fatal error, with the system's reason, if closing fails.  */
static void
check_stdout (void)
{
  if (ferror (stdout))
    {
      fatal ("write failed");
      return;
    }

  if (fclose (stdout) != 0)
    pfatal_with_name (_("standard output"));
}
/* Help text shown by usage, one array element per option or paragraph.
   Each element is a message ID marked with N_ and translated with _()
   when usage prints it; the array ends with a null pointer.  Elements
   containing embedded newlines are printed by usage one line at a time.
   NOTE(review): usage prints a blank line for an empty-string element,
   but no such element is visible in this copy of the table; the gaps in
   the embedded line numbers (e.g. 977 -> 979) suggest that separator
   entries may have been lost in extraction -- verify against the
   upstream file. */
966 static char const *const option_help_msgid[] = {
967 N_(" --normal output a normal diff (the default)"),
968 N_("-q, --brief report only when files differ"),
969 N_("-s, --report-identical-files report when two files are the same"),
970 N_("-c, -C NUM, --context[=NUM] output NUM (default 3) lines of copied context"),
971 N_("-u, -U NUM, --unified[=NUM] output NUM (default 3) lines of unified context"),
972 N_("-e, --ed output an ed script"),
973 N_("-n, --rcs output an RCS format diff"),
974 N_("-y, --side-by-side output in two columns"),
975 N_("-W, --width=NUM output at most NUM (default 130) print columns"),
976 N_(" --left-column output only the left column of common lines"),
977 N_(" --suppress-common-lines do not output common lines"),
979 N_("-p, --show-c-function show which C function each change is in"),
980 N_("-F, --show-function-line=RE show the most recent line matching RE"),
981 N_(" --label LABEL use LABEL instead of file name and timestamp\n"
982 " (can be repeated)"),
984 N_("-t, --expand-tabs expand tabs to spaces in output"),
985 N_("-T, --initial-tab make tabs line up by prepending a tab"),
986 N_(" --tabsize=NUM tab stops every NUM (default 8) print columns"),
987 N_(" --suppress-blank-empty suppress space or tab before empty output lines"),
988 N_("-l, --paginate pass output through 'pr' to paginate it"),
990 N_("-r, --recursive recursively compare any subdirectories found"),
991 N_(" --no-dereference don't follow symbolic links"),
992 N_("-N, --new-file treat absent files as empty"),
993 N_(" --unidirectional-new-file treat absent first files as empty"),
994 N_(" --ignore-file-name-case ignore case when comparing file names"),
995 N_(" --no-ignore-file-name-case consider case when comparing file names"),
996 N_("-x, --exclude=PAT exclude files that match PAT"),
997 N_("-X, --exclude-from=FILE exclude files that match any pattern in FILE"),
998 N_("-S, --starting-file=FILE start with FILE when comparing directories"),
999 N_(" --from-file=FILE1 compare FILE1 to all operands;\n"
1000 " FILE1 can be a directory"),
1001 N_(" --to-file=FILE2 compare all operands to FILE2;\n"
1002 " FILE2 can be a directory"),
1004 N_("-i, --ignore-case ignore case differences in file contents"),
1005 N_("-E, --ignore-tab-expansion ignore changes due to tab expansion"),
1006 N_("-Z, --ignore-trailing-space ignore white space at line end"),
1007 N_("-b, --ignore-space-change ignore changes in the amount of white space"),
1008 N_("-w, --ignore-all-space ignore all white space"),
1009 N_("-B, --ignore-blank-lines ignore changes where lines are all blank"),
1010 N_("-I, --ignore-matching-lines=RE ignore changes where all lines match RE"),
1012 N_("-a, --text treat all files as text"),
1013 N_(" --strip-trailing-cr strip trailing carriage return on input"),
1014 #if O_BINARY
1015 N_(" --binary read and write data in binary mode"),
1016 #endif
1018 N_("-D, --ifdef=NAME output merged file with '#ifdef NAME' diffs"),
1019 N_(" --GTYPE-group-format=GFMT format GTYPE input groups with GFMT"),
1020 N_(" --line-format=LFMT format all input lines with LFMT"),
1021 N_(" --LTYPE-line-format=LFMT format LTYPE input lines with LFMT"),
1022 N_(" These format options provide fine-grained control over the output\n"
1023 " of diff, generalizing -D/--ifdef."),
1024 N_(" LTYPE is 'old', 'new', or 'unchanged'. GTYPE is LTYPE or 'changed'."),
1025 N_(" GFMT (only) may contain:\n\
1026 %< lines from FILE1\n\
1027 %> lines from FILE2\n\
1028 %= lines common to FILE1 and FILE2\n\
1029 %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER\n\
1030 LETTERs are as follows for new group, lower case for old group:\n\
1031 F first line number\n\
1032 L last line number\n\
1033 N number of lines = L-F+1\n\
1034 E F-1\n\
1035 M L+1\n\
1036 %(A=B?T:E) if A equals B then T else E"),
1037 N_(" LFMT (only) may contain:\n\
1038 %L contents of line\n\
1039 %l contents of line, excluding any trailing newline\n\
1040 %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number"),
1041 N_(" Both GFMT and LFMT may contain:\n\
1042 %% %\n\
1043 %c'C' the single character C\n\
1044 %c'\\OOO' the character with octal code OOO\n\
1045 C the character C (other characters represent themselves)"),
1047 N_("-d, --minimal try hard to find a smaller set of changes"),
1048 N_(" --horizon-lines=NUM keep NUM lines of the common prefix and suffix"),
1049 N_(" --speed-large-files assume large files and many scattered small changes"),
1050 N_(" --color[=WHEN] color output; WHEN is 'never', 'always', or 'auto';\n"
1051 " plain --color means --color='auto'"),
1052 N_(" --palette=PALETTE the colors to use when --color is active; PALETTE is\n"
1053 " a colon-separated list of terminfo capabilities"),
1055 N_(" --help display this help and exit"),
1056 N_("-v, --version output version information and exit"),
1058 N_("FILES are 'FILE1 FILE2' or 'DIR1 DIR2' or 'DIR FILE' or 'FILE DIR'."),
1059 N_("If --from-file or --to-file is given, there are no restrictions on FILE(s)."),
1060 N_("If a FILE is '-', read standard input."),
1061 N_("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."),
1062 nullptr
1065 static void
1066 usage (void)
1068 printf (_("Usage: %s [OPTION]... FILES\n"), squote (0, program_name));
1069 printf ("%s\n\n", _("Compare FILES line by line."));
1071 fputs (_("\
1072 Mandatory arguments to long options are mandatory for short options too.\n\
1073 "), stdout);
1075 for (char const *const *p = option_help_msgid; *p; p++)
1077 if (!**p)
1078 putchar ('\n');
1079 else
1081 char const *msg = _(*p);
1082 for (char const *nl; (nl = strchr (msg, '\n')); msg = nl + 1)
1084 fputs (" ", stdout);
1085 fwrite (msg, 1, nl + 1 - msg, stdout);
1088 if (*msg == ' ' || *msg == '-')
1089 fputs (" ", stdout);
1090 puts (msg);
1093 emit_bug_reporting_address ();
1096 /* Set VAR to VALUE, reporting an OPTION error if this is a
1097 conflict. */
1098 static void
1099 specify_value (char const **var, char const *value, char const *option)
1101 if (*var && ! STREQ (*var, value))
1103 error (0, 0, _("conflicting %s option value %s"), option, quote (value));
1104 try_help (nullptr, nullptr);
1106 *var = value;
1109 /* Set the output style to STYLE, diagnosing conflicts. */
1110 static void
1111 specify_style (enum output_style style)
1113 if (output_style != style)
1115 if (output_style != OUTPUT_UNSPECIFIED)
1116 try_help ("conflicting output style options", nullptr);
1117 output_style = style;
1121 /* Set the color mode. */
1122 static void
1123 specify_colors_style (char const *value)
1125 if (value == nullptr || STREQ (value, "auto"))
1126 colors_style = AUTO;
1127 else if (STREQ (value, "always"))
1128 colors_style = ALWAYS;
1129 else if (STREQ (value, "never"))
1130 colors_style = NEVER;
1131 else
1132 try_help ("invalid color %s", quote (value));
1136 /* True if PCMP's file F is a directory. */
1137 static bool
1138 dir_p (struct comparison const *pcmp, int f)
1140 return S_ISDIR (pcmp->file[f].stat.st_mode) != 0;
1143 /* If openat with O_NOFOLLOW fails because the file is a symlink,
   this platform sets errno to NOFOLLOW_SYMLINK_ERRNO.
   Although POSIX says errno must be ELOOP in that situation,
   FreeBSD and NetBSD behave more usefully: the constant is EMLINK
   on FreeBSD, EFTYPE on NetBSD, and ELOOP everywhere else. */
1147 enum { NOFOLLOW_SYMLINK_ERRNO =
1148 #ifdef __FreeBSD__
1149 EMLINK
1150 #elif defined __NetBSD__
1151 EFTYPE
1152 #else
1153 ELOOP
1154 #endif
1157 /* Compare two files with parent comparison PARENT.
1158 The two files are described by CMP, which has been prepped to contain
1159 the files' stat results, file types, and possibly descriptors.
1160 If the files need to be opened, use OPEN_FLAGS. */
1161 static int
1162 compare_prepped_files (struct comparison const *parent,
1163 struct comparison *cmp, int open_flags)
1165 /* If neither file "exists", there's nothing to compare. */
1166 if (cmp->file[0].desc == NONEXISTENT && cmp->file[1].desc == NONEXISTENT)
1167 return EXIT_SUCCESS;
1169 bool same_files = (cmp->file[0].desc != NONEXISTENT
1170 && cmp->file[1].desc != NONEXISTENT
1171 && cmp->file[0].filetype == cmp->file[1].filetype
1172 && same_file (&cmp->file[0].stat, &cmp->file[1].stat));
1174 /* If the two named files are actually the same physical file.
1175 we know they are identical without actually reading them. */
1176 if (same_files & no_diff_means_no_output)
1177 return EXIT_SUCCESS;
1179 bool toplevel = parent == &noparent;
1181 /* Compare the two hierarchies if both files are directories, or if
1182 diff is recursive and one file is a directory and the other
1183 pretends to be a directory full of empty files. But don't
1184 compare dir contents one level down unless -r was specified. */
1185 if (dir_p (cmp, 0) & dir_p (cmp, 1)
1186 || (recursive
1187 && ((new_file & dir_p (cmp, 1)
1188 && cmp->file[0].desc == NONEXISTENT)
1189 || (((new_file | unidirectional_new_file) & dir_p (cmp, 0))
1190 && cmp->file[1].desc == NONEXISTENT))))
1192 if (output_style == OUTPUT_IFDEF)
1193 fatal ("-D option not supported with directories");
1195 if (recursive | toplevel)
1196 return diff_dirs (cmp);
1197 else
1199 /* See POSIX 1003.1-2017 for this format. */
1200 message ("Common subdirectories: %s and %s\n",
1201 squote (0, cmp->file[0].name),
1202 squote (1, cmp->file[1].name));
1203 return EXIT_SUCCESS;
1207 /* Fail if only one file exists. */
1208 if ((cmp->file[0].desc == NONEXISTENT
1209 && ! (new_file | unidirectional_new_file))
1210 || (cmp->file[1].desc == NONEXISTENT && !new_file))
1212 bool existing = cmp->file[0].desc == NONEXISTENT;
1213 char const *dname = parent->file[existing].name;
1214 char const *bname = last_component (cmp->file[existing].name);
1216 /* See POSIX 1003.1-2017 for this format. */
1217 message ("Only in %s: %s\n", squote (0, dname), squote (1, bname));
1218 return EXIT_FAILURE;
1221 /* If the two files have different types, or have the same type but
1222 the type is unusual, then simply report their type.
1223 However, at the top level do this only if one file is a symlink
1224 and the other is not. */
1225 if (toplevel
1226 ? (!S_ISLNK (cmp->file[0].stat.st_mode)
1227 != !S_ISLNK (cmp->file[1].stat.st_mode))
1228 : (cmp->file[0].filetype != cmp->file[1].filetype
1229 || ! (S_ISREG (cmp->file[0].stat.st_mode)
1230 || S_ISLNK (cmp->file[0].stat.st_mode)
1231 || S_ISCHR (cmp->file[0].stat.st_mode)
1232 || S_ISBLK (cmp->file[0].stat.st_mode))))
1234 /* POSIX 1003.1-2017 says any message will do, so long as it
1235 contains the file names. */
1236 message ("File %s is a %s while file %s is a %s\n",
1237 file_label[0] ? file_label[0] : squote (0, cmp->file[0].name),
1238 gettext (cmp->file[0].filetype),
1239 file_label[1] ? file_label[1] : squote (1, cmp->file[1].name),
1240 gettext (cmp->file[1].filetype));
1242 return EXIT_FAILURE;
1245 /* If both files are symlinks, compare symlink contents. */
1246 if (S_ISLNK (cmp->file[0].stat.st_mode))
1248 /* We get here only if we are not dereferencing symlinks. */
1249 dassert (no_dereference_symlinks);
1251 int status = EXIT_SUCCESS;
1252 char *link_value[2]; link_value[1] = nullptr;
1253 char linkbuf[2][128];
1255 for (bool f = false; ; f = true)
1257 int linkfd = cmp->file[f].desc;
1258 int dirfd = parent->file[f].desc;
1259 char const *name = cmp->file[f].name;
1260 int dirarg = linkfd < 0 ? dirfd : linkfd;
1261 char const *namearg = (linkfd < 0
1262 ? (dirfd < 0 ? name : last_component (name))
1263 : "");
1264 link_value[f] = careadlinkat (dirarg, namearg,
1265 linkbuf[f], sizeof linkbuf[f],
1266 nullptr, readlinkat);
1267 if (!link_value[f])
1269 perror_with_name (cmp->file[f].name);
1270 status = EXIT_TROUBLE;
1271 break;
1273 if (f)
1275 status = (STREQ (link_value[0], link_value[f])
1276 ? EXIT_SUCCESS : EXIT_FAILURE);
1277 break;
1281 if (status == EXIT_FAILURE)
1282 message ("Symbolic links %s -> %s and %s -> %s differ\n",
1283 quote_n (0, cmp->file[0].name), quote_n (1, link_value[0]),
1284 quote_n (2, cmp->file[1].name), quote_n (3, link_value[1]));
1286 for (int f = 0; f < 2; f++)
1287 if (link_value[f] != linkbuf[f])
1288 free (link_value[f]);
1290 return status;
1293 /* When not at the top level, compare device numbers of special files,
1294 and report file types of all other non-regular files.
1295 POSIX 1003.1-2017 says any message will do,
1296 so long as it contains the file names. */
1297 if (!toplevel && !S_ISREG (cmp->file[0].stat.st_mode))
1299 if (cmp->file[0].stat.st_rdev == cmp->file[1].stat.st_rdev)
1300 return EXIT_SUCCESS;
1302 intmax_t num[] = {
1303 major (cmp->file[0].stat.st_rdev),
1304 minor (cmp->file[0].stat.st_rdev),
1305 major (cmp->file[1].stat.st_rdev),
1306 minor (cmp->file[1].stat.st_rdev)
1308 enum { n_num = sizeof num / sizeof *num };
1309 char numbuf[n_num][INT_BUFSIZE_BOUND (intmax_t)];
1310 for (int i = 0; i < n_num; i++)
1311 sprintf (numbuf[i], "%"PRIdMAX, num[i]);
1313 message ((S_ISCHR (cmp->file[0].stat.st_mode)
1314 ? ("Character special files %s (%s, %s)"
1315 " and %s (%s, %s) differ\n")
1316 : ("Block special files %s (%s, %s)"
1317 " and %s (%s, %s) differ\n")),
1318 quote_n (0, cmp->file[0].name), numbuf[0], numbuf[1],
1319 quote_n (2, cmp->file[1].name), numbuf[2], numbuf[3]);
1321 return EXIT_FAILURE;
1324 if (files_can_be_treated_as_binary
1325 && S_ISREG (cmp->file[0].stat.st_mode)
1326 && S_ISREG (cmp->file[1].stat.st_mode)
1327 && cmp->file[0].stat.st_size != cmp->file[1].stat.st_size
1328 && 0 <= cmp->file[0].stat.st_size
1329 && 0 <= cmp->file[1].stat.st_size)
1331 message ("Files %s and %s differ\n",
1332 file_label[0] ? file_label[0] : squote (0, cmp->file[0].name),
1333 file_label[1] ? file_label[1] : squote (1, cmp->file[1].name));
1334 return EXIT_FAILURE;
1337 /* Both files exist and neither is a directory or a symbolic link.
1338 Open the files and record their descriptors,
1339 if they are not already open. */
1341 int status = EXIT_SUCCESS;
1343 for (int f = 0; f < 2; f++)
1344 if (cmp->file[f].desc == UNOPENED)
1346 if (f && same_files)
1347 cmp->file[f].desc = cmp->file[0].desc;
1348 else
1350 int dirfd = parent->file[f].desc;
1351 char const *name = cmp->file[f].name;
1352 char const *nm = dirfd < 0 ? name : last_component (name);
1353 cmp->file[f].desc = openat (dirfd, nm, open_flags);
1354 if (cmp->file[f].desc < 0)
1356 perror_with_name (name);
1357 status = EXIT_TROUBLE;
1361 else if (cmp->file[f].desc == OPEN_FAILED)
1363 error (0, cmp->file[f].openerr, "%s", squote (0, cmp->file[f].name));
1364 status = EXIT_TROUBLE;
1367 /* Compare the files' contents, if no error was found. */
1369 if (status != EXIT_SUCCESS)
1370 return status;
1371 return diff_2_files (cmp);
1375 /* Compare two files (or dirs) with parent comparison PARENT,
1376 directory entries of type DETYPE, and names NAME0 and NAME1.
1377 (If PARENT == &NOPARENT, then the first name is just NAME0, etc.)
1378 This is self-contained; it opens the files and closes them.
1380 Names are relative to the original working directory. If a file
1381 appears in only one dir, the other name is a null pointer.
1383 Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if
1384 different, EXIT_TROUBLE if there is a problem opening them. */
1387 compare_files (struct comparison const *parent, enum detype const detype[2],
1388 char const *name0,
1389 char const *name1)
1391 /* If this is directory comparison, perhaps we have a file
1392 that exists only in one of the directories.
1393 If so, just print a message to that effect. */
1395 if (! ((name0 && name1)
1396 || (unidirectional_new_file && name1)
1397 || new_file))
1399 char const *name = name0 ? name0 : name1;
1400 char const *dir = parent->file[!name0].name;
1402 /* See POSIX 1003.1-2017 for this format. */
1403 message ("Only in %s: %s\n", squote (0, dir), squote (1, name));
1405 /* Return EXIT_FAILURE so that diff_dirs will return
1406 EXIT_FAILURE ("some files differ"). */
1407 return EXIT_FAILURE;
1410 struct comparison cmp = { .file[0].desc = name0 ? UNOPENED : NONEXISTENT,
1411 .file[1].desc = name1 ? UNOPENED : NONEXISTENT,
1412 .file[0].stat.st_size = name0 ? -1 : 0,
1413 .file[1].stat.st_size = name1 ? -1 : 0,
1414 .parent = parent };
1416 /* Now record the full name of each file, including nonexistent ones. */
1418 if (!name0)
1419 name0 = name1;
1420 if (!name1)
1421 name1 = name0;
1423 char *free0;
1424 char *free1;
1425 bool toplevel = parent == &noparent;
1427 if (toplevel)
1429 free0 = nullptr;
1430 free1 = nullptr;
1431 cmp.file[0].name = name0;
1432 cmp.file[1].name = name1;
1434 else
1436 cmp.file[0].name = free0
1437 = file_name_concat (parent->file[0].name, name0, nullptr);
1438 cmp.file[1].name = free1
1439 = file_name_concat (parent->file[1].name, name1, nullptr);
1442 int oflags = ((binary ? O_BINARY : 0) | O_CLOEXEC
1443 | (no_dereference_symlinks ? O_NOFOLLOW : 0));
1445 /* Stat the files if needed, possibly opening them first if that is
1446 safe or will be done anyway. */
1448 for (int f = 0; f < 2; f++)
1450 int fd = cmp.file[f].desc;
1451 if (fd != UNOPENED)
1452 continue;
1454 if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0)
1456 cmp.file[f].desc = cmp.file[0].desc;
1457 cmp.file[f].filetype = cmp.file[0].filetype;
1458 cmp.file[f].stat = cmp.file[0].stat;
1459 continue;
1462 int parentdesc = parent->file[f].desc;
1463 char const *name = cmp.file[f].name;
1464 char const *nm = parentdesc < 0 ? name : last_component (name);
1465 int err = 0;
1467 if (STREQ (cmp.file[f].name, "-"))
1469 fd = STDIN_FILENO;
1470 if (binary && ! isatty (fd))
1471 set_binary_mode (fd, O_BINARY);
1473 else if (toplevel || detype[f] == DE_REG || detype[f] == DE_DIR
1474 || (O_PATH_DEFINED && detype[f] == DE_LNK
1475 && no_dereference_symlinks))
1477 /* Either we would open the file anyway because it's the top level,
1478 or the file is known to be a type that is
1479 safe to open and is likely to be opened anyway.
1480 Open the file now, as openat+fstat avoids an fstatat+openat race
1481 and might be a bit faster. */
1482 int accmode = ((O_PATH_DEFINED && !toplevel && detype[f] == DE_LNK
1483 && no_dereference_symlinks)
1484 ? O_PATHSEARCH : O_RDONLY);
1485 fd = openat (parentdesc, nm, accmode | oflags);
1486 if (fd < 0)
1488 err = get_errno ();
1490 /* 'diff DIR FILE' needs read access to DIR if
1491 --ignore-file-name-case; otherwise O_PATHSEARCH suffices.
1492 But do not check for this if ---no-directory. */
1493 if (err == EACCES && toplevel
1494 && !ignore_file_name_case && !no_directory
1495 && (f == 0 || !dir_p (&cmp, 0)))
1497 fd = openat (parentdesc, nm,
1498 O_PATHSEARCH | O_DIRECTORY | oflags);
1499 if (0 <= fd)
1500 err = 0;
1503 /* If it is a symlink, fstatat later. If it might be a
1504 symlink, play it safe and fstatat later. */
1505 if (err == NOFOLLOW_SYMLINK_ERRNO
1506 && (NOFOLLOW_SYMLINK_ERRNO != ELOOP
1507 || (no_dereference_symlinks
1508 && (detype[f] == DE_UNKNOWN
1509 || (detype[f] == DE_LNK
1510 && accmode == O_RDONLY)))))
1512 fd = UNOPENED;
1513 err = 0;
1516 cmp.file[f].openerr = err;
1520 /* Get the file's status unless an earlier error makes it unnecessary. */
1521 if (! (cmp.file[1 - f].err
1522 /* If openat failed as follows, fstatat would fail too. */
1523 || err == ENOENT || err == ENOTDIR || err == ELOOP
1524 || err == EOVERFLOW || err == ENAMETOOLONG))
1526 if ((fd < 0
1527 ? fstatat (parentdesc, nm, &cmp.file[f].stat,
1528 no_dereference_symlinks ? AT_SYMLINK_NOFOLLOW : 0)
1529 : fstat (fd, &cmp.file[f].stat))
1530 < 0)
1531 err = get_errno ();
1532 else
1534 err = 0;
1535 off_t size = stat_size (&cmp.file[f].stat);
1537 if (0 <= size && fd == STDIN_FILENO)
1539 off_t pos = lseek (fd, 0, SEEK_CUR);
1540 if (0 <= pos)
1541 size = MAX (0, size - pos);
1544 cmp.file[f].stat.st_size = size;
1545 cmp.file[f].filetype = c_file_type (&cmp.file[f].stat);
1549 cmp.file[f].desc = fd;
1550 cmp.file[f].err = err;
1553 if (toplevel)
1555 if (!no_directory && toplevel
1556 && !cmp.file[0].err && !cmp.file[1].err
1557 && dir_p (&cmp, 0) != dir_p (&cmp, 1))
1559 /* If one is a directory, use the file in that dir with the
1560 other file's basename. */
1562 int fnm_arg = dir_p (&cmp, 0);
1563 int dir_arg = 1 - fnm_arg;
1564 if (cmp.file[fnm_arg].desc == STDIN_FILENO)
1565 fatal ("cannot compare '-' to a directory");
1566 char const *fnm = cmp.file[fnm_arg].name;
1567 enum detype dir_detype;
1568 char const *filename = cmp.file[dir_arg].name = free0
1569 = find_dir_file_pathname (&cmp.file[dir_arg], last_component (fnm),
1570 &dir_detype);
1571 int dirfd = cmp.file[dir_arg].desc;
1572 if (dirfd < 0)
1573 dirfd = AT_FDCWD;
1574 char const *atname = dirfd < 0 ? filename : last_component (filename);
1575 cmp.file[dir_arg].desc = UNOPENED;
1576 noparent.file[dir_arg].desc = dirfd;
1577 cmp.file[dir_arg].desc
1578 = (dir_detype == DE_LNK && no_dereference_symlinks
1579 ? (errno = ELOOP, -1)
1580 : openat (dirfd, atname, O_RDONLY | oflags));
1581 if (O_PATH_DEFINED && cmp.file[dir_arg].desc < 0
1582 && (dir_detype == DE_LNK || dir_detype == DE_UNKNOWN)
1583 && no_dereference_symlinks && errno == NOFOLLOW_SYMLINK_ERRNO)
1584 cmp.file[dir_arg].desc = openat (dirfd, atname,
1585 O_PATHSEARCH | oflags);
1586 if (cmp.file[dir_arg].desc < 0
1587 ? (O_PATH_DEFINED || !no_dereference_symlinks
1588 || errno != NOFOLLOW_SYMLINK_ERRNO
1589 || (fstatat (dirfd, atname, &cmp.file[dir_arg].stat,
1590 AT_SYMLINK_NOFOLLOW)
1591 < 0))
1592 : fstat (cmp.file[dir_arg].desc, &cmp.file[dir_arg].stat) < 0)
1593 cmp.file[dir_arg].err = get_errno ();
1594 else
1596 cmp.file[dir_arg].stat.st_size
1597 = stat_size (&cmp.file[dir_arg].stat);
1598 cmp.file[dir_arg].filetype
1599 = c_file_type (&cmp.file[dir_arg].stat);
1603 /* Mark files as nonexistent as needed for -N and -P,
1604 if they do not exist but their counterparts do exist. */
1605 for (int f = 0; f < 2; f++)
1606 if ((new_file || (f == 0 && unidirectional_new_file))
1607 && (cmp.file[f].err == ENOENT || cmp.file[f].err == ENOTDIR)
1608 && ! (cmp.file[1 - f].err == ENOENT
1609 || cmp.file[1 - f].err == ENOTDIR))
1611 cmp.file[f].desc = NONEXISTENT;
1612 cmp.file[f].err = 0;
1616 for (int f = 0; f < 2; f++)
1617 if (cmp.file[f].desc == NONEXISTENT)
1619 cmp.file[f].filetype = cmp.file[1 - f].filetype;
1620 cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode;
1623 int status = EXIT_SUCCESS;
1625 for (int f = 0; f < 2; f++)
1626 if (cmp.file[f].err)
1628 error (0, cmp.file[f].err, "%s", squote (0, cmp.file[f].name));
1629 status = EXIT_TROUBLE;
1632 if (status == EXIT_SUCCESS)
1633 status = compare_prepped_files (parent, &cmp, O_RDONLY | oflags);
1635 /* Close any input files. */
1636 for (int f = 0; f < 2; f++)
1637 if ((f == 0 || cmp.file[f].desc != cmp.file[0].desc)
1638 && (cmp.file[f].dirstream ? closedir (cmp.file[f].dirstream) < 0
1639 : 0 <= cmp.file[f].desc && close (cmp.file[f].desc) < 0))
1641 perror_with_name (cmp.file[f].name);
1642 status = EXIT_TROUBLE;
1645 /* Now the comparison has been done, if no error prevented it,
1646 and STATUS is the value this function will return. */
1648 if (status == EXIT_SUCCESS)
1650 if (report_identical_files && !dir_p (&cmp, 0))
1651 message
1652 ("Files %s and %s are identical\n",
1653 file_label[0] ? file_label[0] : squote (0, cmp.file[0].name),
1654 file_label[1] ? file_label[1] : squote (1, cmp.file[1].name));
1656 else
1658 /* Flush stdout so that the user sees differences immediately.
1659 This can hurt performance, unfortunately. */
1660 if (fflush (stdout) != 0)
1661 pfatal_with_name (_("standard output"));
1664 free (free0);
1665 free (free1);
1667 return status;
1670 /* Define variables declared in diff.h (which see). */
1671 FILE *outfile;
1672 bool brief;
1673 bool expand_tabs;
1674 bool files_can_be_treated_as_binary;
1675 bool ignore_blank_lines;
1676 bool ignore_case;
1677 bool ignore_file_name_case;
1678 bool initial_tab;
1679 bool left_column;
1680 bool minimal;
1681 bool no_dereference_symlinks;
1682 bool no_diff_means_no_output;
1683 bool paginate;
1684 bool presume_output_tty;
1685 bool sdiff_merge_assist;
1686 bool speed_large_files;
1687 bool strip_trailing_cr;
1688 bool suppress_blank_empty;
1689 bool suppress_common_lines;
1690 bool text;
1691 char *file_label[2];
1692 char *switch_string;
1693 char const *group_format[CHANGED + 1];
1694 char const *line_format[NEW + 1];
1695 char const *starting_file;
1696 char const *time_format;
1697 enum DIFF_white_space ignore_white_space;
1698 enum colors_style colors_style;
1699 enum output_style output_style;
1700 intmax_t sdiff_column2_offset;
1701 intmax_t sdiff_half_width;
1702 intmax_t tabsize;
1703 lin context;
1704 lin horizon_lines;
1705 struct comparison curr;
1706 struct comparison noparent;
1707 struct exclude *excluded;
1708 struct re_pattern_buffer function_regexp;
1709 struct re_pattern_buffer ignore_regexp;
1710 #ifndef localtz
1711 timezone_t localtz;
1712 #endif