No empty .Rs/.Re
[netbsd-mini2440.git] / gnu / dist / diffutils / src / diff.c
blob6a8114f593cbe36872e3ac6b9c6e2bfdaae2a4ae
1 /* $NetBSD: diff.c,v 1.1.1.1 2003/01/26 00:43:16 wiz Exp $ */
3 /* diff - compare files line by line
5 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1996, 1998, 2001, 2002
6 Free Software Foundation, Inc.
8 This file is part of GNU DIFF.
10 GNU DIFF is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
15 GNU DIFF is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18 See the GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GNU DIFF; see the file COPYING.
22 If not, write to the Free Software Foundation,
23 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
25 #define GDIFF_MAIN
26 #include "diff.h"
27 #include <c-stack.h>
28 #include <dirname.h>
29 #include <error.h>
30 #include <exclude.h>
31 #include <exitfail.h>
32 #include <fnmatch.h>
33 #include <freesoft.h>
34 #include <getopt.h>
35 #include <hard-locale.h>
36 #include <prepargs.h>
37 #include <quotesys.h>
38 #include <regex.h>
39 #include <setmode.h>
40 #include <xalloc.h>
42 static char const authorship_msgid[] =
43 N_("Written by Paul Eggert, Mike Haertel, David Hayes,\n\
44 Richard Stallman, and Len Tower.");
46 static char const copyright_string[] =
47 "Copyright (C) 2002 Free Software Foundation, Inc.";
49 #ifndef GUTTER_WIDTH_MINIMUM
50 # define GUTTER_WIDTH_MINIMUM 3
51 #endif
/* An accumulating list of regexps given on the command line.  The
   source text of every regexp is kept in one buffer, joined by `\|',
   and BUF points at the pattern buffer that receives the compiled
   result.  */
struct regexp_list
{
  /* Text of the regexps seen so far, joined into one disjunction.  */
  char *regexps;

  /* Number of chars of `regexps' in use.  */
  size_t len;

  /* Bytes malloc'ed for `regexps'; 0 if nothing has been malloc'ed.  */
  size_t size;

  /* True if `regexps' holds more than one regexp, i.e. really is a
     disjunction.  */
  bool multiple_regexps;

  /* Where the compiled pattern is stored.  */
  struct re_pattern_buffer *buf;
};
/* Forward declarations of the helpers defined later in this file.  */
static int compare_files (struct comparison const *parent,
                          char const *name0, char const *name1);
static void add_regexp (struct regexp_list *reglist, char const *pattern);
static void summarize_regexp_list (struct regexp_list *reglist);
static void specify_style (enum output_style style);
static void specify_value (char const **var, char const *value,
                           char const *option);
static void try_help (char const *reason_msgid, char const *operand)
     __attribute__((noreturn));
static void check_stdout (void);
static void usage (void);
71 /* If comparing directories, compare their common subdirectories
72 recursively. */
73 static bool recursive;
75 /* In context diffs, show previous lines that match these regexps. */
76 static struct regexp_list function_regexp_list;
78 /* Ignore changes affecting only lines that match these regexps. */
79 static struct regexp_list ignore_regexp_list;
81 #if HAVE_SETMODE_DOS
82 /* Use binary I/O when reading and writing data (--binary).
83 On POSIX hosts, this has no effect. */
84 static bool binary;
85 #endif
87 /* When comparing directories, if a file appears only in one
88 directory, treat it as present but empty in the other (-N).
89 Then `patch' would create the file with appropriate contents. */
90 static bool new_file;
92 /* When comparing directories, if a file appears only in the second
93 directory of the two, treat it as present but empty in the other
94 (--unidirectional-new-file).
95 Then `patch' would create the file with appropriate contents. */
96 static bool unidirectional_new_file;
98 /* Report files compared that are the same (-s).
99 Normally nothing is output when that happens. */
100 static bool report_identical_files;
/* Build one string holding the command options diff was invoked with.

   OPTIONVEC is a vector of COUNT separate ARGV-elements.  Each element
   is quoted with quote_system_arg and preceded by a single space, so
   the result starts with a space, has no trailing space, and is the
   empty string when COUNT is zero.  The result is obtained from
   xmalloc.  */

static char *
option_list (char **optionvec, int count)
{
  size_t needed = 1;            /* the terminating NUL */
  char *buffer;
  char *cursor;
  int k;

  /* Measuring pass: with a null destination only the return value of
     quote_system_arg is used, as the length of the quoted argument.  */
  for (k = 0; k < count; k++)
    {
      needed += 1;              /* the separating space */
      needed += quote_system_arg ((char *) 0, optionvec[k]);
    }

  buffer = xmalloc (needed);
  cursor = buffer;

  /* Copying pass.  */
  for (k = 0; k < count; k++)
    {
      *cursor = ' ';
      cursor++;
      cursor += quote_system_arg (cursor, optionvec[k]);
    }

  *cursor = 0;
  return buffer;
}
135 /* Return an option value suitable for add_exclude. */
137 static int
138 exclude_options (void)
140 return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0);
/* Single-letter options accepted by getopt_long; a trailing `:' marks
   an option that takes an argument.  */
static char const shortopts[] =
  "0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y";

/* Codes for long options that have no single-letter equivalent.
   They start just above CHAR_MAX so that they cannot collide with any
   option letter.  */
enum
{
  BINARY_OPTION = CHAR_MAX + 1,
  FROM_FILE_OPTION,
  HELP_OPTION,
  HORIZON_LINES_OPTION,
  IGNORE_FILE_NAME_CASE_OPTION,
  INHIBIT_HUNK_MERGE_OPTION,
  LEFT_COLUMN_OPTION,
  LINE_FORMAT_OPTION,
  NO_IGNORE_FILE_NAME_CASE_OPTION,
  NORMAL_OPTION,
  SDIFF_MERGE_ASSIST_OPTION,
  STRIP_TRAILING_CR_OPTION,
  SUPPRESS_COMMON_LINES_OPTION,
  TO_FILE_OPTION,

  /* Keep these three consecutive and in this order: the option code
     minus UNCHANGED_LINE_FORMAT_OPTION is used as an array index.  */
  UNCHANGED_LINE_FORMAT_OPTION,
  OLD_LINE_FORMAT_OPTION,
  NEW_LINE_FORMAT_OPTION,

  /* Keep these four consecutive and in this order: the option code
     minus UNCHANGED_GROUP_FORMAT_OPTION is used as an array index.  */
  UNCHANGED_GROUP_FORMAT_OPTION,
  OLD_GROUP_FORMAT_OPTION,
  NEW_GROUP_FORMAT_OPTION,
  CHANGED_GROUP_FORMAT_OPTION
};

/* Spelling of each group-format option, indexed by its offset from
   UNCHANGED_GROUP_FORMAT_OPTION.  Rows are sized for the longest name.  */
static char const group_format_option[][sizeof "--unchanged-group-format"] =
  {
    "--unchanged-group-format",
    "--old-group-format",
    "--new-group-format",
    "--changed-group-format"
  };

/* Spelling of each line-format option, indexed by its offset from
   UNCHANGED_LINE_FORMAT_OPTION.  Rows are sized for the longest name.  */
static char const line_format_option[][sizeof "--unchanged-line-format"] =
  {
    "--unchanged-line-format",
    "--old-line-format",
    "--new-line-format"
  };
191 static struct option const longopts[] =
193 {"binary", 0, 0, BINARY_OPTION},
194 {"brief", 0, 0, 'q'},
195 {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION},
196 {"context", 2, 0, 'C'},
197 {"ed", 0, 0, 'e'},
198 {"exclude", 1, 0, 'x'},
199 {"exclude-from", 1, 0, 'X'},
200 {"expand-tabs", 0, 0, 't'},
201 {"forward-ed", 0, 0, 'f'},
202 {"from-file", 1, 0, FROM_FILE_OPTION},
203 {"help", 0, 0, HELP_OPTION},
204 {"horizon-lines", 1, 0, HORIZON_LINES_OPTION},
205 {"ifdef", 1, 0, 'D'},
206 {"ignore-all-space", 0, 0, 'w'},
207 {"ignore-blank-lines", 0, 0, 'B'},
208 {"ignore-case", 0, 0, 'i'},
209 {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION},
210 {"ignore-matching-lines", 1, 0, 'I'},
211 {"ignore-space-change", 0, 0, 'b'},
212 {"ignore-tab-expansion", 0, 0, 'E'},
213 {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION},
214 {"initial-tab", 0, 0, 'T'},
215 {"label", 1, 0, 'L'},
216 {"left-column", 0, 0, LEFT_COLUMN_OPTION},
217 {"line-format", 1, 0, LINE_FORMAT_OPTION},
218 {"minimal", 0, 0, 'd'},
219 {"new-file", 0, 0, 'N'},
220 {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION},
221 {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION},
222 {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION},
223 {"normal", 0, 0, NORMAL_OPTION},
224 {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION},
225 {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION},
226 {"paginate", 0, 0, 'l'},
227 {"rcs", 0, 0, 'n'},
228 {"recursive", 0, 0, 'r'},
229 {"report-identical-files", 0, 0, 's'},
230 {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION},
231 {"show-c-function", 0, 0, 'p'},
232 {"show-function-line", 1, 0, 'F'},
233 {"side-by-side", 0, 0, 'y'},
234 {"speed-large-files", 0, 0, 'H'},
235 {"starting-file", 1, 0, 'S'},
236 {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION},
237 {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION},
238 {"text", 0, 0, 'a'},
239 {"to-file", 1, 0, TO_FILE_OPTION},
240 {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION},
241 {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION},
242 {"unidirectional-new-file", 0, 0, 'P'},
243 {"unified", 2, 0, 'U'},
244 {"version", 0, 0, 'v'},
245 {"width", 1, 0, 'W'},
246 {0, 0, 0, 0}
250 main (int argc, char **argv)
252 int exit_status = EXIT_SUCCESS;
253 int c;
254 int i;
255 int prev = -1;
256 lin ocontext = -1;
257 bool explicit_context = 0;
258 int width = 0;
259 bool show_c_function = 0;
260 char const *from_file = 0;
261 char const *to_file = 0;
262 uintmax_t numval;
263 char *numend;
265 /* Do our initializations. */
266 exit_failure = 2;
267 initialize_main (&argc, &argv);
268 program_name = argv[0];
269 setlocale (LC_ALL, "");
270 bindtextdomain (PACKAGE, LOCALEDIR);
271 textdomain (PACKAGE);
272 c_stack_action (c_stack_die);
273 function_regexp_list.buf = &function_regexp;
274 ignore_regexp_list.buf = &ignore_regexp;
275 re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING);
276 excluded = new_exclude ();
278 /* Decode the options. */
280 while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1)
282 switch (c)
284 case 0:
285 break;
287 case '0':
288 case '1':
289 case '2':
290 case '3':
291 case '4':
292 case '5':
293 case '6':
294 case '7':
295 case '8':
296 case '9':
297 if (! ISDIGIT (prev))
298 ocontext = c - '0';
299 else if (LIN_MAX / 10 < ocontext
300 || ((ocontext = 10 * ocontext + c - '0') < 0))
301 ocontext = LIN_MAX;
302 break;
304 case 'a':
305 text = 1;
306 break;
308 case 'b':
309 if (ignore_white_space < IGNORE_SPACE_CHANGE)
310 ignore_white_space = IGNORE_SPACE_CHANGE;
311 break;
313 case 'B':
314 ignore_blank_lines = 1;
315 break;
317 case 'C': /* +context[=lines] */
318 case 'U': /* +unified[=lines] */
320 if (optarg)
322 numval = strtoumax (optarg, &numend, 10);
323 if (*numend)
324 try_help ("invalid context length `%s'", optarg);
325 if (LIN_MAX < numval)
326 numval = LIN_MAX;
328 else
329 numval = 3;
331 specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
332 if (context < numval)
333 context = numval;
334 explicit_context = 1;
336 break;
338 case 'c':
339 specify_style (OUTPUT_CONTEXT);
340 if (context < 3)
341 context = 3;
342 break;
344 case 'd':
345 minimal = 1;
346 break;
348 case 'D':
349 specify_style (OUTPUT_IFDEF);
351 static char const C_ifdef_group_formats[] =
352 "%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
353 char *b = xmalloc (sizeof C_ifdef_group_formats
354 + 7 * strlen (optarg) - 14 /* 7*"%s" */
355 - 8 /* 5*"%%" + 3*"%c" */);
356 sprintf (b, C_ifdef_group_formats,
358 optarg, optarg, 0,
359 optarg, optarg, 0,
360 optarg, optarg, optarg);
361 for (i = 0; i < sizeof group_format / sizeof *group_format; i++)
363 specify_value (&group_format[i], b, "-D");
364 b += strlen (b) + 1;
367 break;
369 case 'e':
370 specify_style (OUTPUT_ED);
371 break;
373 case 'E':
374 if (ignore_white_space < IGNORE_TAB_EXPANSION)
375 ignore_white_space = IGNORE_TAB_EXPANSION;
376 break;
378 case 'f':
379 specify_style (OUTPUT_FORWARD_ED);
380 break;
382 case 'F':
383 add_regexp (&function_regexp_list, optarg);
384 break;
386 case 'h':
387 /* Split the files into chunks for faster processing.
388 Usually does not change the result.
390 This currently has no effect. */
391 break;
393 case 'H':
394 speed_large_files = 1;
395 break;
397 case 'i':
398 ignore_case = 1;
399 break;
401 case 'I':
402 add_regexp (&ignore_regexp_list, optarg);
403 break;
405 case 'l':
406 if (!pr_program[0])
407 try_help ("pagination not supported on this host", 0);
408 paginate = 1;
409 #ifdef SIGCHLD
410 /* Pagination requires forking and waiting, and
411 System V fork+wait does not work if SIGCHLD is ignored. */
412 signal (SIGCHLD, SIG_DFL);
413 #endif
414 break;
416 case 'L':
417 if (!file_label[0])
418 file_label[0] = optarg;
419 else if (!file_label[1])
420 file_label[1] = optarg;
421 else
422 fatal ("too many file label options");
423 break;
425 case 'n':
426 specify_style (OUTPUT_RCS);
427 break;
429 case 'N':
430 new_file = 1;
431 break;
433 case 'p':
434 show_c_function = 1;
435 add_regexp (&function_regexp_list, "^[[:alpha:]$_]");
436 break;
438 case 'P':
439 unidirectional_new_file = 1;
440 break;
442 case 'q':
443 brief = 1;
444 break;
446 case 'r':
447 recursive = 1;
448 break;
450 case 's':
451 report_identical_files = 1;
452 break;
454 case 'S':
455 specify_value (&starting_file, optarg, "-S");
456 break;
458 case 't':
459 expand_tabs = 1;
460 break;
462 case 'T':
463 initial_tab = 1;
464 break;
466 case 'u':
467 specify_style (OUTPUT_UNIFIED);
468 if (context < 3)
469 context = 3;
470 break;
472 case 'v':
473 printf ("diff %s\n%s\n\n%s\n\n%s\n",
474 version_string, copyright_string,
475 _(free_software_msgid), _(authorship_msgid));
476 check_stdout ();
477 return EXIT_SUCCESS;
479 case 'w':
480 ignore_white_space = IGNORE_ALL_SPACE;
481 break;
483 case 'x':
484 add_exclude (excluded, optarg, exclude_options ());
485 break;
487 case 'X':
488 if (add_exclude_file (add_exclude, excluded, optarg,
489 exclude_options (), '\n'))
490 pfatal_with_name (optarg);
491 break;
493 case 'y':
494 specify_style (OUTPUT_SDIFF);
495 break;
497 case 'W':
498 numval = strtoumax (optarg, &numend, 10);
499 if (! (0 < numval && numval <= INT_MAX) || *numend)
500 try_help ("invalid width `%s'", optarg);
501 if (width != numval)
503 if (width)
504 fatal ("conflicting width options");
505 width = numval;
507 break;
509 case BINARY_OPTION:
510 #if HAVE_SETMODE_DOS
511 binary = 1;
512 set_binary_mode (STDOUT_FILENO, 1);
513 #endif
514 break;
516 case FROM_FILE_OPTION:
517 specify_value (&from_file, optarg, "--from-file");
518 break;
520 case HELP_OPTION:
521 usage ();
522 check_stdout ();
523 return EXIT_SUCCESS;
525 case HORIZON_LINES_OPTION:
526 numval = strtoumax (optarg, &numend, 10);
527 if (*numend)
528 try_help ("invalid horizon length `%s'", optarg);
529 horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX));
530 break;
532 case IGNORE_FILE_NAME_CASE_OPTION:
533 ignore_file_name_case = 1;
534 break;
536 case INHIBIT_HUNK_MERGE_OPTION:
537 /* This option is obsolete, but accept it for backward
538 compatibility. */
539 break;
541 case LEFT_COLUMN_OPTION:
542 left_column = 1;
543 break;
545 case LINE_FORMAT_OPTION:
546 specify_style (OUTPUT_IFDEF);
547 for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
548 specify_value (&line_format[i], optarg, "--line-format");
549 break;
551 case NO_IGNORE_FILE_NAME_CASE_OPTION:
552 ignore_file_name_case = 0;
553 break;
555 case NORMAL_OPTION:
556 specify_style (OUTPUT_NORMAL);
557 break;
559 case SDIFF_MERGE_ASSIST_OPTION:
560 specify_style (OUTPUT_SDIFF);
561 sdiff_merge_assist = 1;
562 break;
564 case STRIP_TRAILING_CR_OPTION:
565 strip_trailing_cr = 1;
566 break;
568 case SUPPRESS_COMMON_LINES_OPTION:
569 suppress_common_lines = 1;
570 break;
572 case TO_FILE_OPTION:
573 specify_value (&to_file, optarg, "--to-file");
574 break;
576 case UNCHANGED_LINE_FORMAT_OPTION:
577 case OLD_LINE_FORMAT_OPTION:
578 case NEW_LINE_FORMAT_OPTION:
579 specify_style (OUTPUT_IFDEF);
580 c -= UNCHANGED_LINE_FORMAT_OPTION;
581 specify_value (&line_format[c], optarg, line_format_option[c]);
582 break;
584 case UNCHANGED_GROUP_FORMAT_OPTION:
585 case OLD_GROUP_FORMAT_OPTION:
586 case NEW_GROUP_FORMAT_OPTION:
587 case CHANGED_GROUP_FORMAT_OPTION:
588 specify_style (OUTPUT_IFDEF);
589 c -= UNCHANGED_GROUP_FORMAT_OPTION;
590 specify_value (&group_format[c], optarg, group_format_option[c]);
591 break;
593 default:
594 try_help (0, 0);
596 prev = c;
599 if (output_style == OUTPUT_UNSPECIFIED)
601 if (show_c_function)
603 specify_style (OUTPUT_CONTEXT);
604 if (ocontext < 0)
605 context = 3;
607 else
608 specify_style (OUTPUT_NORMAL);
611 if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME))
612 time_format = "%Y-%m-%d %H:%M:%S.%N %z";
613 else
615 /* See POSIX 1003.1-2001 for this format. */
616 time_format = "%a %b %e %T %Y";
619 if (0 <= ocontext)
621 bool modern_usage = 200112 <= posix2_version ();
623 if ((output_style == OUTPUT_CONTEXT
624 || output_style == OUTPUT_UNIFIED)
625 && (context < ocontext
626 || (ocontext < context && ! explicit_context)))
628 if (modern_usage)
630 error (0, 0,
631 _("`-%ld' option is obsolete; use `-%c %ld'"),
632 (long) ocontext,
633 output_style == OUTPUT_CONTEXT ? 'C' : 'U',
634 (long) ocontext);
635 try_help (0, 0);
637 context = ocontext;
639 else
641 if (modern_usage)
643 error (0, 0, _("`-%ld' option is obsolete; omit it"),
644 (long) ocontext);
645 try_help (0, 0);
652 * We maximize first the half line width, and then the gutter width,
653 * according to the following constraints:
654 * 1. Two half lines plus a gutter must fit in a line.
655 * 2. If the half line width is nonzero:
656 * a. The gutter width is at least GUTTER_WIDTH_MINIMUM.
657 * b. If tabs are not expanded to spaces,
658 * a half line plus a gutter is an integral number of tabs,
659 * so that tabs in the right column line up.
661 unsigned int t = expand_tabs ? 1 : TAB_WIDTH;
662 int w = width ? width : 130;
663 int off = (w + t + GUTTER_WIDTH_MINIMUM) / (2 * t) * t;
664 sdiff_half_width = MAX (0, MIN (off - GUTTER_WIDTH_MINIMUM, w - off)),
665 sdiff_column2_offset = sdiff_half_width ? off : w;
668 /* Make the horizon at least as large as the context, so that
669 shift_boundaries has more freedom to shift the first and last hunks. */
670 if (horizon_lines < context)
671 horizon_lines = context;
673 summarize_regexp_list (&function_regexp_list);
674 summarize_regexp_list (&ignore_regexp_list);
676 if (output_style == OUTPUT_IFDEF)
678 for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
679 if (!line_format[i])
680 line_format[i] = "%l\n";
681 if (!group_format[OLD])
682 group_format[OLD]
683 = group_format[CHANGED] ? group_format[CHANGED] : "%<";
684 if (!group_format[NEW])
685 group_format[NEW]
686 = group_format[CHANGED] ? group_format[CHANGED] : "%>";
687 if (!group_format[UNCHANGED])
688 group_format[UNCHANGED] = "%=";
689 if (!group_format[CHANGED])
690 group_format[CHANGED] = concat (group_format[OLD],
691 group_format[NEW], "");
694 no_diff_means_no_output =
695 (output_style == OUTPUT_IFDEF ?
696 (!*group_format[UNCHANGED]
697 || (strcmp (group_format[UNCHANGED], "%=") == 0
698 && !*line_format[UNCHANGED]))
699 : (output_style != OUTPUT_SDIFF) | suppress_common_lines);
701 files_can_be_treated_as_binary =
702 (brief
703 & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr
704 | (ignore_regexp_list.regexps || ignore_white_space)));
706 switch_string = option_list (argv + 1, optind - 1);
708 if (from_file)
710 if (to_file)
711 fatal ("--from-file and --to-file both specified");
712 else
713 for (; optind < argc; optind++)
715 int status = compare_files ((struct comparison *) 0,
716 from_file, argv[optind]);
717 if (exit_status < status)
718 exit_status = status;
721 else
723 if (to_file)
724 for (; optind < argc; optind++)
726 int status = compare_files ((struct comparison *) 0,
727 argv[optind], to_file);
728 if (exit_status < status)
729 exit_status = status;
731 else
733 if (argc - optind != 2)
735 if (argc - optind < 2)
736 try_help ("missing operand after `%s'", argv[argc - 1]);
737 else
738 try_help ("extra operand `%s'", argv[optind + 2]);
741 exit_status = compare_files ((struct comparison *) 0,
742 argv[optind], argv[optind + 1]);
746 /* Print any messages that were saved up for last. */
747 print_message_queue ();
749 check_stdout ();
750 exit (exit_status);
751 return exit_status;
754 /* Append to REGLIST the regexp PATTERN. */
756 static void
757 add_regexp (struct regexp_list *reglist, char const *pattern)
759 size_t patlen = strlen (pattern);
760 char const *m = re_compile_pattern (pattern, patlen, reglist->buf);
762 if (m != 0)
763 error (0, 0, "%s: %s", pattern, m);
764 else
766 char *regexps = reglist->regexps;
767 size_t len = reglist->len;
768 bool multiple_regexps = reglist->multiple_regexps = regexps != 0;
769 size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen;
770 size_t size = reglist->size;
772 if (size <= newlen)
774 if (!size)
775 size = 1;
777 do size *= 2;
778 while (size <= newlen);
780 reglist->size = size;
781 reglist->regexps = regexps = xrealloc (regexps, size);
783 if (multiple_regexps)
785 regexps[len++] = '\\';
786 regexps[len++] = '|';
788 memcpy (regexps + len, pattern, patlen + 1);
792 /* Ensure that REGLIST represents the disjunction of its regexps.
793 This is done here, rather than earlier, to avoid O(N^2) behavior. */
795 static void
796 summarize_regexp_list (struct regexp_list *reglist)
798 if (reglist->regexps)
800 /* At least one regexp was specified. Allocate a fastmap for it. */
801 reglist->buf->fastmap = xmalloc (1 << CHAR_BIT);
802 if (reglist->multiple_regexps)
804 /* Compile the disjunction of the regexps.
805 (If just one regexp was specified, it is already compiled.) */
806 char const *m = re_compile_pattern (reglist->regexps, reglist->len,
807 reglist->buf);
808 if (m != 0)
809 error (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m);
814 static void
815 try_help (char const *reason_msgid, char const *operand)
817 if (reason_msgid)
818 error (0, 0, _(reason_msgid), operand);
819 error (EXIT_TROUBLE, 0, _("Try `%s --help' for more information."),
820 program_name);
821 abort ();
/* Verify that everything written to standard output got there: fail
   if the stream has its error indicator set, otherwise close it and
   fail if the close itself reports an error.  */

static void
check_stdout (void)
{
  if (ferror (stdout))
    {
      fatal ("write failed");
      return;
    }

  if (fclose (stdout) != 0)
    pfatal_with_name (_("standard output"));
}
833 static char const * const option_help_msgid[] = {
834 N_("Compare files line by line."),
836 N_("-i --ignore-case Ignore case differences in file contents."),
837 N_("--ignore-file-name-case Ignore case when comparing file names."),
838 N_("--no-ignore-file-name-case Consider case when comparing file names."),
839 N_("-E --ignore-tab-expansion Ignore changes due to tab expansion."),
840 N_("-b --ignore-space-change Ignore changes in the amount of white space."),
841 N_("-w --ignore-all-space Ignore all white space."),
842 N_("-B --ignore-blank-lines Ignore changes whose lines are all blank."),
843 N_("-I RE --ignore-matching-lines=RE Ignore changes whose lines all match RE."),
844 N_("--strip-trailing-cr Strip trailing carriage return on input."),
845 #if HAVE_SETMODE_DOS
846 N_("--binary Read and write data in binary mode."),
847 #endif
848 N_("-a --text Treat all files as text."),
850 N_("-c -C NUM --context[=NUM] Output NUM (default 3) lines of copied context.\n\
851 -u -U NUM --unified[=NUM] Output NUM (default 3) lines of unified context.\n\
852 --label LABEL Use LABEL instead of file name.\n\
853 -p --show-c-function Show which C function each change is in.\n\
854 -F RE --show-function-line=RE Show the most recent line matching RE."),
855 N_("-q --brief Output only whether files differ."),
856 N_("-e --ed Output an ed script."),
857 N_("--normal Output a normal diff."),
858 N_("-n --rcs Output an RCS format diff."),
859 N_("-y --side-by-side Output in two columns.\n\
860 -W NUM --width=NUM Output at most NUM (default 130) print columns.\n\
861 --left-column Output only the left column of common lines.\n\
862 --suppress-common-lines Do not output common lines."),
863 N_("-D NAME --ifdef=NAME Output merged file to show `#ifdef NAME' diffs."),
864 N_("--GTYPE-group-format=GFMT Similar, but format GTYPE input groups with GFMT."),
865 N_("--line-format=LFMT Similar, but format all input lines with LFMT."),
866 N_("--LTYPE-line-format=LFMT Similar, but format LTYPE input lines with LFMT."),
867 N_(" LTYPE is `old', `new', or `unchanged'. GTYPE is LTYPE or `changed'."),
868 N_(" GFMT may contain:\n\
869 %< lines from FILE1\n\
870 %> lines from FILE2\n\
871 %= lines common to FILE1 and FILE2\n\
872 %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER\n\
873 LETTERs are as follows for new group, lower case for old group:\n\
874 F first line number\n\
875 L last line number\n\
876 N number of lines = L-F+1\n\
877 E F-1\n\
878 M L+1"),
879 N_(" LFMT may contain:\n\
880 %L contents of line\n\
881 %l contents of line, excluding any trailing newline\n\
882 %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number"),
883 N_(" Either GFMT or LFMT may contain:\n\
884 %% %\n\
885 %c'C' the single character C\n\
886 %c'\\OOO' the character with octal code OOO"),
888 N_("-l --paginate Pass the output through `pr' to paginate it."),
889 N_("-t --expand-tabs Expand tabs to spaces in output."),
890 N_("-T --initial-tab Make tabs line up by prepending a tab."),
892 N_("-r --recursive Recursively compare any subdirectories found."),
893 N_("-N --new-file Treat absent files as empty."),
894 N_("--unidirectional-new-file Treat absent first files as empty."),
895 N_("-s --report-identical-files Report when two files are the same."),
896 N_("-x PAT --exclude=PAT Exclude files that match PAT."),
897 N_("-X FILE --exclude-from=FILE Exclude files that match any pattern in FILE."),
898 N_("-S FILE --starting-file=FILE Start with FILE when comparing directories."),
899 N_("--from-file=FILE1 Compare FILE1 to all operands. FILE1 can be a directory."),
900 N_("--to-file=FILE2 Compare all operands to FILE2. FILE2 can be a directory."),
902 N_("--horizon-lines=NUM Keep NUM lines of the common prefix and suffix."),
903 N_("-d --minimal Try hard to find a smaller set of changes."),
904 N_("--speed-large-files Assume large files and many scattered small changes."),
906 N_("-v --version Output version info."),
907 N_("--help Output this help."),
909 N_("FILES are `FILE1 FILE2' or `DIR1 DIR2' or `DIR FILE...' or `FILE... DIR'."),
910 N_("If --from-file or --to-file is given, there are no restrictions on FILES."),
911 N_("If a FILE is `-', read standard input."),
913 N_("Report bugs to <bug-gnu-utils@gnu.org>."),
917 static void
918 usage (void)
920 char const * const *p;
922 printf (_("Usage: %s [OPTION]... FILES\n"), program_name);
924 for (p = option_help_msgid; *p; p++)
926 if (!**p)
927 putchar ('\n');
928 else
930 char const *msg = _(*p);
931 char const *nl;
932 while ((nl = strchr (msg, '\n')))
934 int msglen = nl + 1 - msg;
935 printf (" %.*s", msglen, msg);
936 msg = nl + 1;
939 printf (" %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg);
/* Store VALUE in *VAR on behalf of the option named OPTION.
   If *VAR was already set to a different string, that is a conflict:
   diagnose it and give up through try_help.  */
static void
specify_value (char const **var, char const *value, char const *option)
{
  char const *previous = *var;

  if (previous != 0 && strcmp (previous, value) != 0)
    {
      error (0, 0, _("conflicting %s option value `%s'"), option, value);
      try_help (0, 0);
    }

  *var = value;
}
957 /* Set the output style to STYLE, diagnosing conflicts. */
958 static void
959 specify_style (enum output_style style)
961 if (output_style != style)
963 if (output_style != OUTPUT_UNSPECIFIED)
964 try_help ("conflicting output style options", 0);
965 output_style = style;
/* Return a translated description of the kind of file that *ST
   describes.  File kinds whose test macro is not defined on this
   host are simply not checked; anything unrecognized is a
   "weird file".  */
static char const *
filetype (struct stat const *st)
{
  /* See POSIX 1003.1-2001 for these formats.

     To keep diagnostics grammatical in English, the returned string
     must start with a consonant.  */

  if (S_ISREG (st->st_mode))
    {
      if (st->st_size == 0)
        return _("regular empty file");
      return _("regular file");
    }

  if (S_ISDIR (st->st_mode))
    return _("directory");

#ifdef S_ISBLK
  if (S_ISBLK (st->st_mode))
    return _("block special file");
#endif

#ifdef S_ISCHR
  if (S_ISCHR (st->st_mode))
    return _("character special file");
#endif

#ifdef S_ISFIFO
  if (S_ISFIFO (st->st_mode))
    return _("fifo");
#endif

  /* S_ISLNK is impossible with `fstat' and `stat'.  */

#ifdef S_ISSOCK
  if (S_ISSOCK (st->st_mode))
    return _("socket");
#endif

#ifdef S_TYPEISMQ
  if (S_TYPEISMQ (st))
    return _("message queue");
#endif

#ifdef S_TYPEISSEM
  if (S_TYPEISSEM (st))
    return _("semaphore");
#endif

#ifdef S_TYPEISSHM
  if (S_TYPEISSHM (st))
    return _("shared memory object");
#endif

#ifdef S_TYPEISTMO
  if (S_TYPEISTMO (st))
    return _("typed memory object");
#endif

  return _("weird file");
}
/* Overwrite the last-modified time recorded in *ST with the current
   time.  When the stat structure has a nanosecond field
   (ST_MTIM_NSEC), the most precise clock available at build time is
   tried first; plain time() is the fallback.  */

static void
set_mtime_to_now (struct stat *st)
{
#ifdef ST_MTIM_NSEC

# if HAVE_CLOCK_GETTIME && defined CLOCK_REALTIME
  if (clock_gettime (CLOCK_REALTIME, &st->st_mtim) == 0)
    return;
# endif

# if HAVE_GETTIMEOFDAY
  {
    struct timeval now;

    if (gettimeofday (&now, NULL) == 0)
      {
        /* Microseconds are scaled up to fill the nanosecond field.  */
        st->st_mtime = now.tv_sec;
        st->st_mtim.ST_MTIM_NSEC = now.tv_usec * 1000;
        return;
      }
  }
# endif

#endif /* ST_MTIM_NSEC */

  time (&st->st_mtime);
}
1040 /* Compare two files (or dirs) with parent comparison PARENT
1041 and names NAME0 and NAME1.
1042 (If PARENT is 0, then the first name is just NAME0, etc.)
1043 This is self-contained; it opens the files and closes them.
1045 Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if
1046 different, EXIT_TROUBLE if there is a problem opening them. */
1048 static int
1049 compare_files (struct comparison const *parent,
1050 char const *name0,
1051 char const *name1)
1053 struct comparison cmp;
1054 #define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0)
1055 register int f;
1056 int status = EXIT_SUCCESS;
1057 bool same_files;
1058 char *free0, *free1;
1060 /* If this is directory comparison, perhaps we have a file
1061 that exists only in one of the directories.
1062 If so, just print a message to that effect. */
1064 if (! ((name0 && name1)
1065 || (unidirectional_new_file && name1)
1066 || new_file))
1068 char const *name = name0 == 0 ? name1 : name0;
1069 char const *dir = parent->file[name0 == 0].name;
1071 /* See POSIX 1003.1-2001 for this format. */
1072 message ("Only in %s: %s\n", dir, name);
1074 /* Return EXIT_FAILURE so that diff_dirs will return
1075 EXIT_FAILURE ("some files differ"). */
1076 return EXIT_FAILURE;
1079 memset (cmp.file, 0, sizeof cmp.file);
1080 cmp.parent = parent;
1082 /* cmp.file[f].desc markers */
1083 #define NONEXISTENT (-1) /* nonexistent file */
1084 #define UNOPENED (-2) /* unopened file (e.g. directory) */
1085 #define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */
1087 #define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */
1089 cmp.file[0].desc = name0 == 0 ? NONEXISTENT : UNOPENED;
1090 cmp.file[1].desc = name1 == 0 ? NONEXISTENT : UNOPENED;
1092 /* Now record the full name of each file, including nonexistent ones. */
1094 if (name0 == 0)
1095 name0 = name1;
1096 if (name1 == 0)
1097 name1 = name0;
1099 if (!parent)
1101 free0 = 0;
1102 free1 = 0;
1103 cmp.file[0].name = name0;
1104 cmp.file[1].name = name1;
1106 else
1108 cmp.file[0].name = free0
1109 = dir_file_pathname (parent->file[0].name, name0);
1110 cmp.file[1].name = free1
1111 = dir_file_pathname (parent->file[1].name, name1);
1114 /* Stat the files. */
1116 for (f = 0; f < 2; f++)
1118 if (cmp.file[f].desc != NONEXISTENT)
1120 if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0)
1122 cmp.file[f].desc = cmp.file[0].desc;
1123 cmp.file[f].stat = cmp.file[0].stat;
1125 else if (strcmp (cmp.file[f].name, "-") == 0)
1127 cmp.file[f].desc = STDIN_FILENO;
1128 if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0)
1129 cmp.file[f].desc = ERRNO_ENCODE (errno);
1130 else
1132 if (S_ISREG (cmp.file[f].stat.st_mode))
1134 off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
1135 if (pos < 0)
1136 cmp.file[f].desc = ERRNO_ENCODE (errno);
1137 else
1138 cmp.file[f].stat.st_size =
1139 MAX (0, cmp.file[f].stat.st_size - pos);
1142 /* POSIX 1003.1-2001 requires current time for
1143 stdin. */
1144 set_mtime_to_now (&cmp.file[f].stat);
1147 else if (stat (cmp.file[f].name, &cmp.file[f].stat) != 0)
1148 cmp.file[f].desc = ERRNO_ENCODE (errno);
1152 /* Mark files as nonexistent at the top level as needed for -N and
1153 --unidirectional-new-file. */
1154 if (! parent)
1156 if ((new_file | unidirectional_new_file)
1157 && cmp.file[0].desc == ERRNO_ENCODE (ENOENT)
1158 && cmp.file[1].desc == UNOPENED)
1159 cmp.file[0].desc = NONEXISTENT;
1161 if (new_file
1162 && cmp.file[0].desc == UNOPENED
1163 && cmp.file[1].desc == ERRNO_ENCODE (ENOENT))
1164 cmp.file[1].desc = NONEXISTENT;
1167 for (f = 0; f < 2; f++)
1168 if (cmp.file[f].desc == NONEXISTENT)
1169 cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode;
1171 for (f = 0; f < 2; f++)
1173 int e = ERRNO_DECODE (cmp.file[f].desc);
1174 if (0 <= e)
1176 errno = e;
1177 perror_with_name (cmp.file[f].name);
1178 status = EXIT_TROUBLE;
1182 if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1))
1184 /* If one is a directory, and it was specified in the command line,
1185 use the file in that dir with the other file's basename. */
1187 int fnm_arg = DIR_P (0);
1188 int dir_arg = 1 - fnm_arg;
1189 char const *fnm = cmp.file[fnm_arg].name;
1190 char const *dir = cmp.file[dir_arg].name;
1191 char const *filename = cmp.file[dir_arg].name = free0
1192 = dir_file_pathname (dir, base_name (fnm));
1194 if (strcmp (fnm, "-") == 0)
1195 fatal ("cannot compare `-' to a directory");
1197 if (stat (filename, &cmp.file[dir_arg].stat) != 0)
1199 perror_with_name (filename);
1200 status = EXIT_TROUBLE;
1204 if (status != EXIT_SUCCESS)
1206 /* One of the files should exist but does not. */
1208 else if ((same_files
1209 = (cmp.file[0].desc != NONEXISTENT
1210 && cmp.file[1].desc != NONEXISTENT
1211 && (same_special_file (&cmp.file[0].stat, &cmp.file[1].stat)
1212 || (0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat)
1213 && same_file_attributes (&cmp.file[0].stat,
1214 &cmp.file[1].stat)))))
1215 && no_diff_means_no_output)
1217 /* The two named files are actually the same physical file.
1218 We know they are identical without actually reading them. */
1220 else if (DIR_P (0) & DIR_P (1))
1222 if (output_style == OUTPUT_IFDEF)
1223 fatal ("-D option not supported with directories");
1225 /* If both are directories, compare the files in them. */
1227 if (parent && !recursive)
1229 /* But don't compare dir contents one level down
1230 unless -r was specified.
1231 See POSIX 1003.1-2001 for this format. */
1232 message ("Common subdirectories: %s and %s\n",
1233 cmp.file[0].name, cmp.file[1].name);
1235 else
1236 status = diff_dirs (&cmp, compare_files);
1238 else if ((DIR_P (0) | DIR_P (1))
1239 || (parent
1240 && (! S_ISREG (cmp.file[0].stat.st_mode)
1241 || ! S_ISREG (cmp.file[1].stat.st_mode))))
1243 if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT)
1245 /* We have a subdirectory that exists only in one directory. */
1247 if ((DIR_P (0) | DIR_P (1))
1248 && recursive
1249 && (new_file
1250 || (unidirectional_new_file
1251 && cmp.file[0].desc == NONEXISTENT)))
1252 status = diff_dirs (&cmp, compare_files);
1253 else
1255 char const *dir
1256 = parent->file[cmp.file[0].desc == NONEXISTENT].name;
1258 /* See POSIX 1003.1-2001 for this format. */
1259 message ("Only in %s: %s\n", dir, name0);
1261 status = EXIT_FAILURE;
1264 else
1266 /* We have two files that are not to be compared. */
1268 /* See POSIX 1003.1-2001 for this format. */
1269 message5 ("File %s is a %s while file %s is a %s\n",
1270 file_label[0] ? file_label[0] : cmp.file[0].name,
1271 filetype (&cmp.file[0].stat),
1272 file_label[1] ? file_label[1] : cmp.file[1].name,
1273 filetype (&cmp.file[1].stat));
1275 /* This is a difference. */
1276 status = EXIT_FAILURE;
1279 else if (files_can_be_treated_as_binary
1280 && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size
1281 && (cmp.file[0].desc == NONEXISTENT
1282 || S_ISREG (cmp.file[0].stat.st_mode))
1283 && (cmp.file[1].desc == NONEXISTENT
1284 || S_ISREG (cmp.file[1].stat.st_mode)))
1286 message ("Files %s and %s differ\n",
1287 file_label[0] ? file_label[0] : cmp.file[0].name,
1288 file_label[1] ? file_label[1] : cmp.file[1].name);
1289 status = EXIT_FAILURE;
1291 else
1293 /* Both exist and neither is a directory. */
1295 /* Open the files and record their descriptors. */
1297 if (cmp.file[0].desc == UNOPENED)
1298 if ((cmp.file[0].desc = open (cmp.file[0].name, O_RDONLY, 0)) < 0)
1300 perror_with_name (cmp.file[0].name);
1301 status = EXIT_TROUBLE;
1303 if (cmp.file[1].desc == UNOPENED)
1305 if (same_files)
1306 cmp.file[1].desc = cmp.file[0].desc;
1307 else if ((cmp.file[1].desc = open (cmp.file[1].name, O_RDONLY, 0))
1308 < 0)
1310 perror_with_name (cmp.file[1].name);
1311 status = EXIT_TROUBLE;
1315 #if HAVE_SETMODE_DOS
1316 if (binary)
1317 for (f = 0; f < 2; f++)
1318 if (0 <= cmp.file[f].desc)
1319 set_binary_mode (cmp.file[f].desc, 1);
1320 #endif
1322 /* Compare the files, if no error was found. */
1324 if (status == EXIT_SUCCESS)
1325 status = diff_2_files (&cmp);
1327 /* Close the file descriptors. */
1329 if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0)
1331 perror_with_name (cmp.file[0].name);
1332 status = EXIT_TROUBLE;
1334 if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc
1335 && close (cmp.file[1].desc) != 0)
1337 perror_with_name (cmp.file[1].name);
1338 status = EXIT_TROUBLE;
1342 /* Now the comparison has been done, if no error prevented it,
1343 and STATUS is the value this function will return. */
1345 if (status == EXIT_SUCCESS)
1347 if (report_identical_files && !DIR_P (0))
1348 message ("Files %s and %s are identical\n",
1349 file_label[0] ? file_label[0] : cmp.file[0].name,
1350 file_label[1] ? file_label[1] : cmp.file[1].name);
1352 else
1354 /* Flush stdout so that the user sees differences immediately.
1355 This can hurt performance, unfortunately. */
1356 if (fflush (stdout) != 0)
1357 pfatal_with_name (_("standard output"));
1360 if (free0)
1361 free (free0);
1362 if (free1)
1363 free (free1);
1365 return status;