1 /* GNU cmp - compare two files byte by byte
3 Copyright (C) 1990-1996, 1998, 2001-2002, 2004, 2006-2007, 2009-2013,
4 2015-2025 Free Software Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include <binary-io.h>
29 #include <file-type.h>
31 #include <hard-locale.h>
34 #include <unlocked-io.h>
35 #include <version-etc.h>
42 /* The official name of this program (e.g., no 'g' prefix). */
43 static char const PROGRAM_NAME
[] = "cmp";
46 proper_name_lite ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
50 hard_locale_LC_MESSAGES (void)
52 #if defined LC_MESSAGES && ENABLE_NLS
53 return hard_locale (LC_MESSAGES
);
/* Forward declarations of file-local helpers that are defined later
   in this file. */
/* Compare the two open input files and return the exit status. */
59 static int cmp (void);
/* Position input F past its ignored prefix and yield the new position. */
60 static off_t
file_position (int);
/* Offset of the first differing byte of two blocks of words. */
61 static idx_t
block_compare (word
const *, word
const *) ATTRIBUTE_PURE
;
/* Number of newlines in a buffer of the given size. */
62 static idx_t
count_newlines (char *, idx_t
);
/* Format one byte into a buffer, quoting unprintable bytes like cat -t. */
63 static void sprintc (char *, unsigned char);
65 /* Filenames of the compared files. */
66 static char const *file
[2];
68 /* File descriptors of the files. */
69 static int file_desc
[2];
71 /* Status of the files. If st_size is -1, stat_buf[i] is valid except
72 that the file size is unspecified. If st_size is -2, the rest of
73 stat_buf[i] is unspecified except that st_blksize (if it exists) is
74 a reasonable guess. */
75 static struct stat stat_buf
[2];
77 /* Read buffers for the files. */
78 static word
*buffer
[2];
80 /* Optimal block size for the files. */
81 static idx_t buf_size
;
83 /* Initial prefix to ignore for each file, or negative if the user
84 requested to ignore more than TYPE_MAXIMUM (intmax_t) bytes. */
85 static intmax_t ignore_initial
[2];
87 /* Number of bytes to compare. INTMAX_MAX is effectively infinity,
88 since there's no practical way on current computers to compare so
89 many bytes. Even if cmp added SEEK_HOLE and SEEK_DATA optimization,
90 regular files can't have more than TYPE_MAXIMUM (off_t) bytes
91 and special files are unlikely to support this optimization. */
92 static intmax_t bytes
= INTMAX_MAX
;
95 static enum comparison_type
97 type_first_diff
, /* Print the first difference. */
98 type_all_diffs
, /* Print all differences. */
99 type_no_stdout
, /* Do not output to stdout; only stderr. */
100 type_status
/* Exit status only. */
103 /* If true, print the values of differing bytes, quoted like cat -t does. */
104 static bool opt_print_bytes
;
106 /* Values for long options that do not have single-letter equivalents. */
109 HELP_OPTION
= CHAR_MAX
+ 1
112 static char const shortopts
[] = "bci:ln:sv";
113 static struct option
const longopts
[] =
115 {"print-bytes", 0, 0, 'b'},
116 {"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */
117 {"ignore-initial", 1, 0, 'i'},
118 {"verbose", 0, 0, 'l'},
119 {"bytes", 1, 0, 'n'},
120 {"silent", 0, 0, 's'},
121 {"quiet", 0, 0, 's'},
122 {"version", 0, 0, 'v'},
123 {"help", 0, 0, HELP_OPTION
},
127 static char const valid_suffixes
[] = "kKMGTPEZY0";
129 /* Update ignore_initial[F] according to the result of parsing an
130 operand of --ignore-initial that starts at *ARGPTR, updating *ARGPTR
131 to point after the operand. If DELIMITER is nonzero, the operand may
132 be followed by DELIMITER; otherwise it must be null-terminated. */
/* F selects which entry of ignore_initial is updated. */
134 specify_ignore_initial (int f
, char **argptr
, char delimiter
)
/* Remember where the operand starts so that it can be quoted in the
   diagnostic below. */
137 char const *arg
= *argptr
;
/* Parse the number at ARG, allowing the suffixes in valid_suffixes;
   D keeps the raw result code. */
138 strtol_error d
= xstrtoimax (arg
, argptr
, 0, &val
, valid_suffixes
);
/* E is D with the overflow bit cleared, so that overflow by itself is
   not treated as a syntax error by the test that follows. */
139 strtol_error e
= d
& ~LONGINT_OVERFLOW
;
/* Accept a clean parse, or a parse that stopped at an unexpected
   character when that character is DELIMITER.
   NOTE(review): part of this condition is not visible in this
   excerpt. */
140 if (! ((e
== LONGINT_OK
141 || (e
== LONGINT_INVALID_SUFFIX_CHAR
&& **argptr
== delimiter
))
143 try_help ("invalid --ignore-initial value %s", quote (arg
));
/* Only ever raise the recorded skip count, and leave an entry that is
   already negative alone. If D differs from E the parse overflowed,
   which is recorded as -1. */
144 if (0 <= ignore_initial
[f
] && ignore_initial
[f
] < val
)
145 ignore_initial
[f
] = d
== e
? val
: -1;
148 /* Specify the output format. */
150 specify_comparison_type (enum comparison_type t
)
152 if (comparison_type
&& comparison_type
!= t
)
153 try_help ("options -l and -s are incompatible", nullptr);
161 error (EXIT_TROUBLE
, 0, "%s", _("write failed"));
162 else if (fclose (stdout
) != 0)
163 error (EXIT_TROUBLE
, errno
, "%s", _("standard output"));
166 static char const *const option_help_msgid
[] = {
167 N_("-b, --print-bytes print differing bytes"),
168 N_("-i, --ignore-initial=SKIP skip first SKIP bytes of both inputs"),
169 N_("-i, --ignore-initial=SKIP1:SKIP2 skip first SKIP1 bytes of FILE1 and\n"
170 " first SKIP2 bytes of FILE2"),
171 N_("-l, --verbose output byte numbers and differing byte values"),
172 N_("-n, --bytes=LIMIT compare at most LIMIT bytes"),
173 N_("-s, --quiet, --silent suppress all normal output"),
174 N_(" --help display this help and exit"),
175 N_("-v, --version output version information and exit"),
182 printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
183 squote (0, program_name
));
184 puts (_("Compare two files byte by byte."));
186 _("The optional SKIP1 and SKIP2 specify the number of bytes to skip\n"
187 "at the beginning of each file (zero by default)."));
190 Mandatory arguments to long options are mandatory for short options too.\n\
192 for (char const *const *p
= option_help_msgid
; *p
; p
++)
193 printf (" %s\n", _(*p
));
194 printf ("\n%s\n\n%s\n%s\n",
195 _("SKIP values may be followed by the following multiplicative suffixes:\n\
196 kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
197 GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
198 _("If a FILE is '-' or missing, read standard input."),
199 _("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."));
200 emit_bug_reporting_address ();
204 main (int argc
, char **argv
)
206 exit_failure
= EXIT_TROUBLE
;
207 initialize_main (&argc
, &argv
);
208 set_program_name (argv
[0]);
209 setlocale (LC_ALL
, "");
210 bindtextdomain (PACKAGE
, LOCALEDIR
);
211 textdomain (PACKAGE
);
212 c_stack_action (nullptr);
215 /* Parse command line options. */
218 0 <= (c
= getopt_long (argc
, argv
, shortopts
, longopts
, nullptr)); )
222 case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
223 opt_print_bytes
= true;
227 specify_ignore_initial (0, &optarg
, ':');
228 if (*optarg
++ == ':')
229 specify_ignore_initial (1, &optarg
, 0);
230 else if (ignore_initial
[1] < ignore_initial
[0] || ignore_initial
[0] < 0)
231 ignore_initial
[1] = ignore_initial
[0];
235 specify_comparison_type (type_all_diffs
);
241 strtol_error e
= xstrtoimax (optarg
, nullptr, 0, &n
, valid_suffixes
);
242 if ((e
& ~LONGINT_OVERFLOW
) != LONGINT_OK
|| n
< 0)
243 try_help ("invalid --bytes value %s", quote (optarg
));
244 bytes
= MIN (bytes
, n
);
249 specify_comparison_type (type_status
);
253 version_etc (stdout
, PROGRAM_NAME
, PACKAGE_NAME
, Version
,
264 try_help (nullptr, nullptr);
268 try_help ("missing operand after %s", quote (argv
[argc
- 1]));
270 file
[0] = argv
[optind
++];
271 file
[1] = optind
< argc
? argv
[optind
++] : "-";
273 for (int f
= 0; f
< 2 && optind
< argc
; f
++)
275 char *arg
= argv
[optind
++];
276 specify_ignore_initial (f
, &arg
, 0);
280 try_help ("extra operand %s", quote (argv
[optind
]));
282 for (int f
= 0; f
< 2; f
++)
284 /* Two files with the same name and offset are identical.
285 But wait until we open the file once, for proper diagnostics. */
286 if (f
&& 0 <= ignore_initial
[0] && ignore_initial
[0] == ignore_initial
[1]
287 && file_name_cmp (file
[0], file
[1]) == 0)
290 if (STREQ (file
[f
], "-"))
292 file_desc
[f
] = STDIN_FILENO
;
293 if (O_BINARY
&& ! isatty (STDIN_FILENO
))
294 set_binary_mode (STDIN_FILENO
, O_BINARY
);
298 file_desc
[f
] = open (file
[f
], O_RDONLY
| O_BINARY
| O_CLOEXEC
);
300 if (file_desc
[f
] < 0)
302 if (comparison_type
!= type_status
)
303 error (0, errno
, "%s", squote (0, file
[f
]));
308 if (fstat (file_desc
[f
], stat_buf
+ f
) < 0)
310 stat_buf
[f
].st_size
= -2;
311 #if HAVE_STRUCT_STAT_ST_BLKSIZE
312 stat_buf
[f
].st_blksize
= 8 * 1024;
316 stat_buf
[f
].st_size
= stat_size (&stat_buf
[f
]);
319 /* If the files are the same and have the same file position,
320 the contents are identical. */
322 if (-1 <= stat_buf
[0].st_size
&& -1 <= stat_buf
[1].st_size
323 && same_file (&stat_buf
[0], &stat_buf
[1])
324 && file_position (0) == file_position (1))
327 /* If output is redirected to the null device, we can avoid some of
the work. */
330 if (comparison_type
!= type_status
)
332 struct stat outstat
, nullstat
;
334 if (fstat (STDOUT_FILENO
, &outstat
) == 0
335 && S_ISCHR (outstat
.st_mode
)
336 && stat (NULL_DEVICE
, &nullstat
) == 0
337 && same_file (&outstat
, &nullstat
))
338 comparison_type
= type_no_stdout
;
341 /* If no output is needed,
342 and both input descriptors are associated with plain files,
343 and the file sizes are nonzero so they are not Linux /proc files,
344 conclude that the files differ if they have different sizes
345 and if more bytes will be compared than are in the smaller file. */
347 if (type_no_stdout
<= comparison_type
348 && 0 <= stat_buf
[0].st_size
&& S_ISREG (stat_buf
[0].st_mode
)
349 && 0 <= stat_buf
[1].st_size
&& S_ISREG (stat_buf
[1].st_mode
))
351 off_t pos0
= file_position (0);
354 off_t pos1
= file_position (1);
357 off_t s0
= stat_buf
[0].st_size
- pos0
;
358 off_t s1
= stat_buf
[1].st_size
- pos1
;
363 if (s0
!= s1
&& MIN (s0
, s1
) < bytes
)
369 /* Guess a good block size for the files. */
/* Take each file's block size from its own stat data: index stat_buf
   by F rather than by 0, so that the second file's preferred I/O size
   also reaches buffer_lcm. A negative size, or one that ckd_add
   reports as not fitting in blksize[F], satisfies this test. */
372 for (int f
= 0; f
< 2; f
++)
373 if (ST_BLKSIZE (stat_buf
[f]) < 0
374 || ckd_add (&blksize
[f
], ST_BLKSIZE (stat_buf
[f]), 0))
376 buf_size
= buffer_lcm (blksize
[0], blksize
[1], IDX_MAX
- sizeof (word
));
378 /* Allocate word-aligned buffers, with space for sentinels at the end. */
380 idx_t words_per_buffer
= (buf_size
+ 2 * sizeof (word
) - 1) / sizeof (word
);
381 buffer
[0] = xinmalloc (words_per_buffer
, 2 * sizeof (word
));
382 buffer
[1] = buffer
[0] + words_per_buffer
;
384 int exit_status
= cmp ();
386 for (int f
= 0; f
< 2; f
++)
387 if (close (file_desc
[f
]) != 0)
388 error (EXIT_TROUBLE
, errno
, "%s", squote (0, file
[f
]));
389 if (exit_status
!= EXIT_SUCCESS
&& comparison_type
< type_no_stdout
)
394 /* Compare the two files already open on 'file_desc[0]' and 'file_desc[1]',
395 using 'buffer[0]' and 'buffer[1]'.
396 Return EXIT_SUCCESS if identical, EXIT_FAILURE if different;
I/O errors are reported through error (EXIT_TROUBLE, ...). */
402 word
*buffer0
= buffer
[0];
403 word
*buffer1
= buffer
[1];
404 char *buf0
= (char *) buffer0
;
405 char *buf1
= (char *) buffer1
;
407 /* For -l, the print width of the offset, a positive number.
408 Otherwise, the negative of the comparison type.
409 This portmanteauization pacifies gcc -Wmaybe-uninitialized. */
412 if (comparison_type
== type_all_diffs
)
414 intmax_t byte_number_max
= bytes
;
416 for (int f
= 0; f
< 2; f
++)
417 if (0 <= stat_buf
[f
].st_size
&& S_ISREG (stat_buf
[f
].st_mode
))
419 off_t pos
= file_position (f
);
421 byte_number_max
= MIN (byte_number_max
,
422 MAX (0, stat_buf
[f
].st_size
- pos
));
425 for (offset_width
= 1; (byte_number_max
/= 10) != 0; offset_width
++)
429 offset_width
= -comparison_type
;
431 bool eof
[2] = { false, false };
433 for (int f
= 0; f
< 2; f
++)
435 intmax_t ig
= ignore_initial
[f
];
439 if (0 <= file_position (f
))
440 continue; /* lseek sufficed. */
442 if (! (0 <= ig
&& ig
< TYPE_MAXIMUM (off_t
))
443 && -1 <= stat_buf
[f
].st_size
&& S_ISREG (stat_buf
[f
].st_mode
))
445 /* When ignoring at least TYPE_MAXIMUM (off_t) bytes
446 of a regular file, pretend to be at end of file,
447 as lseeking to TYPE_MAXIMUM (off_t) might tickle a kernel bug,
448 and lseeking to file end would race with a growing file. */
453 /* Report an error if asked to ignore more than
454 INTMAX_MAX bytes of a non-regular file,
455 as the actual number of bytes to ignore is not known. */
456 error (EXIT_TROUBLE
, EOVERFLOW
, "%s", squote (0, file
[f
]));
460 /* Read and discard the ignored initial prefix. */
463 idx_t bytes_to_read
= MIN (ig
, buf_size
);
464 ptrdiff_t r
= block_read (file_desc
[f
], buf0
, bytes_to_read
);
465 if (r
!= bytes_to_read
)
468 error (EXIT_TROUBLE
, errno
, "%s", squote (0, file
[f
]));
477 bool at_line_start
= true;
478 intmax_t line_number
= 1; /* Line number (1...) of difference. */
479 intmax_t byte_number
= 1; /* Byte number (1...) of difference. */
480 intmax_t remaining
= bytes
; /* Remaining bytes to compare, or -1. */
484 idx_t bytes_to_read
= MIN (buf_size
, remaining
);
485 remaining
-= bytes_to_read
;
487 ptrdiff_t read0
= (eof
[0] ? 0
488 : block_read (file_desc
[0], buf0
, bytes_to_read
));
490 error (EXIT_TROUBLE
, errno
, "%s", squote (0, file
[0]));
491 ptrdiff_t read1
= (eof
[1] ? 0
492 : block_read (file_desc
[1], buf1
, bytes_to_read
));
494 error (EXIT_TROUBLE
, errno
, "%s", squote (0, file
[1]));
496 idx_t smaller
= MIN (read0
, read1
);
498 idx_t first_diff
; /* Offset (0...) in buffers of 1st diff. */
500 /* Optimize the common case where the buffers are the same. */
501 if (memcmp (buf0
, buf1
, smaller
) == 0)
502 first_diff
= smaller
;
505 /* Insert sentinels for the block compare. */
507 buf1
[read0
] = 0x55; /* arbitrary */
509 buf0
[read1
] = 0x79; /* arbitrary and distinct from the above */
510 buf0
[read0
] = ~buf1
[read0
];
511 buf1
[read1
] = ~buf0
[read1
];
512 /* Ensure all bytes of a final word-read are initialized. */
513 memset (buf0
+ read0
+ 1, 0,
514 sizeof (word
) - read0
% sizeof (word
) - 1);
515 memset (buf1
+ read1
+ 1, 0,
516 sizeof (word
) - read1
% sizeof (word
) - 1);
518 first_diff
= block_compare (buffer0
, buffer1
);
521 byte_number
+= first_diff
;
522 if (offset_width
== -type_first_diff
&& first_diff
!= 0)
524 line_number
+= count_newlines (buf0
, first_diff
);
525 at_line_start
= buf0
[first_diff
- 1] == '\n';
530 if (first_diff
< smaller
)
532 switch (offset_width
)
534 case -type_first_diff
:
536 if (!opt_print_bytes
)
538 /* See POSIX for this format. This message is
539 used only in the POSIX locale, so it need not
be translated. */
541 static char const char_message
[] =
542 "%s %s differ: char %"PRIdMAX
", line %"PRIdMAX
"\n";
544 /* The POSIX rationale recommends using the word
545 "byte" outside the POSIX locale. Some gettext
546 implementations translate even in the POSIX
547 locale if certain other environment variables
548 are set, so use "byte" if a translation is
549 available, or if outside the POSIX locale. */
550 static char const byte_msgid
[] =
551 N_("%s %s differ: byte %"PRIdMAX
", line %"PRIdMAX
"\n");
552 char const *byte_message
= _(byte_msgid
);
553 bool use_byte_message
= (byte_message
!= byte_msgid
554 || hard_locale_LC_MESSAGES ());
556 printf (use_byte_message
? byte_message
: char_message
,
557 file
[0], file
[1], byte_number
, line_number
);
561 unsigned char c0
= buf0
[first_diff
];
562 unsigned char c1
= buf1
[first_diff
];
567 printf (_("%s %s differ: byte %"PRIdMAX
", line %"PRIdMAX
568 " is %3o %s %3o %s\n"),
569 file
[0], file
[1], byte_number
, line_number
,
578 dassert (comparison_type
== type_all_diffs
);
582 unsigned char c0
= buf0
[first_diff
];
583 unsigned char c1
= buf1
[first_diff
];
586 if (!opt_print_bytes
)
588 /* See POSIX for this format. */
589 printf ("%*"PRIdMAX
" %3o %3o\n",
590 offset_width
, byte_number
, c0
, c1
);
598 printf ("%*"PRIdMAX
" %3o %-4s %3o %s\n",
599 offset_width
, byte_number
, c0
, s0
, c1
, s1
);
605 while (first_diff
< smaller
);
610 case -type_no_stdout
:
618 /* POSIX says that each of these format strings must be
619 "cmp: EOF on %s", optionally followed by a blank and
620 extra text sans newline, then terminated by "\n". */
621 if (differing
<= 0 && offset_width
!= -type_status
)
624 ? N_("cmp: EOF on %s which is empty\n")
625 : offset_width
!= -type_first_diff
626 ? N_("cmp: EOF on %s after byte %"PRIdMAX
"\n")
628 ? N_("cmp: EOF on %s after byte %"PRIdMAX
","
629 " line %"PRIdMAX
"\n")
630 : N_("cmp: EOF on %s after byte %"PRIdMAX
","
631 " in line %"PRIdMAX
"\n")),
632 quote (file
[read1
< read0
]),
633 byte_number
- 1, line_number
- at_line_start
);
637 if (0 < differing
|| read0
!= buf_size
)
638 return differing
== 0 ? EXIT_SUCCESS
: EXIT_FAILURE
;
642 /* Compare two blocks of memory P0 and P1 until they differ.
643 If the blocks are not guaranteed to be different, put sentinels at the ends
644 of the blocks before calling this function.
646 Return the offset of the first byte that differs. */
/* P0 and P1 address the two blocks as arrays of words. */
649 block_compare (word
const *p0
, word
const *p1
)
654 /* Find the rough position of the first difference by comparing one
word at a time. This scan has no length bound: it stops only at the
first unequal pair of words, hence the need for sentinels. */
657 for (l0
= p0
, l1
= p1
; *l0
== *l1
; l0
++, l1
++)
660 /* Find the exact differing position (endianness independent). */
/* The byte scan starts at the first unequal pair of words. */
662 for (c0
= (char const *) l0
, c1
= (char const *) l1
;
/* Express C0 as a byte offset from the start of the first block. */
667 return c0
- (char const *) p0
;
670 /* Return the number of newlines in BUF, of size BUFSIZE,
671 where BUF[BUFSIZE] is available for use as a sentinel. */
/* BUF is not const; the header comment reserves one extra byte of it
   for a sentinel. */
674 count_newlines (char *buf
, idx_t bufsize
)
/* LIM is the position just past the BUFSIZE bytes being counted. */
677 char *lim
= buf
+ bufsize
;
/* Hop from one newline to the next; the walk ends when the search
   result is LIM itself.
   NOTE(review): rawmemchr takes no length limit, so this presumably
   relies on a '\n' sentinel stored at *LIM by code that is not
   visible in this excerpt -- confirm. */
680 for (char *p
= buf
; (p
= rawmemchr (p
, '\n')) != lim
; p
++)
686 /* Put into BUF the unsigned char C, making unprintable bytes
687 visible by quoting like cat -t does. */
690 sprintc (char *buf
, unsigned char c
)
716 /* Position file F to ignore_initial[F] bytes from its initial position,
717 and yield its new position. Return a negative number on failure.
718 Do not report an error on failure, as lseek is generally a no-op
719 on devices that cannot seek. Don't try more than once. */
722 file_position (int f
)
724 /* The initial position of input file F, and whether that position has
725 been determined. The position is -1 if it could not be determined. */
726 static bool positioned
[2];
727 static off_t position
[2];
731 positioned
[f
] = true;
732 off_t pos
= ignore_initial
[f
];
733 position
[f
] = (0 <= pos
&& pos
<= TYPE_MAXIMUM (off_t
)
734 ? lseek (file_desc
[f
], pos
, SEEK_CUR
)