3 /* cmp - compare two files byte by byte
5 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001,
6 2002 Free Software Foundation, Inc.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16 See the GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING.
20 If not, write to the Free Software Foundation,
21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
32 #include <hard-locale.h>
/* hard_locale_LC_MESSAGES is defined twice below: as a call to
   hard_locale (LC_MESSAGES) under the condition that LC_MESSAGES is
   defined and ENABLE_NLS is nonzero, and as the constant 0.
   NOTE(review): the directive that separates the two definitions and
   the one that closes the conditional are not present in this listing;
   presumably the constant 0 is the fallback -- confirm.  */
38 #if defined LC_MESSAGES && ENABLE_NLS
39 # define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
41 # define hard_locale_LC_MESSAGES 0
44 static char const authorship_msgid
[] =
45 N_("Written by Torbjorn Granlund and David MacKenzie.");
47 static char const copyright_string
[] =
48 "Copyright (C) 2002 Free Software Foundation, Inc.";
50 extern char const version_string
[];
52 static int cmp (void);
53 static off_t
file_position (int);
54 static size_t block_compare (word
const *, word
const *);
55 static size_t block_compare_and_count (word
const *, word
const *, off_t
*);
56 static void sprintc (char *, unsigned char);
58 /* Name under which this program was invoked. */
61 /* Filenames of the compared files. */
62 static char const *file
[2];
64 /* File descriptors of the files. */
65 static int file_desc
[2];
67 /* Status of the files. */
68 static struct stat stat_buf
[2];
70 /* Read buffers for the files. */
71 static word
*buffer
[2];
73 /* Optimal block size for the files. */
74 static size_t buf_size
;
76 /* Initial prefix to ignore for each file. */
77 static off_t ignore_initial
[2];
79 /* Number of bytes to compare. */
80 static uintmax_t bytes
= UINTMAX_MAX
;
/* Output modes of the program.  main passes type_all_diffs and
   type_status to specify_comparison_type, and the rest of the file
   compares the file-scope variable comparison_type against these
   enumerators.
   NOTE(review): the braces and the declarator of that variable are not
   present in this listing.  */
83 static enum comparison_type
85 type_first_diff
, /* Print the first difference. */
86 type_all_diffs
, /* Print all differences. */
87 type_status
/* Exit status only. */
90 /* If nonzero, print values of bytes quoted like cat -t does. */
91 static bool opt_print_bytes
;
93 /* Values for long options that do not have single-letter equivalents. */
96 HELP_OPTION
= CHAR_MAX
+ 1
/* Long option table passed to getopt_long in main.  Each entry gives the
   long name, whether the option takes an argument (1 for ignore-initial
   and bytes, 0 for the others), a flag pointer of 0, and the value to
   hand back: the matching short option letter, or HELP_OPTION for help.
   silent and quiet both map to the letter s.
   NOTE(review): no terminating all-zero entry is visible in this
   listing; getopt_long requires one, so confirm it exists in the
   complete source.  */
99 static struct option
const long_options
[] =
101 {"print-bytes", 0, 0, 'b'},
102 {"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */
103 {"ignore-initial", 1, 0, 'i'},
104 {"verbose", 0, 0, 'l'},
105 {"bytes", 1, 0, 'n'},
106 {"silent", 0, 0, 's'},
107 {"quiet", 0, 0, 's'},
108 {"version", 0, 0, 'v'},
109 {"help", 0, 0, HELP_OPTION
},
/* Report a usage problem and point the user at the help option.
   REASON_MSGID is an untranslated format string; it is passed through
   _() and printed by error () with OPERAND as its single argument.
   A second error () call, made with status EXIT_TROUBLE, prints the
   hint that names program_name.  The prototype carries the noreturn
   attribute, so callers do not expect control to come back.
   NOTE(review): original lines between the visible ones (for example
   any guard on REASON_MSGID) are not present in this listing.  */
113 static void try_help (char const *, char const *) __attribute__((noreturn
));
115 try_help (char const *reason_msgid
, char const *operand
)
/* First message: the specific reason, status 0 so error () returns.  */
118 error (0, 0, _(reason_msgid
), operand
);
/* Second message: the generic hint, issued with EXIT_TROUBLE.  */
119 error (EXIT_TROUBLE
, 0,
120 _("Try `%s --help' for more information."), program_name
);
124 static char const valid_suffixes
[] = "kKMGTPEZY0";
126 /* Parse an operand *ARGPTR of --ignore-initial, updating *ARGPTR to
127 point after the operand. If DELIMITER is nonzero, the operand may
128 be followed by DELIMITER; otherwise it must be null-terminated. */
/* Details taken from the visible code: the number is parsed by
   xstrtoumax into VAL, accepting the suffix letters in valid_suffixes.
   The operand is rejected through try_help when
     - parsing fails for any reason other than an invalid suffix
       character that equals DELIMITER,
     - VAL does not survive assignment to O (O is negative, or O
       differs from VAL), or
     - VAL equals UINTMAX_MAX.
   NOTE(review): the declarations of VAL and O and the return statement
   are not present in this listing.  Callers store the result in
   ignore_initial, so O is presumably an off_t that is returned --
   confirm against the complete source.  */
130 parse_ignore_initial (char **argptr
, char delimiter
)
/* Keep the start of the operand for the diagnostic; xstrtoumax
   advances *ARGPTR.  */
134 char const *arg
= *argptr
;
135 strtol_error e
= xstrtoumax (arg
, argptr
, 0, &val
, valid_suffixes
);
136 if (! (e
== LONGINT_OK
137 || (e
== LONGINT_INVALID_SUFFIX_CHAR
&& **argptr
== delimiter
))
138 || (o
= val
) < 0 || o
!= val
|| val
== UINTMAX_MAX
)
139 try_help ("invalid --ignore-initial value `%s'", arg
);
143 /* Specify the output format. */
/* T is the requested mode; main calls this with type_all_diffs and with
   type_status.  The only statement visible here reports that the l and
   s options are incompatible, through try_help with a null operand.
   NOTE(review): the condition guarding that call and the statement that
   records T are not present in this listing.  */
145 specify_comparison_type (enum comparison_type t
)
148 try_help ("options -l and -s are incompatible", 0);
156 error (EXIT_TROUBLE
, 0, "%s", _("write failed"));
157 else if (fclose (stdout
) != 0)
158 error (EXIT_TROUBLE
, errno
, "%s", _("standard output"));
/* Untranslated help lines, one per option, each marked with N_ so it
   can be translated later.  The help printer walks this table with a
   pointer until it reaches a null entry and prints _() of each line.
   NOTE(review): the null terminator and the closing brace are not
   present in this listing; the walking loop depends on the terminator,
   so confirm it exists in the complete source.  */
161 static char const * const option_help_msgid
[] = {
162 N_("-b --print-bytes Print differing bytes."),
163 N_("-i SKIP --ignore-initial=SKIP Skip the first SKIP bytes of input."),
164 N_("-i SKIP1:SKIP2 --ignore-initial=SKIP1:SKIP2"),
165 N_(" Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."),
166 N_("-l --verbose Output byte numbers and values of all differing bytes."),
167 N_("-n LIMIT --bytes=LIMIT Compare at most LIMIT bytes."),
168 N_("-s --quiet --silent Output nothing; yield exit status only."),
169 N_("-v --version Output version info."),
170 N_("--help Output this help."),
177 char const * const *p
;
179 printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
181 printf ("%s\n\n", _("Compare two files byte by byte."));
182 for (p
= option_help_msgid
; *p
; p
++)
183 printf (" %s\n", _(*p
));
184 printf ("\n%s\n%s\n\n%s\n\n%s\n",
185 _("SKIP1 and SKIP2 are the number of bytes to skip in each file."),
186 _("SKIP values may be followed by the following multiplicative suffixes:\n\
187 kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
188 GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
189 _("If a FILE is `-' or missing, read standard input."),
190 _("Report bugs to <bug-gnu-utils@gnu.org>."));
/* Program entry point.  From the statements visible in this listing,
   main:
     - sets exit_failure to EXIT_TROUBLE, records argv[0] as
       program_name, and performs the locale, message-catalog and
       c_stack setup calls;
     - parses options with getopt_long, using the short-option string
       bci:ln:sv together with long_options;
     - takes FILE1 from the first operand and FILE2 from the second
       (a lone dash when there is none), then up to two more operands
       as skip counts;
     - opens both inputs, applies the shortcuts described by the
       comments below, sizes and allocates the two read buffers, and
       calls cmp ();
     - closes both descriptors, reporting a close failure with error ().
   NOTE(review): the embedded numbering shows that many original lines
   (braces, case labels, several statements, the final exit) are not
   present here, so the control flow between the visible statements
   cannot be confirmed from this listing.  */
194 main (int argc
, char **argv
)
196 int c
, f
, exit_status
;
197 size_t words_per_buffer
;
199 exit_failure
= EXIT_TROUBLE
;
200 initialize_main (&argc
, &argv
);
201 program_name
= argv
[0];
202 setlocale (LC_ALL
, "");
203 bindtextdomain (PACKAGE
, LOCALEDIR
);
204 textdomain (PACKAGE
);
205 c_stack_action (c_stack_die
);
207 /* Parse command line options. */
209 while ((c
= getopt_long (argc
, argv
, "bci:ln:sv", long_options
, 0))
214 case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
/* Skip operand: the first count is parsed up to an optional colon.
   The second count is parsed only when a colon follows; otherwise it
   is a copy of the first.  */
219 ignore_initial
[0] = parse_ignore_initial (&optarg
, ':');
220 ignore_initial
[1] = (*optarg
++ == ':'
221 ? parse_ignore_initial (&optarg
, 0)
222 : ignore_initial
[0]);
226 specify_comparison_type (type_all_diffs
);
/* Byte-limit operand: parsed into n with valid_suffixes; any result
   other than LONGINT_OK is a usage error.  */
232 if (xstrtoumax (optarg
, 0, 0, &n
, valid_suffixes
) != LONGINT_OK
)
233 try_help ("invalid --bytes value `%s'", optarg
);
240 specify_comparison_type (type_status
);
244 printf ("cmp %s\n%s\n\n%s\n\n%s\n",
245 version_string
, copyright_string
,
246 _(free_software_msgid
), _(authorship_msgid
));
260 try_help ("missing operand after `%s'", argv
[argc
- 1]);
262 file
[0] = argv
[optind
++];
263 file
[1] = optind
< argc
? argv
[optind
++] : "-";
/* Remaining operands, at most two, are parsed as the skip counts for
   file 0 and file 1 in turn.  */
265 for (f
= 0; f
< 2 && optind
< argc
; f
++)
267 char *arg
= argv
[optind
++];
268 ignore_initial
[f
] = parse_ignore_initial (&arg
, 0);
272 try_help ("extra operand `%s'", argv
[optind
]);
274 for (f
= 0; f
< 2; f
++)
276 /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
277 stdin is closed and opening file[0] yields file descriptor 0. */
278 int f1
= f
^ (strcmp (file
[1], "-") == 0);
280 /* Two files with the same name are identical.
281 But wait until we open the file once, for proper diagnostics. */
282 if (f
&& file_name_cmp (file
[0], file
[1]) == 0)
/* NOTE(review): the first alternative of the conditional expression
   below (original line 286) is not present in this listing.  */
285 file_desc
[f1
] = (strcmp (file
[f1
], "-") == 0
287 : open (file
[f1
], O_RDONLY
, 0));
288 if (file_desc
[f1
] < 0 || fstat (file_desc
[f1
], stat_buf
+ f1
) != 0)
290 if (file_desc
[f1
] < 0 && comparison_type
== type_status
)
293 error (EXIT_TROUBLE
, errno
, "%s", file
[f1
]);
296 set_binary_mode (file_desc
[f1
], 1);
299 /* If the files are links to the same inode and have the same file position,
300 they are identical. */
302 if (0 < same_file (&stat_buf
[0], &stat_buf
[1])
303 && same_file_attributes (&stat_buf
[0], &stat_buf
[1])
304 && file_position (0) == file_position (1))
307 /* If output is redirected to the null device, we may assume `-s'. */
309 if (comparison_type
!= type_status
)
311 struct stat outstat
, nullstat
;
313 if (fstat (STDOUT_FILENO
, &outstat
) == 0
314 && stat (NULL_DEVICE
, &nullstat
) == 0
315 && 0 < same_file (&outstat
, &nullstat
))
316 comparison_type
= type_status
;
319 /* If only a return code is needed,
320 and if both input descriptors are associated with plain files,
321 conclude that the files differ if they have different sizes
322 and if more bytes will be compared than are in the smaller file. */
324 if (comparison_type
== type_status
325 && S_ISREG (stat_buf
[0].st_mode
)
326 && S_ISREG (stat_buf
[1].st_mode
))
/* Sizes still to be read: file size minus the position that
   file_position reports for each file.  */
328 off_t s0
= stat_buf
[0].st_size
- file_position (0);
329 off_t s1
= stat_buf
[1].st_size
- file_position (1);
334 if (s0
!= s1
&& MIN (s0
, s1
) < bytes
)
338 /* Get the optimal block size of the files. */
340 buf_size
= buffer_lcm (STAT_BLOCKSIZE (stat_buf
[0]),
341 STAT_BLOCKSIZE (stat_buf
[1]),
342 PTRDIFF_MAX
- sizeof (word
));
344 /* Allocate word-aligned buffers, with space for sentinels at the end. */
346 words_per_buffer
= (buf_size
+ 2 * sizeof (word
) - 1) / sizeof (word
);
/* One allocation holds both buffers; buffer[1] starts
   words_per_buffer words after buffer[0].  */
347 buffer
[0] = xmalloc (2 * sizeof (word
) * words_per_buffer
);
348 buffer
[1] = buffer
[0] + words_per_buffer
;
350 exit_status
= cmp ();
/* Close both inputs; a failing close is reported with EXIT_TROUBLE.  */
352 for (f
= 0; f
< 2; f
++)
353 if (close (file_desc
[f
]) != 0)
354 error (EXIT_TROUBLE
, errno
, "%s", file
[f
]);
355 if (exit_status
!= 0 && comparison_type
!= type_status
)
361 /* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
362 using `buffer[0]' and `buffer[1]'.
363 Return EXIT_SUCCESS if identical, EXIT_FAILURE if different,
369 off_t line_number
= 1; /* Line number (1...) of difference. */
370 off_t byte_number
= 1; /* Byte number (1...) of difference. */
371 uintmax_t remaining
= bytes
; /* Remaining number of bytes to compare. */
372 size_t read0
, read1
; /* Number of bytes read from each file. */
373 size_t first_diff
; /* Offset (0...) in buffers of 1st diff. */
374 size_t smaller
; /* The lesser of `read0' and `read1'. */
375 word
*buffer0
= buffer
[0];
376 word
*buffer1
= buffer
[1];
377 char *buf0
= (char *) buffer0
;
378 char *buf1
= (char *) buffer1
;
379 int ret
= EXIT_SUCCESS
;
383 if (comparison_type
== type_all_diffs
)
385 off_t byte_number_max
= MIN (bytes
, TYPE_MAXIMUM (off_t
));
387 for (f
= 0; f
< 2; f
++)
388 if (S_ISREG (stat_buf
[f
].st_mode
))
390 off_t file_bytes
= stat_buf
[f
].st_size
- file_position (f
);
391 if (file_bytes
< byte_number_max
)
392 byte_number_max
= file_bytes
;
395 for (offset_width
= 1; (byte_number_max
/= 10) != 0; offset_width
++)
399 for (f
= 0; f
< 2; f
++)
401 off_t ig
= ignore_initial
[f
];
402 if (ig
&& file_position (f
) == -1)
404 /* lseek failed; read and discard the ignored initial prefix. */
407 size_t bytes_to_read
= MIN (ig
, buf_size
);
408 size_t r
= block_read (file_desc
[f
], buf0
, bytes_to_read
);
409 if (r
!= bytes_to_read
)
412 error (EXIT_TROUBLE
, errno
, "%s", file
[f
]);
423 size_t bytes_to_read
= buf_size
;
425 if (remaining
!= UINTMAX_MAX
)
427 if (remaining
< bytes_to_read
)
428 bytes_to_read
= remaining
;
429 remaining
-= bytes_to_read
;
432 read0
= block_read (file_desc
[0], buf0
, bytes_to_read
);
433 if (read0
== SIZE_MAX
)
434 error (EXIT_TROUBLE
, errno
, "%s", file
[0]);
435 read1
= block_read (file_desc
[1], buf1
, bytes_to_read
);
436 if (read1
== SIZE_MAX
)
437 error (EXIT_TROUBLE
, errno
, "%s", file
[1]);
439 /* Insert sentinels for the block compare. */
441 buf0
[read0
] = ~buf1
[read0
];
442 buf1
[read1
] = ~buf0
[read1
];
444 /* If the line number should be written for differing files,
445 compare the blocks and count the number of newlines
447 first_diff
= (comparison_type
== type_first_diff
448 ? block_compare_and_count (buffer0
, buffer1
, &line_number
)
449 : block_compare (buffer0
, buffer1
));
451 byte_number
+= first_diff
;
452 smaller
= MIN (read0
, read1
);
454 if (first_diff
< smaller
)
456 switch (comparison_type
)
458 case type_first_diff
:
460 char byte_buf
[INT_BUFSIZE_BOUND (off_t
)];
461 char line_buf
[INT_BUFSIZE_BOUND (off_t
)];
462 char const *byte_num
= offtostr (byte_number
, byte_buf
);
463 char const *line_num
= offtostr (line_number
, line_buf
);
464 if (!opt_print_bytes
)
466 /* See POSIX 1003.1-2001 for this format. This
467 message is used only in the POSIX locale, so it
468 need not be translated. */
469 static char const char_message
[] =
470 "%s %s differ: char %s, line %s\n";
472 /* The POSIX rationale recommends using the word
473 "byte" outside the POSIX locale. Some gettext
474 implementations translate even in the POSIX
475 locale if certain other environment variables
476 are set, so use "byte" if a translation is
477 available, or if outside the POSIX locale. */
478 static char const byte_msgid
[] =
479 N_("%s %s differ: byte %s, line %s\n");
480 char const *byte_message
= _(byte_msgid
);
481 bool use_byte_message
= (byte_message
!= byte_msgid
482 || hard_locale_LC_MESSAGES
);
484 printf ((use_byte_message
486 : "%s %s differ: char %s, line %s\n"),
487 file
[0], file
[1], byte_num
, line_num
);
491 unsigned char c0
= buf0
[first_diff
];
492 unsigned char c1
= buf1
[first_diff
];
497 printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
498 file
[0], file
[1], byte_num
, line_num
,
509 unsigned char c0
= buf0
[first_diff
];
510 unsigned char c1
= buf1
[first_diff
];
513 char byte_buf
[INT_BUFSIZE_BOUND (off_t
)];
514 char const *byte_num
= offtostr (byte_number
, byte_buf
);
515 if (!opt_print_bytes
)
517 /* See POSIX 1003.1-2001 for this format. */
518 printf ("%*s %3o %3o\n",
519 offset_width
, byte_num
, c0
, c1
);
527 printf ("%*s %3o %-4s %3o %s\n",
528 offset_width
, byte_num
, c0
, s0
, c1
, s1
);
534 while (first_diff
< smaller
);
542 if (comparison_type
!= type_status
)
544 /* See POSIX 1003.1-2001 for this format. */
545 fprintf (stderr
, _("cmp: EOF on %s\n"), file
[read1
< read0
]);
551 while (read0
== buf_size
);
556 /* Compare two blocks of memory P0 and P1 until they differ,
557 and count the number of '\n' occurrences in the common
559 If the blocks are not guaranteed to be different, put sentinels at the ends
560 of the blocks before calling this function.
562 Return the offset of the first byte that differs.
563 Increment *COUNT by the count of '\n' occurrences. */
/* Notes taken from the visible code:
     - NNNN is built by shifting in one newline character per byte of a
       word, CHAR_BIT bits at a time.
     - The word loop advances L0 and L1 together for as long as the
       words they point to are equal, with no length bound of its own;
       that is why the caller must guarantee a difference (sentinels).
     - Inside that loop CNT grows by one each time the low byte of L,
       converted to unsigned char, is zero.
     - The result is C0 minus the start of P0, measured in bytes.
   NOTE(review): several original lines are not present in this listing,
   including the declaration and initial value used for I and NNNN, the
   step that presumably turns newline bytes of L into zero bytes before
   the test above, the byte-wise scan body, and the update of *COUNT.
   Confirm these against the complete source.  */
566 block_compare_and_count (word
const *p0
, word
const *p1
, off_t
*count
)
568 word l
; /* One word from first buffer. */
569 word
const *l0
, *l1
; /* Pointers into each buffer. */
570 char const *c0
, *c1
; /* Pointers for finding exact address. */
571 size_t cnt
= 0; /* Number of '\n' occurrences. */
572 word nnnn
; /* Newline, sizeof (word) times. */
576 for (i
= 0; i
< sizeof nnnn
; i
++)
577 nnnn
= (nnnn
<< CHAR_BIT
) | '\n';
579 /* Find the rough position of the first difference by reading words,
582 for (l0
= p0
, l1
= p1
; (l
= *l0
) == *l1
; l0
++, l1
++)
585 for (i
= 0; i
< sizeof l
; i
++)
587 cnt
+= ! (unsigned char) l
;
592 /* Find the exact differing position (endianness independent). */
594 for (c0
= (char const *) l0
, c1
= (char const *) l1
;
600 return c0
- (char const *) p0
;
603 /* Compare two blocks of memory P0 and P1 until they differ.
604 If the blocks are not guaranteed to be different, put sentinels at the ends
605 of the blocks before calling this function.
607 Return the offset of the first byte that differs. */
/* Notes taken from the visible code: the word loop steps L0 and L1
   forward together while the words they point to are equal and has no
   length bound, so it stops only at a differing word; this is the
   reason for the sentinel requirement above.  A byte-wise scan then
   starts from the differing words, and the result is C0 minus the
   start of P0, measured in bytes.
   NOTE(review): the local declarations, the condition and body of the
   byte-wise scan, and the return type line are not present in this
   listing.  */
610 block_compare (word
const *p0
, word
const *p1
)
615 /* Find the rough position of the first difference by reading words,
618 for (l0
= p0
, l1
= p1
; *l0
== *l1
; l0
++, l1
++)
621 /* Find the exact differing position (endianness independent). */
623 for (c0
= (char const *) l0
, c1
= (char const *) l1
;
628 return c0
- (char const *) p0
;
631 /* Put into BUF the unsigned char C, making unprintable bytes
632 visible by quoting like cat -t does. */
635 sprintc (char *buf
, unsigned char c
)
661 /* Position file F to ignore_initial[F] bytes from its initial position,
662 and yield its new position. Don't try more than once. */
665 file_position (int f
)
667 static bool positioned
[2];
668 static off_t position
[2];
673 position
[f
] = lseek (file_desc
[f
], ignore_initial
[f
], SEEK_CUR
);