Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / diffutils / src / cmp.c
blobcce4f7a8079705ee1045196484d07f09ff10eba7
1 /* $NetBSD$ */
3 /* cmp - compare two files byte by byte
5 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001,
6 2002 Free Software Foundation, Inc.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16 See the GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING.
20 If not, write to the Free Software Foundation,
21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 #include "system.h"
25 #include <stdio.h>
26 #include <cmpbuf.h>
27 #include <c-stack.h>
28 #include <error.h>
29 #include <exitfail.h>
30 #include <freesoft.h>
31 #include <getopt.h>
32 #include <hard-locale.h>
33 #include <inttostr.h>
34 #include <setmode.h>
35 #include <xalloc.h>
36 #include <xstrtol.h>
38 #if defined LC_MESSAGES && ENABLE_NLS
39 # define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
40 #else
41 # define hard_locale_LC_MESSAGES 0
42 #endif
44 static char const authorship_msgid[] =
45 N_("Written by Torbjorn Granlund and David MacKenzie.");
47 static char const copyright_string[] =
48 "Copyright (C) 2002 Free Software Foundation, Inc.";
50 extern char const version_string[];
52 static int cmp (void);
53 static off_t file_position (int);
54 static size_t block_compare (word const *, word const *);
55 static size_t block_compare_and_count (word const *, word const *, off_t *);
56 static void sprintc (char *, unsigned char);
58 /* Name under which this program was invoked. */
59 char *program_name;
61 /* Filenames of the compared files. */
62 static char const *file[2];
64 /* File descriptors of the files. */
65 static int file_desc[2];
67 /* Status of the files. */
68 static struct stat stat_buf[2];
70 /* Read buffers for the files. */
71 static word *buffer[2];
73 /* Optimal block size for the files. */
74 static size_t buf_size;
76 /* Initial prefix to ignore for each file. */
77 static off_t ignore_initial[2];
79 /* Number of bytes to compare. */
80 static uintmax_t bytes = UINTMAX_MAX;
82 /* Output format. */
83 static enum comparison_type
85 type_first_diff, /* Print the first difference. */
86 type_all_diffs, /* Print all differences. */
87 type_status /* Exit status only. */
88 } comparison_type;
90 /* If nonzero, print values of bytes quoted like cat -t does. */
91 static bool opt_print_bytes;
/* Codes returned by getopt_long for long options that have no
   single-letter equivalent.  They start above CHAR_MAX so that they
   cannot collide with any option character.  */
enum
{
  HELP_OPTION = CHAR_MAX + 1
};

/* Long option table handed to getopt_long; the all-zero entry
   terminates it.  */
static const struct option long_options[] =
{
  {"print-bytes",    no_argument,       NULL, 'b'},
  {"print-chars",    no_argument,       NULL, 'c'}, /* obsolescent as of diffutils 2.7.3 */
  {"ignore-initial", required_argument, NULL, 'i'},
  {"verbose",        no_argument,       NULL, 'l'},
  {"bytes",          required_argument, NULL, 'n'},
  {"silent",         no_argument,       NULL, 's'},
  {"quiet",          no_argument,       NULL, 's'},
  {"version",        no_argument,       NULL, 'v'},
  {"help",           no_argument,       NULL, HELP_OPTION},
  {0, 0, 0, 0}
};
113 static void try_help (char const *, char const *) __attribute__((noreturn));
114 static void
115 try_help (char const *reason_msgid, char const *operand)
117 if (reason_msgid)
118 error (0, 0, _(reason_msgid), operand);
119 error (EXIT_TROUBLE, 0,
120 _("Try `%s --help' for more information."), program_name);
121 abort ();
124 static char const valid_suffixes[] = "kKMGTPEZY0";
126 /* Parse an operand *ARGPTR of --ignore-initial, updating *ARGPTR to
127 point after the operand. If DELIMITER is nonzero, the operand may
128 be followed by DELIMITER; otherwise it must be null-terminated. */
129 static off_t
130 parse_ignore_initial (char **argptr, char delimiter)
132 uintmax_t val;
133 off_t o;
134 char const *arg = *argptr;
135 strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
136 if (! (e == LONGINT_OK
137 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
138 || (o = val) < 0 || o != val || val == UINTMAX_MAX)
139 try_help ("invalid --ignore-initial value `%s'", arg);
140 return o;
143 /* Specify the output format. */
144 static void
145 specify_comparison_type (enum comparison_type t)
147 if (comparison_type)
148 try_help ("options -l and -s are incompatible", 0);
149 comparison_type = t;
152 static void
153 check_stdout (void)
155 if (ferror (stdout))
156 error (EXIT_TROUBLE, 0, "%s", _("write failed"));
157 else if (fclose (stdout) != 0)
158 error (EXIT_TROUBLE, errno, "%s", _("standard output"));
161 static char const * const option_help_msgid[] = {
162 N_("-b --print-bytes Print differing bytes."),
163 N_("-i SKIP --ignore-initial=SKIP Skip the first SKIP bytes of input."),
164 N_("-i SKIP1:SKIP2 --ignore-initial=SKIP1:SKIP2"),
165 N_(" Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."),
166 N_("-l --verbose Output byte numbers and values of all differing bytes."),
167 N_("-n LIMIT --bytes=LIMIT Compare at most LIMIT bytes."),
168 N_("-s --quiet --silent Output nothing; yield exit status only."),
169 N_("-v --version Output version info."),
170 N_("--help Output this help."),
174 static void
175 usage (void)
177 char const * const *p;
179 printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
180 program_name);
181 printf ("%s\n\n", _("Compare two files byte by byte."));
182 for (p = option_help_msgid; *p; p++)
183 printf (" %s\n", _(*p));
184 printf ("\n%s\n%s\n\n%s\n\n%s\n",
185 _("SKIP1 and SKIP2 are the number of bytes to skip in each file."),
186 _("SKIP values may be followed by the following multiplicative suffixes:\n\
187 kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
188 GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
189 _("If a FILE is `-' or missing, read standard input."),
190 _("Report bugs to <bug-gnu-utils@gnu.org>."));
194 main (int argc, char **argv)
196 int c, f, exit_status;
197 size_t words_per_buffer;
199 exit_failure = EXIT_TROUBLE;
200 initialize_main (&argc, &argv);
201 program_name = argv[0];
202 setlocale (LC_ALL, "");
203 bindtextdomain (PACKAGE, LOCALEDIR);
204 textdomain (PACKAGE);
205 c_stack_action (c_stack_die);
207 /* Parse command line options. */
209 while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
210 != -1)
211 switch (c)
213 case 'b':
214 case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
215 opt_print_bytes = 1;
216 break;
218 case 'i':
219 ignore_initial[0] = parse_ignore_initial (&optarg, ':');
220 ignore_initial[1] = (*optarg++ == ':'
221 ? parse_ignore_initial (&optarg, 0)
222 : ignore_initial[0]);
223 break;
225 case 'l':
226 specify_comparison_type (type_all_diffs);
227 break;
229 case 'n':
231 uintmax_t n;
232 if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
233 try_help ("invalid --bytes value `%s'", optarg);
234 if (n < bytes)
235 bytes = n;
237 break;
239 case 's':
240 specify_comparison_type (type_status);
241 break;
243 case 'v':
244 printf ("cmp %s\n%s\n\n%s\n\n%s\n",
245 version_string, copyright_string,
246 _(free_software_msgid), _(authorship_msgid));
247 check_stdout ();
248 return EXIT_SUCCESS;
250 case HELP_OPTION:
251 usage ();
252 check_stdout ();
253 return EXIT_SUCCESS;
255 default:
256 try_help (0, 0);
259 if (optind == argc)
260 try_help ("missing operand after `%s'", argv[argc - 1]);
262 file[0] = argv[optind++];
263 file[1] = optind < argc ? argv[optind++] : "-";
265 for (f = 0; f < 2 && optind < argc; f++)
267 char *arg = argv[optind++];
268 ignore_initial[f] = parse_ignore_initial (&arg, 0);
271 if (optind < argc)
272 try_help ("extra operand `%s'", argv[optind]);
274 for (f = 0; f < 2; f++)
276 /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
277 stdin is closed and opening file[0] yields file descriptor 0. */
278 int f1 = f ^ (strcmp (file[1], "-") == 0);
280 /* Two files with the same name are identical.
281 But wait until we open the file once, for proper diagnostics. */
282 if (f && file_name_cmp (file[0], file[1]) == 0)
283 return EXIT_SUCCESS;
285 file_desc[f1] = (strcmp (file[f1], "-") == 0
286 ? STDIN_FILENO
287 : open (file[f1], O_RDONLY, 0));
288 if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
290 if (file_desc[f1] < 0 && comparison_type == type_status)
291 exit (EXIT_TROUBLE);
292 else
293 error (EXIT_TROUBLE, errno, "%s", file[f1]);
296 set_binary_mode (file_desc[f1], 1);
299 /* If the files are links to the same inode and have the same file position,
300 they are identical. */
302 if (0 < same_file (&stat_buf[0], &stat_buf[1])
303 && same_file_attributes (&stat_buf[0], &stat_buf[1])
304 && file_position (0) == file_position (1))
305 return EXIT_SUCCESS;
307 /* If output is redirected to the null device, we may assume `-s'. */
309 if (comparison_type != type_status)
311 struct stat outstat, nullstat;
313 if (fstat (STDOUT_FILENO, &outstat) == 0
314 && stat (NULL_DEVICE, &nullstat) == 0
315 && 0 < same_file (&outstat, &nullstat))
316 comparison_type = type_status;
319 /* If only a return code is needed,
320 and if both input descriptors are associated with plain files,
321 conclude that the files differ if they have different sizes
322 and if more bytes will be compared than are in the smaller file. */
324 if (comparison_type == type_status
325 && S_ISREG (stat_buf[0].st_mode)
326 && S_ISREG (stat_buf[1].st_mode))
328 off_t s0 = stat_buf[0].st_size - file_position (0);
329 off_t s1 = stat_buf[1].st_size - file_position (1);
330 if (s0 < 0)
331 s0 = 0;
332 if (s1 < 0)
333 s1 = 0;
334 if (s0 != s1 && MIN (s0, s1) < bytes)
335 exit (EXIT_FAILURE);
338 /* Get the optimal block size of the files. */
340 buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
341 STAT_BLOCKSIZE (stat_buf[1]),
342 PTRDIFF_MAX - sizeof (word));
344 /* Allocate word-aligned buffers, with space for sentinels at the end. */
346 words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
347 buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
348 buffer[1] = buffer[0] + words_per_buffer;
350 exit_status = cmp ();
352 for (f = 0; f < 2; f++)
353 if (close (file_desc[f]) != 0)
354 error (EXIT_TROUBLE, errno, "%s", file[f]);
355 if (exit_status != 0 && comparison_type != type_status)
356 check_stdout ();
357 exit (exit_status);
358 return exit_status;
361 /* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
362 using `buffer[0]' and `buffer[1]'.
363 Return EXIT_SUCCESS if identical, EXIT_FAILURE if different,
364 >1 if error. */
366 static int
367 cmp (void)
369 off_t line_number = 1; /* Line number (1...) of difference. */
370 off_t byte_number = 1; /* Byte number (1...) of difference. */
371 uintmax_t remaining = bytes; /* Remaining number of bytes to compare. */
372 size_t read0, read1; /* Number of bytes read from each file. */
373 size_t first_diff; /* Offset (0...) in buffers of 1st diff. */
374 size_t smaller; /* The lesser of `read0' and `read1'. */
375 word *buffer0 = buffer[0];
376 word *buffer1 = buffer[1];
377 char *buf0 = (char *) buffer0;
378 char *buf1 = (char *) buffer1;
379 int ret = EXIT_SUCCESS;
380 int f;
381 int offset_width;
383 if (comparison_type == type_all_diffs)
385 off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));
387 for (f = 0; f < 2; f++)
388 if (S_ISREG (stat_buf[f].st_mode))
390 off_t file_bytes = stat_buf[f].st_size - file_position (f);
391 if (file_bytes < byte_number_max)
392 byte_number_max = file_bytes;
395 for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
396 continue;
399 for (f = 0; f < 2; f++)
401 off_t ig = ignore_initial[f];
402 if (ig && file_position (f) == -1)
404 /* lseek failed; read and discard the ignored initial prefix. */
407 size_t bytes_to_read = MIN (ig, buf_size);
408 size_t r = block_read (file_desc[f], buf0, bytes_to_read);
409 if (r != bytes_to_read)
411 if (r == SIZE_MAX)
412 error (EXIT_TROUBLE, errno, "%s", file[f]);
413 break;
415 ig -= r;
417 while (ig);
423 size_t bytes_to_read = buf_size;
425 if (remaining != UINTMAX_MAX)
427 if (remaining < bytes_to_read)
428 bytes_to_read = remaining;
429 remaining -= bytes_to_read;
432 read0 = block_read (file_desc[0], buf0, bytes_to_read);
433 if (read0 == SIZE_MAX)
434 error (EXIT_TROUBLE, errno, "%s", file[0]);
435 read1 = block_read (file_desc[1], buf1, bytes_to_read);
436 if (read1 == SIZE_MAX)
437 error (EXIT_TROUBLE, errno, "%s", file[1]);
439 /* Insert sentinels for the block compare. */
441 buf0[read0] = ~buf1[read0];
442 buf1[read1] = ~buf0[read1];
444 /* If the line number should be written for differing files,
445 compare the blocks and count the number of newlines
446 simultaneously. */
447 first_diff = (comparison_type == type_first_diff
448 ? block_compare_and_count (buffer0, buffer1, &line_number)
449 : block_compare (buffer0, buffer1));
451 byte_number += first_diff;
452 smaller = MIN (read0, read1);
454 if (first_diff < smaller)
456 switch (comparison_type)
458 case type_first_diff:
460 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
461 char line_buf[INT_BUFSIZE_BOUND (off_t)];
462 char const *byte_num = offtostr (byte_number, byte_buf);
463 char const *line_num = offtostr (line_number, line_buf);
464 if (!opt_print_bytes)
466 /* See POSIX 1003.1-2001 for this format. This
467 message is used only in the POSIX locale, so it
468 need not be translated. */
469 static char const char_message[] =
470 "%s %s differ: char %s, line %s\n";
472 /* The POSIX rationale recommends using the word
473 "byte" outside the POSIX locale. Some gettext
474 implementations translate even in the POSIX
475 locale if certain other environment variables
476 are set, so use "byte" if a translation is
477 available, or if outside the POSIX locale. */
478 static char const byte_msgid[] =
479 N_("%s %s differ: byte %s, line %s\n");
480 char const *byte_message = _(byte_msgid);
481 bool use_byte_message = (byte_message != byte_msgid
482 || hard_locale_LC_MESSAGES);
484 printf ((use_byte_message
485 ? byte_message
486 : "%s %s differ: char %s, line %s\n"),
487 file[0], file[1], byte_num, line_num);
489 else
491 unsigned char c0 = buf0[first_diff];
492 unsigned char c1 = buf1[first_diff];
493 char s0[5];
494 char s1[5];
495 sprintc (s0, c0);
496 sprintc (s1, c1);
497 printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
498 file[0], file[1], byte_num, line_num,
499 c0, s0, c1, s1);
502 /* Fall through. */
503 case type_status:
504 return EXIT_FAILURE;
506 case type_all_diffs:
509 unsigned char c0 = buf0[first_diff];
510 unsigned char c1 = buf1[first_diff];
511 if (c0 != c1)
513 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
514 char const *byte_num = offtostr (byte_number, byte_buf);
515 if (!opt_print_bytes)
517 /* See POSIX 1003.1-2001 for this format. */
518 printf ("%*s %3o %3o\n",
519 offset_width, byte_num, c0, c1);
521 else
523 char s0[5];
524 char s1[5];
525 sprintc (s0, c0);
526 sprintc (s1, c1);
527 printf ("%*s %3o %-4s %3o %s\n",
528 offset_width, byte_num, c0, s0, c1, s1);
531 byte_number++;
532 first_diff++;
534 while (first_diff < smaller);
535 ret = EXIT_FAILURE;
536 break;
540 if (read0 != read1)
542 if (comparison_type != type_status)
544 /* See POSIX 1003.1-2001 for this format. */
545 fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
548 return EXIT_FAILURE;
551 while (read0 == buf_size);
553 return ret;
556 /* Compare two blocks of memory P0 and P1 until they differ,
557 and count the number of '\n' occurrences in the common
558 part of P0 and P1.
559 If the blocks are not guaranteed to be different, put sentinels at the ends
560 of the blocks before calling this function.
562 Return the offset of the first byte that differs.
563 Increment *COUNT by the count of '\n' occurrences. */
565 static size_t
566 block_compare_and_count (word const *p0, word const *p1, off_t *count)
568 word l; /* One word from first buffer. */
569 word const *l0, *l1; /* Pointers into each buffer. */
570 char const *c0, *c1; /* Pointers for finding exact address. */
571 size_t cnt = 0; /* Number of '\n' occurrences. */
572 word nnnn; /* Newline, sizeof (word) times. */
573 int i;
575 nnnn = 0;
576 for (i = 0; i < sizeof nnnn; i++)
577 nnnn = (nnnn << CHAR_BIT) | '\n';
579 /* Find the rough position of the first difference by reading words,
580 not bytes. */
582 for (l0 = p0, l1 = p1; (l = *l0) == *l1; l0++, l1++)
584 l ^= nnnn;
585 for (i = 0; i < sizeof l; i++)
587 cnt += ! (unsigned char) l;
588 l >>= CHAR_BIT;
592 /* Find the exact differing position (endianness independent). */
594 for (c0 = (char const *) l0, c1 = (char const *) l1;
595 *c0 == *c1;
596 c0++, c1++)
597 cnt += *c0 == '\n';
599 *count += cnt;
600 return c0 - (char const *) p0;
603 /* Compare two blocks of memory P0 and P1 until they differ.
604 If the blocks are not guaranteed to be different, put sentinels at the ends
605 of the blocks before calling this function.
607 Return the offset of the first byte that differs. */
609 static size_t
610 block_compare (word const *p0, word const *p1)
612 word const *l0, *l1;
613 char const *c0, *c1;
615 /* Find the rough position of the first difference by reading words,
616 not bytes. */
618 for (l0 = p0, l1 = p1; *l0 == *l1; l0++, l1++)
619 continue;
621 /* Find the exact differing position (endianness independent). */
623 for (c0 = (char const *) l0, c1 = (char const *) l1;
624 *c0 == *c1;
625 c0++, c1++)
626 continue;
628 return c0 - (char const *) p0;
/* Store in BUF a null-terminated, visible rendering of the byte C,
   quoted the way cat -t does.

   A byte for which ISPRINT is true is copied as is.  Otherwise a value
   of 128 or more gets an "M-" prefix and is reduced by 128; then a
   value below 32 becomes '^' followed by the value plus 64, and 127
   becomes "^?".  The longest result is four characters plus the
   terminator, so BUF must have room for five bytes.  */

static void
sprintc (char *buf, unsigned char c)
{
  char *out = buf;

  if (! ISPRINT (c))
    {
      if (c >= 128)
        {
          out[0] = 'M';
          out[1] = '-';
          out += 2;
          c -= 128;
        }

      if (c == 127)
        {
          *out++ = '^';
          c = '?';
        }
      else if (c < 32)
        {
          *out++ = '^';
          c += 64;
        }
    }

  out[0] = c;
  out[1] = 0;
}
661 /* Position file F to ignore_initial[F] bytes from its initial position,
662 and yield its new position. Don't try more than once. */
664 static off_t
665 file_position (int f)
667 static bool positioned[2];
668 static off_t position[2];
670 if (! positioned[f])
672 positioned[f] = 1;
673 position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);
675 return position[f];