1 /* comm -- compare two sorted files line by line.
2 Copyright (C) 1986, 1990-1991, 1995-2005, 2008-2010 Free Software
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 /* Written by Richard Stallman and David MacKenzie. */
23 #include <sys/types.h>
25 #include "linebuffer.h"
28 #include "hard-locale.h"
34 /* The official name of this program (e.g., no `g' prefix). */
35 #define PROGRAM_NAME "comm"
38 proper_name ("Richard M. Stallman"), \
39 proper_name ("David MacKenzie")
41 /* Undefine, to avoid warning about redefinition on some systems. */
43 #define min(x, y) ((x) < (y) ? (x) : (y))
45 /* True if the LC_COLLATE locale is hard. */
46 static bool hard_LC_COLLATE
;
48 /* If true, print lines that are found only in file 1. */
49 static bool only_file_1
;
51 /* If true, print lines that are found only in file 2. */
52 static bool only_file_2
;
54 /* If true, print lines that are found in both files. */
57 /* If true, we have seen at least one unpairable line. */
58 static bool seen_unpairable
;
60 /* If true, we have warned about disorder in that file (indexed by file number - 1). */
61 static bool issued_disorder_warning
[2];
63 /* Whether to check that the input is correctly ordered: by default, always (--check-order), or never (--nocheck-order). */
71 /* Output columns will be delimited with this string, which may be set
72 on the command-line with --output-delimiter=STR. The default is a
73 single TAB character. */
74 static char const *delimiter
;
76 /* For long options that have no equivalent short option, use a
77 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
80 CHECK_ORDER_OPTION
= CHAR_MAX
+ 1,
82 OUTPUT_DELIMITER_OPTION
85 static struct option
const long_options
[] =
87 {"check-order", no_argument
, NULL
, CHECK_ORDER_OPTION
},
88 {"nocheck-order", no_argument
, NULL
, NOCHECK_ORDER_OPTION
},
89 {"output-delimiter", required_argument
, NULL
, OUTPUT_DELIMITER_OPTION
},
90 {GETOPT_HELP_OPTION_DECL
},
91 {GETOPT_VERSION_OPTION_DECL
},
100 if (status
!= EXIT_SUCCESS
)
101 fprintf (stderr
, _("Try `%s --help' for more information.\n"),
106 Usage: %s [OPTION]... FILE1 FILE2\n\
110 Compare sorted files FILE1 and FILE2 line by line.\n\
114 With no options, produce three-column output. Column one contains\n\
115 lines unique to FILE1, column two contains lines unique to FILE2,\n\
116 and column three contains lines common to both files.\n\
120 -1 suppress column 1 (lines unique to FILE1)\n\
121 -2 suppress column 2 (lines unique to FILE2)\n\
122 -3 suppress column 3 (lines that appear in both files)\n\
126 --check-order check that the input is correctly sorted, even\n\
127 if all input lines are pairable\n\
128 --nocheck-order do not check that the input is correctly sorted\n\
131 --output-delimiter=STR separate columns with STR\n\
133 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
134 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
137 Note, comparisons honor the rules specified by `LC_COLLATE'.\n\
142 %s -12 file1 file2 Print only lines present in both file1 and file2.\n\
143 %s -3 file1 file2 Print lines in file1 not in file2, and vice versa.\n\
145 program_name
, program_name
);
146 emit_ancillary_info ();
151 /* Output the line in linebuffer LINE to stream STREAM
152 provided the switches say it should be output.
153 CLASS is 1 for a line found only in file 1,
154 2 for a line only in file 2, 3 for a line in both. */
157 writeline (struct linebuffer
const *line
, FILE *stream
, int class)
169 /* Print a delimiter if we are printing lines from file 1. */
171 fputs (delimiter
, stream
);
177 /* Print a delimiter if we are printing lines from file 1. */
179 fputs (delimiter
, stream
);
180 /* Print a delimiter if we are printing lines from file 2. */
182 fputs (delimiter
, stream
);
186 fwrite (line
->buffer
, sizeof (char), line
->length
, stream
);
189 /* Check that successive input lines PREV and CURRENT from input file
190 WHATFILE are presented in order.
192 If the user specified --nocheck-order, the check is not made.
193 If the user specified --check-order, the problem is fatal.
194 Otherwise (the default), the message is simply a warning.
196 A message is printed at most once per input file.
198 This function was copied (nearly) verbatim from `src/join.c'. */
201 check_order (struct linebuffer
const *prev
,
202 struct linebuffer
const *current
,
206 if (check_input_order
!= CHECK_ORDER_DISABLED
207 && ((check_input_order
== CHECK_ORDER_ENABLED
) || seen_unpairable
))
209 if (!issued_disorder_warning
[whatfile
- 1])
214 order
= xmemcoll (prev
->buffer
, prev
->length
- 1,
215 current
->buffer
, current
->length
- 1);
217 order
= memcmp2 (prev
->buffer
, prev
->length
- 1,
218 current
->buffer
, current
->length
- 1);
222 error ((check_input_order
== CHECK_ORDER_ENABLED
224 0, _("file %d is not in sorted order"), whatfile
);
226 /* If we get to here, the message was just a warning, but we
227 want to issue it only once. */
228 issued_disorder_warning
[whatfile
- 1] = true;
234 /* Compare INFILES[0] and INFILES[1].
235 If either is "-", use the standard input for that file.
236 Assume that each input file is sorted;
237 merge them and output the result. */
240 compare_files (char **infiles
)
242 /* For each file, we have four linebuffers in lba. */
243 struct linebuffer lba
[2][4];
245 /* thisline[i] points to the linebuffer holding the next available line
246 in file i, or is NULL if there are no lines left in that file. */
247 struct linebuffer
*thisline
[2];
249 /* all_line[i][alt[i][0]] also points to the linebuffer holding the
250 current line in file i. We keep two buffers of history around so we
251 can look two lines back when we get to the end of a file. */
252 struct linebuffer
*all_line
[2][4];
254 /* This is used to rotate through the buffers for each input file. */
257 /* streams[i] holds the input stream for file i. */
262 /* Initialize the storage. */
263 for (i
= 0; i
< 2; i
++)
265 for (j
= 0; j
< 4; j
++)
267 initbuffer (&lba
[i
][j
]);
268 all_line
[i
][j
] = &lba
[i
][j
];
273 streams
[i
] = (STREQ (infiles
[i
], "-") ? stdin
: fopen (infiles
[i
], "r"));
275 error (EXIT_FAILURE
, errno
, "%s", infiles
[i
]);
277 fadvise (streams
[i
], FADVISE_SEQUENTIAL
);
279 thisline
[i
] = readlinebuffer (all_line
[i
][alt
[i
][0]], streams
[i
]);
280 if (ferror (streams
[i
]))
281 error (EXIT_FAILURE
, errno
, "%s", infiles
[i
]);
284 while (thisline
[0] || thisline
[1])
287 bool fill_up
[2] = { false, false };
289 /* Compare the next available lines of the two files. */
293 else if (!thisline
[1])
298 order
= xmemcoll (thisline
[0]->buffer
, thisline
[0]->length
- 1,
299 thisline
[1]->buffer
, thisline
[1]->length
- 1);
302 size_t len
= min (thisline
[0]->length
, thisline
[1]->length
) - 1;
303 order
= memcmp (thisline
[0]->buffer
, thisline
[1]->buffer
, len
);
305 order
= (thisline
[0]->length
< thisline
[1]->length
307 : thisline
[0]->length
!= thisline
[1]->length
);
311 /* Output the line that is lesser. */
313 writeline (thisline
[1], stdout
, 3);
316 seen_unpairable
= true;
318 writeline (thisline
[0], stdout
, 1);
320 writeline (thisline
[1], stdout
, 2);
323 /* Step the file the line came from.
324 If the files match, step both files. */
330 for (i
= 0; i
< 2; i
++)
333 /* Rotate the buffers for this file. */
334 alt
[i
][2] = alt
[i
][1];
335 alt
[i
][1] = alt
[i
][0];
336 alt
[i
][0] = (alt
[i
][0] + 1) & 0x03;
338 thisline
[i
] = readlinebuffer (all_line
[i
][alt
[i
][0]], streams
[i
]);
341 check_order (all_line
[i
][alt
[i
][1]], thisline
[i
], i
+ 1);
343 /* If this is the end of the file we may need to re-check
344 the order of the previous two lines, since we might have
345 discovered an unpairable line since we checked before. */
346 else if (all_line
[i
][alt
[i
][2]]->buffer
)
347 check_order (all_line
[i
][alt
[i
][2]],
348 all_line
[i
][alt
[i
][1]], i
+ 1);
350 if (ferror (streams
[i
]))
351 error (EXIT_FAILURE
, errno
, "%s", infiles
[i
]);
357 for (i
= 0; i
< 2; i
++)
358 if (fclose (streams
[i
]) != 0)
359 error (EXIT_FAILURE
, errno
, "%s", infiles
[i
]);
363 main (int argc
, char **argv
)
367 initialize_main (&argc
, &argv
);
368 set_program_name (argv
[0]);
369 setlocale (LC_ALL
, "");
370 bindtextdomain (PACKAGE
, LOCALEDIR
);
371 textdomain (PACKAGE
);
372 hard_LC_COLLATE
= hard_locale (LC_COLLATE
);
374 atexit (close_stdout
);
380 seen_unpairable
= false;
381 issued_disorder_warning
[0] = issued_disorder_warning
[1] = false;
382 check_input_order
= CHECK_ORDER_DEFAULT
;
384 while ((c
= getopt_long (argc
, argv
, "123", long_options
, NULL
)) != -1)
399 case NOCHECK_ORDER_OPTION
:
400 check_input_order
= CHECK_ORDER_DISABLED
;
403 case CHECK_ORDER_OPTION
:
404 check_input_order
= CHECK_ORDER_ENABLED
;
407 case OUTPUT_DELIMITER_OPTION
:
408 if (delimiter
&& !STREQ (delimiter
, optarg
))
409 error (EXIT_FAILURE
, 0, _("multiple delimiters specified"));
413 error (EXIT_FAILURE
, 0, _("empty %s not allowed"),
414 quote ("--output-delimiter"));
418 case_GETOPT_HELP_CHAR
;
420 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
423 usage (EXIT_FAILURE
);
426 if (argc
- optind
< 2)
429 error (0, 0, _("missing operand"));
431 error (0, 0, _("missing operand after %s"), quote (argv
[argc
- 1]));
432 usage (EXIT_FAILURE
);
435 if (2 < argc
- optind
)
437 error (0, 0, _("extra operand %s"), quote (argv
[optind
+ 2]));
438 usage (EXIT_FAILURE
);
441 /* The default delimiter is a TAB. */
445 compare_files (argv
+ optind
);
447 if (issued_disorder_warning
[0] || issued_disorder_warning
[1])