version 8.7
[coreutils.git] / src / comm.c
blob06b80b0713ec64e62104dfffce27055252df0a40
1 /* comm -- compare two sorted files line by line.
2 Copyright (C) 1986, 1990-1991, 1995-2005, 2008-2010 Free Software
3 Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 /* Written by Richard Stallman and David MacKenzie. */
20 #include <config.h>
22 #include <getopt.h>
23 #include <sys/types.h>
24 #include "system.h"
25 #include "linebuffer.h"
26 #include "error.h"
27 #include "fadvise.h"
28 #include "hard-locale.h"
29 #include "quote.h"
30 #include "stdio--.h"
31 #include "memcmp2.h"
32 #include "xmemcoll.h"
/* The official name of this program (e.g., no `g' prefix).  */
#define PROGRAM_NAME "comm"

/* Author list, passed to case_GETOPT_VERSION_CHAR in main.  */
#define AUTHORS \
  proper_name ("Richard M. Stallman"), \
  proper_name ("David MacKenzie")

/* Undefine, to avoid warning about redefinition on some systems.  */
#undef min

/* Yield the smaller of X and Y.  Whichever argument is selected is
   evaluated a second time, so pass only expressions that have no
   side effects.  */
#define min(x, y) ((x) < (y) ? (x) : (y))
/* True if the LC_COLLATE locale is hard.  */
static bool hard_LC_COLLATE;

/* If true, print lines that are found only in file 1.  */
static bool only_file_1;

/* If true, print lines that are found only in file 2.  */
static bool only_file_2;

/* If true, print lines that are found in both files.  */
static bool both;

/* True once at least one unpairable line has been seen.  */
static bool seen_unpairable;

/* issued_disorder_warning[N - 1] is true once a disorder message has
   been issued for input file N (N is 1 or 2).  */
static bool issued_disorder_warning[2];

/* How strictly to check that the input is correctly ordered.  */
static enum
  {
    /* Check only after an unpairable line has been seen; disorder is
       reported but is not immediately fatal.  */
    CHECK_ORDER_DEFAULT,

    /* Always check; disorder is a fatal error.  */
    CHECK_ORDER_ENABLED,

    /* Never check.  */
    CHECK_ORDER_DISABLED
  } check_input_order;

/* Output columns will be delimited with this string, which may be set
   on the command-line with --output-delimiter=STR.  The default is a
   single TAB character.  */
static char const *delimiter;
76 /* For long options that have no equivalent short option, use a
77 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
78 enum
80 CHECK_ORDER_OPTION = CHAR_MAX + 1,
81 NOCHECK_ORDER_OPTION,
82 OUTPUT_DELIMITER_OPTION
85 static struct option const long_options[] =
87 {"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
88 {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
89 {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
90 {GETOPT_HELP_OPTION_DECL},
91 {GETOPT_VERSION_OPTION_DECL},
92 {NULL, 0, NULL, 0}
/* Print usage information and terminate the program with STATUS.
   When STATUS is not EXIT_SUCCESS, only a one-line hint naming --help
   is written to stderr.  Otherwise the full help text is written to
   stdout, followed by a call to emit_ancillary_info.  In both cases
   the function ends by calling exit (STATUS), so it does not return.

   NOTE(review): this copy of the function carries a numeric prefix on
   every line and appears to have lost its brace-only lines, several
   string-closing lines and the spacing inside the help strings.
   Restore it from the upstream file rather than editing it in place.  */
97 void
98 usage (int status)
100 if (status != EXIT_SUCCESS)
101 fprintf (stderr, _("Try `%s --help' for more information.\n"),
102 program_name);
103 else
105 printf (_("\
106 Usage: %s [OPTION]... FILE1 FILE2\n\
108 program_name);
109 fputs (_("\
110 Compare sorted files FILE1 and FILE2 line by line.\n\
111 "), stdout);
112 fputs (_("\
114 With no options, produce three-column output. Column one contains\n\
115 lines unique to FILE1, column two contains lines unique to FILE2,\n\
116 and column three contains lines common to both files.\n\
117 "), stdout);
118 fputs (_("\
120 -1 suppress column 1 (lines unique to FILE1)\n\
121 -2 suppress column 2 (lines unique to FILE2)\n\
122 -3 suppress column 3 (lines that appear in both files)\n\
123 "), stdout);
124 fputs (_("\
126 --check-order check that the input is correctly sorted, even\n\
127 if all input lines are pairable\n\
128 --nocheck-order do not check that the input is correctly sorted\n\
129 "), stdout);
130 fputs (_("\
131 --output-delimiter=STR separate columns with STR\n\
132 "), stdout);
133 fputs (HELP_OPTION_DESCRIPTION, stdout);
134 fputs (VERSION_OPTION_DESCRIPTION, stdout);
135 fputs (_("\
137 Note, comparisons honor the rules specified by `LC_COLLATE'.\n\
138 "), stdout);
139 printf (_("\
141 Examples:\n\
142 %s -12 file1 file2 Print only lines present in both file1 and file2.\n\
143 %s -3 file1 file2 Print lines in file1 not in file2, and vice versa.\n\
145 program_name, program_name);
146 emit_ancillary_info ();
148 exit (status);
151 /* Output the line in linebuffer LINE to stream STREAM
152 provided the switches say it should be output.
153 CLASS is 1 for a line found only in file 1,
154 2 for a line only in file 2, 3 for a line in both. */
156 static void
157 writeline (struct linebuffer const *line, FILE *stream, int class)
159 switch (class)
161 case 1:
162 if (!only_file_1)
163 return;
164 break;
166 case 2:
167 if (!only_file_2)
168 return;
169 /* Print a delimiter if we are printing lines from file 1. */
170 if (only_file_1)
171 fputs (delimiter, stream);
172 break;
174 case 3:
175 if (!both)
176 return;
177 /* Print a delimiter if we are printing lines from file 1. */
178 if (only_file_1)
179 fputs (delimiter, stream);
180 /* Print a delimiter if we are printing lines from file 2. */
181 if (only_file_2)
182 fputs (delimiter, stream);
183 break;
186 fwrite (line->buffer, sizeof (char), line->length, stream);
189 /* Check that successive input lines PREV and CURRENT from input file
190 WHATFILE are presented in order.
192 If the user specified --nocheck-order, the check is not made.
193 If the user specified --check-order, the problem is fatal.
194 Otherwise (the default), the message is simply a warning.
196 A message is printed at most once per input file.
198 This funtion was copied (nearly) verbatim from `src/join.c'. */
200 static void
201 check_order (struct linebuffer const *prev,
202 struct linebuffer const *current,
203 int whatfile)
206 if (check_input_order != CHECK_ORDER_DISABLED
207 && ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable))
209 if (!issued_disorder_warning[whatfile - 1])
211 int order;
213 if (hard_LC_COLLATE)
214 order = xmemcoll (prev->buffer, prev->length - 1,
215 current->buffer, current->length - 1);
216 else
217 order = memcmp2 (prev->buffer, prev->length - 1,
218 current->buffer, current->length - 1);
220 if (0 < order)
222 error ((check_input_order == CHECK_ORDER_ENABLED
223 ? EXIT_FAILURE : 0),
224 0, _("file %d is not in sorted order"), whatfile);
226 /* If we get to here, the message was just a warning, but we
227 want only to issue it once. */
228 issued_disorder_warning[whatfile - 1] = true;
234 /* Compare INFILES[0] and INFILES[1].
235 If either is "-", use the standard input for that file.
236 Assume that each input file is sorted;
237 merge them and output the result. */
239 static void
240 compare_files (char **infiles)
242 /* For each file, we have four linebuffers in lba. */
243 struct linebuffer lba[2][4];
245 /* thisline[i] points to the linebuffer holding the next available line
246 in file i, or is NULL if there are no lines left in that file. */
247 struct linebuffer *thisline[2];
249 /* all_line[i][alt[i][0]] also points to the linebuffer holding the
250 current line in file i. We keep two buffers of history around so we
251 can look two lines back when we get to the end of a file. */
252 struct linebuffer *all_line[2][4];
254 /* This is used to rotate through the buffers for each input file. */
255 int alt[2][3];
257 /* streams[i] holds the input stream for file i. */
258 FILE *streams[2];
260 int i, j;
262 /* Initialize the storage. */
263 for (i = 0; i < 2; i++)
265 for (j = 0; j < 4; j++)
267 initbuffer (&lba[i][j]);
268 all_line[i][j] = &lba[i][j];
270 alt[i][0] = 0;
271 alt[i][1] = 0;
272 alt[i][2] = 0;
273 streams[i] = (STREQ (infiles[i], "-") ? stdin : fopen (infiles[i], "r"));
274 if (!streams[i])
275 error (EXIT_FAILURE, errno, "%s", infiles[i]);
277 fadvise (streams[i], FADVISE_SEQUENTIAL);
279 thisline[i] = readlinebuffer (all_line[i][alt[i][0]], streams[i]);
280 if (ferror (streams[i]))
281 error (EXIT_FAILURE, errno, "%s", infiles[i]);
284 while (thisline[0] || thisline[1])
286 int order;
287 bool fill_up[2] = { false, false };
289 /* Compare the next available lines of the two files. */
291 if (!thisline[0])
292 order = 1;
293 else if (!thisline[1])
294 order = -1;
295 else
297 if (hard_LC_COLLATE)
298 order = xmemcoll (thisline[0]->buffer, thisline[0]->length - 1,
299 thisline[1]->buffer, thisline[1]->length - 1);
300 else
302 size_t len = min (thisline[0]->length, thisline[1]->length) - 1;
303 order = memcmp (thisline[0]->buffer, thisline[1]->buffer, len);
304 if (order == 0)
305 order = (thisline[0]->length < thisline[1]->length
306 ? -1
307 : thisline[0]->length != thisline[1]->length);
311 /* Output the line that is lesser. */
312 if (order == 0)
313 writeline (thisline[1], stdout, 3);
314 else
316 seen_unpairable = true;
317 if (order <= 0)
318 writeline (thisline[0], stdout, 1);
319 else
320 writeline (thisline[1], stdout, 2);
323 /* Step the file the line came from.
324 If the files match, step both files. */
325 if (0 <= order)
326 fill_up[1] = true;
327 if (order <= 0)
328 fill_up[0] = true;
330 for (i = 0; i < 2; i++)
331 if (fill_up[i])
333 /* Rotate the buffers for this file. */
334 alt[i][2] = alt[i][1];
335 alt[i][1] = alt[i][0];
336 alt[i][0] = (alt[i][0] + 1) & 0x03;
338 thisline[i] = readlinebuffer (all_line[i][alt[i][0]], streams[i]);
340 if (thisline[i])
341 check_order (all_line[i][alt[i][1]], thisline[i], i + 1);
343 /* If this is the end of the file we may need to re-check
344 the order of the previous two lines, since we might have
345 discovered an unpairable match since we checked before. */
346 else if (all_line[i][alt[i][2]]->buffer)
347 check_order (all_line[i][alt[i][2]],
348 all_line[i][alt[i][1]], i + 1);
350 if (ferror (streams[i]))
351 error (EXIT_FAILURE, errno, "%s", infiles[i]);
353 fill_up[i] = false;
357 for (i = 0; i < 2; i++)
358 if (fclose (streams[i]) != 0)
359 error (EXIT_FAILURE, errno, "%s", infiles[i]);
363 main (int argc, char **argv)
365 int c;
367 initialize_main (&argc, &argv);
368 set_program_name (argv[0]);
369 setlocale (LC_ALL, "");
370 bindtextdomain (PACKAGE, LOCALEDIR);
371 textdomain (PACKAGE);
372 hard_LC_COLLATE = hard_locale (LC_COLLATE);
374 atexit (close_stdout);
376 only_file_1 = true;
377 only_file_2 = true;
378 both = true;
380 seen_unpairable = false;
381 issued_disorder_warning[0] = issued_disorder_warning[1] = false;
382 check_input_order = CHECK_ORDER_DEFAULT;
384 while ((c = getopt_long (argc, argv, "123", long_options, NULL)) != -1)
385 switch (c)
387 case '1':
388 only_file_1 = false;
389 break;
391 case '2':
392 only_file_2 = false;
393 break;
395 case '3':
396 both = false;
397 break;
399 case NOCHECK_ORDER_OPTION:
400 check_input_order = CHECK_ORDER_DISABLED;
401 break;
403 case CHECK_ORDER_OPTION:
404 check_input_order = CHECK_ORDER_ENABLED;
405 break;
407 case OUTPUT_DELIMITER_OPTION:
408 if (delimiter && !STREQ (delimiter, optarg))
409 error (EXIT_FAILURE, 0, _("multiple delimiters specified"));
410 delimiter = optarg;
411 if (!*delimiter)
413 error (EXIT_FAILURE, 0, _("empty %s not allowed"),
414 quote ("--output-delimiter"));
416 break;
418 case_GETOPT_HELP_CHAR;
420 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
422 default:
423 usage (EXIT_FAILURE);
426 if (argc - optind < 2)
428 if (argc <= optind)
429 error (0, 0, _("missing operand"));
430 else
431 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
432 usage (EXIT_FAILURE);
435 if (2 < argc - optind)
437 error (0, 0, _("extra operand %s"), quote (argv[optind + 2]));
438 usage (EXIT_FAILURE);
441 /* The default delimiter is a TAB. */
442 if (!delimiter)
443 delimiter = "\t";
445 compare_files (argv + optind);
447 if (issued_disorder_warning[0] || issued_disorder_warning[1])
448 exit (EXIT_FAILURE);
449 else
450 exit (EXIT_SUCCESS);