tests: unpack xz-compressed tarballs when possible, not always *.gz
[coreutils.git] / src / join.c
blobd734a91ea2995b1e9da9af3f46653ab21a3742b9
1 /* join - join lines of two files on a common field
2 Copyright (C) 91, 1995-2006, 2008-2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written by Mike Haertel, mike@gnu.ai.mit.edu. */
19 #include <config.h>
21 #include <assert.h>
22 #include <sys/types.h>
23 #include <getopt.h>
25 #include "system.h"
26 #include "error.h"
27 #include "hard-locale.h"
28 #include "linebuffer.h"
29 #include "memcasecmp.h"
30 #include "quote.h"
31 #include "stdio--.h"
32 #include "xmemcoll.h"
33 #include "xstrtol.h"
34 #include "argmatch.h"
/* The official name of this program (e.g., no `g' prefix).  */
#define PROGRAM_NAME "join"

/* Author credit; the name is passed through proper_name.  */
#define AUTHORS proper_name ("Mike Haertel")

/* From this point on the preprocessor rewrites every `join' token in
   this file as `system_join', including the name of the function
   defined below.  NOTE(review): presumably this avoids a clash with a
   `join' symbol declared by some system header -- confirm.  */
#define join system_join
/* Exchange the two `struct line *' lvalues A and B.

   The expansion is a single statement that deliberately has no
   trailing semicolon: the caller supplies it, so `SWAPLINES (x, y);'
   stays one statement and can be used as the body of an unbraced
   `if' that is followed by `else'.  Both arguments are parenthesized
   so that arbitrary lvalue expressions may be passed.  Each argument
   is evaluated twice, so it must not have side effects.  */
#define SWAPLINES(a, b) do { \
  struct line *swaplines_tmp = (a); \
  (a) = (b); \
  (b) = swaplines_tmp; \
} while (0)
/* One node of the singly linked list that says which fields to print,
   and in what order, for every output line.  */
struct outlist
  {
    /* Source of the field: 1 selects the first file argument, 2 the
       second, and 0 requests the join field itself.  */
    int file;

    /* Zero-based index of the field; only meaningful when FILE is
       1 or 2.  */
    size_t field;

    /* Next node in the list.  */
    struct outlist *next;
  };
/* Location of one field inside a line's buffer.  The text is
   described by a start pointer and a byte count.  */
struct field
  {
    char *beg;			/* Address of the field's first character.  */
    size_t len;			/* Number of bytes in the field.  */
  };
70 /* A line read from an input file. */
71 struct line
73 struct linebuffer buf; /* The line itself. */
74 size_t nfields; /* Number of elements in `fields'. */
75 size_t nfields_allocated; /* Number of elements allocated for `fields'. */
76 struct field *fields;
/* A run of consecutive lines from one file that all carry the same
   join field value.  */
struct seq
  {
    size_t count;		/* Entries of `lines' currently in use.  */
    size_t alloc;		/* Entries of `lines' that are allocated.  */
    struct line **lines;	/* The lines themselves.  */
  };
88 /* The previous line read from each file. */
89 static struct line *prevline[2] = {NULL, NULL};
91 /* This provides an extra line buffer for each file. We need these if we
92 try to read two consecutive lines into the same buffer, since we don't
93 want to overwrite the previous buffer before we check order. */
94 static struct line *spareline[2] = {NULL, NULL};
96 /* True if the LC_COLLATE locale is hard. */
97 static bool hard_LC_COLLATE;
99 /* If nonzero, print unpairable lines in file 1 or 2. */
100 static bool print_unpairables_1, print_unpairables_2;
102 /* If nonzero, print pairable lines. */
103 static bool print_pairables;
105 /* If nonzero, we have seen at least one unpairable line. */
106 static bool seen_unpairable;
108 /* If nonzero, we have warned about disorder in that file. */
109 static bool issued_disorder_warning[2];
111 /* Empty output field filler. */
112 static char const *empty_filler;
114 /* Field to join on; SIZE_MAX means they haven't been determined yet. */
115 static size_t join_field_1 = SIZE_MAX;
116 static size_t join_field_2 = SIZE_MAX;
118 /* List of fields to print. */
119 static struct outlist outlist_head;
121 /* Last element in `outlist', where a new element can be added. */
122 static struct outlist *outlist_end = &outlist_head;
124 /* Tab character separating fields. If negative, fields are separated
125 by any nonempty string of blanks, otherwise by exactly one
126 tab character whose value (when cast to unsigned char) equals TAB. */
127 static int tab = -1;
/* How strictly the ordering of the input is checked.  The DEFAULT and
   ENABLED settings both allow the check; DISABLED turns it off.  */
static enum
  {
    CHECK_ORDER_DEFAULT = 0,
    CHECK_ORDER_ENABLED = 1,
    CHECK_ORDER_DISABLED = 2
  } check_input_order;
/* getopt_long return codes for the options that have no short form.
   They start just above CHAR_MAX so they cannot collide with any
   option character.  */
enum
{
  CHECK_ORDER_OPTION = CHAR_MAX + 1,
  NOCHECK_ORDER_OPTION = CHAR_MAX + 2
};
/* Long options accepted by getopt_long in `main'.  Each entry maps a
   long name to the value getopt_long returns for it; the all-zero
   entry terminates the table.  */
static struct option const longopts[] =
{
  {"ignore-case", no_argument, NULL, 'i'},
  {"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
  {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {NULL, 0, NULL, 0}
};
/* Placeholder passed to prjoin in place of the missing partner when a
   non-joining line is printed.  Being a zero-initialized static, it
   has no fields.  */
static struct line uni_blank;

/* True if case is to be ignored when comparing join fields.  */
static bool ignore_case;
/* Print usage information and exit with status STATUS.

   When STATUS is not EXIT_SUCCESS only a one-line hint is written to
   stderr; otherwise the full help text is written to stdout.  This
   function does not return.

   NOTE(review): in the view this was reconstructed from, runs of
   whitespace inside the help strings appear to have been collapsed
   (option columns are no longer aligned).  The literals below are
   reproduced as seen; check their spacing against the upstream file
   before relying on the exact output.  */
void
usage (int status)
{
  if (status != EXIT_SUCCESS)
    fprintf (stderr, _("Try `%s --help' for more information.\n"),
             program_name);
  else
    {
      printf (_("\
Usage: %s [OPTION]... FILE1 FILE2\n\
"),
              program_name);
      fputs (_("\
For each pair of input lines with identical join fields, write a line to\n\
standard output. The default join field is the first, delimited\n\
by whitespace. When FILE1 or FILE2 (not both) is -, read standard input.\n\
\n\
-a FILENUM print unpairable lines coming from file FILENUM, where\n\
FILENUM is 1 or 2, corresponding to FILE1 or FILE2\n\
-e EMPTY replace missing input fields with EMPTY\n\
"), stdout);
      fputs (_("\
-i, --ignore-case ignore differences in case when comparing fields\n\
-j FIELD equivalent to `-1 FIELD -2 FIELD'\n\
-o FORMAT obey FORMAT while constructing output line\n\
-t CHAR use CHAR as input and output field separator\n\
"), stdout);
      fputs (_("\
-v FILENUM like -a FILENUM, but suppress joined output lines\n\
-1 FIELD join on this FIELD of file 1\n\
-2 FIELD join on this FIELD of file 2\n\
--check-order check that the input is correctly sorted, even\n\
if all input lines are pairable\n\
--nocheck-order do not check that the input is correctly sorted\n\
"), stdout);
      fputs (HELP_OPTION_DESCRIPTION, stdout);
      fputs (VERSION_OPTION_DESCRIPTION, stdout);
      fputs (_("\
\n\
Unless -t CHAR is given, leading blanks separate fields and are ignored,\n\
else fields are separated by CHAR. Any FIELD is a field number counted\n\
from 1. FORMAT is one or more comma or blank separated specifications,\n\
each being `FILENUM.FIELD' or `0'. Default FORMAT outputs the join field,\n\
the remaining fields from FILE1, the remaining fields from FILE2, all\n\
separated by CHAR.\n\
\n\
Important: FILE1 and FILE2 must be sorted on the join fields.\n\
E.g., use `sort -k 1b,1' if `join' has no options.\n\
Note, comparisons honor the rules specified by `LC_COLLATE'.\n\
If the input is not sorted and some lines cannot be joined, a\n\
warning message will be given.\n\
"), stdout);
      emit_ancillary_info ();
    }
  exit (status);
}
217 /* Record a field in LINE, with location FIELD and size LEN. */
219 static void
220 extract_field (struct line *line, char *field, size_t len)
222 if (line->nfields >= line->nfields_allocated)
224 line->fields = X2NREALLOC (line->fields, &line->nfields_allocated);
226 line->fields[line->nfields].beg = field;
227 line->fields[line->nfields].len = len;
228 ++(line->nfields);
231 /* Fill in the `fields' structure in LINE. */
233 static void
234 xfields (struct line *line)
236 char *ptr = line->buf.buffer;
237 char const *lim = ptr + line->buf.length - 1;
239 if (ptr == lim)
240 return;
242 if (0 <= tab)
244 char *sep;
245 for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
246 extract_field (line, ptr, sep - ptr);
248 else
250 /* Skip leading blanks before the first field. */
251 while (isblank (to_uchar (*ptr)))
252 if (++ptr == lim)
253 return;
257 char *sep;
258 for (sep = ptr + 1; sep != lim && ! isblank (to_uchar (*sep)); sep++)
259 continue;
260 extract_field (line, ptr, sep - ptr);
261 if (sep == lim)
262 return;
263 for (ptr = sep + 1; ptr != lim && isblank (to_uchar (*ptr)); ptr++)
264 continue;
266 while (ptr != lim);
269 extract_field (line, ptr, lim - ptr);
272 static void
273 freeline (struct line *line)
275 free (line->fields);
276 free (line->buf.buffer);
277 line->buf.buffer = NULL;
280 /* Return <0 if the join field in LINE1 compares less than the one in LINE2;
281 >0 if it compares greater; 0 if it compares equal.
282 Report an error and exit if the comparison fails.
283 Use join fields JF_1 and JF_2 respectively. */
285 static int
286 keycmp (struct line const *line1, struct line const *line2,
287 size_t jf_1, size_t jf_2)
289 /* Start of field to compare in each file. */
290 char *beg1;
291 char *beg2;
293 size_t len1;
294 size_t len2; /* Length of fields to compare. */
295 int diff;
297 if (jf_1 < line1->nfields)
299 beg1 = line1->fields[jf_1].beg;
300 len1 = line1->fields[jf_1].len;
302 else
304 beg1 = NULL;
305 len1 = 0;
308 if (jf_2 < line2->nfields)
310 beg2 = line2->fields[jf_2].beg;
311 len2 = line2->fields[jf_2].len;
313 else
315 beg2 = NULL;
316 len2 = 0;
319 if (len1 == 0)
320 return len2 == 0 ? 0 : -1;
321 if (len2 == 0)
322 return 1;
324 if (ignore_case)
326 /* FIXME: ignore_case does not work with NLS (in particular,
327 with multibyte chars). */
328 diff = memcasecmp (beg1, beg2, MIN (len1, len2));
330 else
332 if (hard_LC_COLLATE)
333 return xmemcoll (beg1, len1, beg2, len2);
334 diff = memcmp (beg1, beg2, MIN (len1, len2));
337 if (diff)
338 return diff;
339 return len1 < len2 ? -1 : len1 != len2;
342 /* Check that successive input lines PREV and CURRENT from input file
343 WHATFILE are presented in order, unless the user may be relying on
344 the GNU extension that input lines may be out of order if no input
345 lines are unpairable.
347 If the user specified --nocheck-order, the check is not made.
348 If the user specified --check-order, the problem is fatal.
349 Otherwise (the default), the message is simply a warning.
351 A message is printed at most once per input file. */
353 static void
354 check_order (const struct line *prev,
355 const struct line *current,
356 int whatfile)
358 if (check_input_order != CHECK_ORDER_DISABLED
359 && ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable))
361 if (!issued_disorder_warning[whatfile-1])
363 size_t join_field = whatfile == 1 ? join_field_1 : join_field_2;
364 if (keycmp (prev, current, join_field, join_field) > 0)
366 error ((check_input_order == CHECK_ORDER_ENABLED
367 ? EXIT_FAILURE : 0),
368 0, _("file %d is not in sorted order"), whatfile);
370 /* If we get to here, the message was just a warning, but we
371 want only to issue it once. */
372 issued_disorder_warning[whatfile-1] = true;
/* Prepare LINE for reuse by marking its field table as empty.  The
   table's storage and the text buffer are left untouched.  */
static inline void
reset_line (struct line *line)
{
  line->nfields = 0;
}
384 static struct line *
385 init_linep (struct line **linep)
387 struct line *line = xmalloc (sizeof *line);
388 memset (line, '\0', sizeof *line);
389 *linep = line;
390 return line;
393 /* Read a line from FP into LINE and split it into fields.
394 Return true if successful. */
396 static bool
397 get_line (FILE *fp, struct line **linep, int which)
399 struct line *line = *linep;
401 if (line == prevline[which - 1])
403 SWAPLINES (line, spareline[which - 1]);
404 *linep = line;
407 if (line)
408 reset_line (line);
409 else
410 line = init_linep (linep);
412 if (! readlinebuffer (&line->buf, fp))
414 if (ferror (fp))
415 error (EXIT_FAILURE, errno, _("read error"));
416 freeline (line);
417 return false;
420 xfields (line);
422 if (prevline[which - 1])
423 check_order (prevline[which - 1], line, which);
425 prevline[which - 1] = line;
426 return true;
429 static void
430 free_spareline (void)
432 size_t i;
434 for (i = 0; i < ARRAY_CARDINALITY (spareline); i++)
436 if (spareline[i])
438 freeline (spareline[i]);
439 free (spareline[i]);
444 static void
445 initseq (struct seq *seq)
447 seq->count = 0;
448 seq->alloc = 0;
449 seq->lines = NULL;
452 /* Read a line from FP and add it to SEQ. Return true if successful. */
454 static bool
455 getseq (FILE *fp, struct seq *seq, int whichfile)
457 if (seq->count == seq->alloc)
459 size_t i;
460 seq->lines = X2NREALLOC (seq->lines, &seq->alloc);
461 for (i = seq->count; i < seq->alloc; i++)
462 seq->lines[i] = NULL;
465 if (get_line (fp, &seq->lines[seq->count], whichfile))
467 ++seq->count;
468 return true;
470 return false;
/* Read a line from FP and add it to SEQ, as the first item if FIRST is
   true, else as the next.  WHICHFILE is the number (1 or 2) of the
   input file FP belongs to.  Return true if a line was read.  When
   FIRST is true the sequence is emptied even if the read fails.  */
static bool
advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile)
{
  if (first)
    seq->count = 0;

  return getseq (fp, seq, whichfile);
}
484 static void
485 delseq (struct seq *seq)
487 size_t i;
488 for (i = 0; i < seq->alloc; i++)
489 if (seq->lines[i])
491 if (seq->lines[i]->buf.buffer)
492 freeline (seq->lines[i]);
493 free (seq->lines[i]);
495 free (seq->lines);
499 /* Print field N of LINE if it exists and is nonempty, otherwise
500 `empty_filler' if it is nonempty. */
502 static void
503 prfield (size_t n, struct line const *line)
505 size_t len;
507 if (n < line->nfields)
509 len = line->fields[n].len;
510 if (len)
511 fwrite (line->fields[n].beg, 1, len, stdout);
512 else if (empty_filler)
513 fputs (empty_filler, stdout);
515 else if (empty_filler)
516 fputs (empty_filler, stdout);
519 /* Print the join of LINE1 and LINE2. */
521 static void
522 prjoin (struct line const *line1, struct line const *line2)
524 const struct outlist *outlist;
525 char output_separator = tab < 0 ? ' ' : tab;
527 outlist = outlist_head.next;
528 if (outlist)
530 const struct outlist *o;
532 o = outlist;
533 while (1)
535 size_t field;
536 struct line const *line;
538 if (o->file == 0)
540 if (line1 == &uni_blank)
542 line = line2;
543 field = join_field_2;
545 else
547 line = line1;
548 field = join_field_1;
551 else
553 line = (o->file == 1 ? line1 : line2);
554 field = o->field;
556 prfield (field, line);
557 o = o->next;
558 if (o == NULL)
559 break;
560 putchar (output_separator);
562 putchar ('\n');
564 else
566 size_t i;
568 if (line1 == &uni_blank)
570 struct line const *t;
571 t = line1;
572 line1 = line2;
573 line2 = t;
575 prfield (join_field_1, line1);
576 for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
578 putchar (output_separator);
579 prfield (i, line1);
581 for (i = join_field_1 + 1; i < line1->nfields; ++i)
583 putchar (output_separator);
584 prfield (i, line1);
587 for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
589 putchar (output_separator);
590 prfield (i, line2);
592 for (i = join_field_2 + 1; i < line2->nfields; ++i)
594 putchar (output_separator);
595 prfield (i, line2);
597 putchar ('\n');
601 /* Print the join of the files in FP1 and FP2. */
603 static void
604 join (FILE *fp1, FILE *fp2)
606 struct seq seq1, seq2;
607 struct line **linep = xmalloc (sizeof *linep);
608 int diff;
609 bool eof1, eof2, checktail;
611 *linep = NULL;
613 /* Read the first line of each file. */
614 initseq (&seq1);
615 getseq (fp1, &seq1, 1);
616 initseq (&seq2);
617 getseq (fp2, &seq2, 2);
619 while (seq1.count && seq2.count)
621 size_t i;
622 diff = keycmp (seq1.lines[0], seq2.lines[0],
623 join_field_1, join_field_2);
624 if (diff < 0)
626 if (print_unpairables_1)
627 prjoin (seq1.lines[0], &uni_blank);
628 advance_seq (fp1, &seq1, true, 1);
629 seen_unpairable = true;
630 continue;
632 if (diff > 0)
634 if (print_unpairables_2)
635 prjoin (&uni_blank, seq2.lines[0]);
636 advance_seq (fp2, &seq2, true, 2);
637 seen_unpairable = true;
638 continue;
641 /* Keep reading lines from file1 as long as they continue to
642 match the current line from file2. */
643 eof1 = false;
645 if (!advance_seq (fp1, &seq1, false, 1))
647 eof1 = true;
648 ++seq1.count;
649 break;
651 while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0],
652 join_field_1, join_field_2));
654 /* Keep reading lines from file2 as long as they continue to
655 match the current line from file1. */
656 eof2 = false;
658 if (!advance_seq (fp2, &seq2, false, 2))
660 eof2 = true;
661 ++seq2.count;
662 break;
664 while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1],
665 join_field_1, join_field_2));
667 if (print_pairables)
669 for (i = 0; i < seq1.count - 1; ++i)
671 size_t j;
672 for (j = 0; j < seq2.count - 1; ++j)
673 prjoin (seq1.lines[i], seq2.lines[j]);
677 if (!eof1)
679 SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]);
680 seq1.count = 1;
682 else
683 seq1.count = 0;
685 if (!eof2)
687 SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]);
688 seq2.count = 1;
690 else
691 seq2.count = 0;
694 /* If the user did not specify --check-order, and the we read the
695 tail ends of both inputs to verify that they are in order. We
696 skip the rest of the tail once we have issued a warning for that
697 file, unless we actually need to print the unpairable lines. */
698 if (check_input_order != CHECK_ORDER_DISABLED
699 && !(issued_disorder_warning[0] && issued_disorder_warning[1]))
700 checktail = true;
701 else
702 checktail = false;
704 if ((print_unpairables_1 || checktail) && seq1.count)
706 if (print_unpairables_1)
707 prjoin (seq1.lines[0], &uni_blank);
708 seen_unpairable = true;
709 while (get_line (fp1, linep, 1))
711 if (print_unpairables_1)
712 prjoin (*linep, &uni_blank);
713 if (issued_disorder_warning[0] && !print_unpairables_1)
714 break;
718 if ((print_unpairables_2 || checktail) && seq2.count)
720 if (print_unpairables_2)
721 prjoin (&uni_blank, seq2.lines[0]);
722 seen_unpairable = true;
723 while (get_line (fp2, linep, 2))
725 if (print_unpairables_2)
726 prjoin (&uni_blank, *linep);
727 if (issued_disorder_warning[1] && !print_unpairables_2)
728 break;
732 free (*linep);
734 free (linep);
735 delseq (&seq1);
736 delseq (&seq2);
739 /* Add a field spec for field FIELD of file FILE to `outlist'. */
741 static void
742 add_field (int file, size_t field)
744 struct outlist *o;
746 assert (file == 0 || file == 1 || file == 2);
747 assert (file != 0 || field == 0);
749 o = xmalloc (sizeof *o);
750 o->file = file;
751 o->field = field;
752 o->next = NULL;
754 /* Add to the end of the list so the fields are in the right order. */
755 outlist_end->next = o;
756 outlist_end = o;
759 /* Convert a string of decimal digits, STR (the 1-based join field number),
760 to an integral value. Upon successful conversion, return one less
761 (the zero-based field number). Silently convert too-large values
762 to SIZE_MAX - 1. Otherwise, if a value cannot be converted, give a
763 diagnostic and exit. */
765 static size_t
766 string_to_join_field (char const *str)
768 size_t result;
769 unsigned long int val;
770 verify (SIZE_MAX <= ULONG_MAX);
772 strtol_error s_err = xstrtoul (str, NULL, 10, &val, "");
773 if (s_err == LONGINT_OVERFLOW || (s_err == LONGINT_OK && SIZE_MAX < val))
774 val = SIZE_MAX;
775 else if (s_err != LONGINT_OK || val == 0)
776 error (EXIT_FAILURE, 0, _("invalid field number: %s"), quote (str));
778 result = val - 1;
780 return result;
/* Decode the single field specifier S into the pair *FILE_INDEX,
   *FIELD_INDEX.  S is either `0', meaning the join field, or
   `FILENUM.FIELD' with FILENUM being 1 or 2.  The field number in S
   is 1-based; *FIELD_INDEX is zero-based.  If S is invalid, give a
   diagnostic and exit.  */

static void
decode_field_spec (const char *s, int *file_index, size_t *field_index)
{
  char const filenum = s[0];

  if (filenum == '0')
    {
      /* `0' must be all alone -- no `.FIELD'.  */
      if (s[1])
        error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
      *file_index = 0;
      *field_index = 0;
    }
  else if (filenum == '1' || filenum == '2')
    {
      if (s[1] != '.')
        error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
      *file_index = filenum - '0';
      *field_index = string_to_join_field (s + 2);
    }
  else
    {
      /* The first character must be 0, 1, or 2.  */
      error (EXIT_FAILURE, 0,
             _("invalid file number in field spec: %s"), quote (s));

      /* Tell gcc -W -Wall that we can't get beyond this point.
         This avoids a warning (otherwise legit) that the caller's copies
         of *file_index and *field_index might be used uninitialized.  */
      abort ();
    }
}
/* Split STR at commas, spaces and tabs, and append each resulting
   field spec to the output list.  STR is modified: every separator
   found is overwritten with a NUL byte.  */

static void
add_field_list (char *str)
{
  char *item = str;

  for (;;)
    {
      int file_index;
      size_t field_index;
      char *sep = strpbrk (item, ", \t");

      /* Terminate the current item in place.  */
      if (sep != NULL)
        *sep = '\0';

      decode_field_spec (item, &file_index, &field_index);
      add_field (file_index, field_index);

      if (sep == NULL)
        break;
      item = sep + 1;
    }
}
/* Store VAL in the join field *VAR.  If *VAR already holds a
   different value (SIZE_MAX meaning "not set yet"), report the
   conflict and exit.  */

static void
set_join_field (size_t *var, size_t val)
{
  size_t const current = *var;
  bool const already_set = (current != SIZE_MAX);

  if (already_set && current != val)
    {
      /* Show the values as 1-based field numbers.  */
      unsigned long int var1 = current + 1;
      unsigned long int val1 = val + 1;
      error (EXIT_FAILURE, 0, _("incompatible join fields %lu, %lu"),
             var1, val1);
    }

  *var = val;
}
/* How a non-option command-line argument may be interpreted.  The
   two MIGHT_BE_J?_ARG values must stay consecutive: `main' computes
   one from the other as MIGHT_BE_J1_ARG + is_j2.  */

enum operand_status
  {
    /* The argument can only be an operand, i.e., one of the files to
       be joined.  */
    MUST_BE_OPERAND,

    /* The argument may belong to the preceding -j1 option, or it may
       be an operand.  */
    MIGHT_BE_J1_ARG,

    /* Likewise, for a preceding -j2 option.  */
    MIGHT_BE_J2_ARG,

    /* The argument may belong to the preceding -o option, or it may
       be an operand.  */
    MIGHT_BE_O_ARG
  };
/* Add NAME to the array of input file NAMES with operand statuses
   OPERAND_STATUS; currently there are *NFILES names in the list.

   *PREV_OPTC_STATUS says how NAME may be interpreted, given the option
   that preceded it; it is recorded as NAME's operand status.
   JOPTION_COUNT[0] and JOPTION_COUNT[1] count the -j1 and -j2 options
   whose argument has not been identified yet.

   If two names are already recorded, one of them cannot have been a
   file name after all.  It is reinterpreted as the argument of the
   option that preceded it and removed from the list, so NAME can take
   the second slot.  If neither recorded name can be reinterpreted,
   NAME is an extra operand, which is a usage error.  */

static void
add_file_name (char *name, char *names[2],
               int operand_status[2], int joption_count[2], int *nfiles,
               int *prev_optc_status, int *optc_status)
{
  int n = *nfiles;

  if (n == 2)
    {
      /* Pick the entry to reinterpret: the second one if the first
         must be an operand, otherwise the first one.  */
      bool op0 = (operand_status[0] == MUST_BE_OPERAND);
      char *arg = names[op0];
      switch (operand_status[op0])
        {
        case MUST_BE_OPERAND:
          error (0, 0, _("extra operand %s"), quote (name));
          /* usage ends by calling exit, so control does not reach the
             next case.  */
          usage (EXIT_FAILURE);

        case MIGHT_BE_J1_ARG:
          joption_count[0]--;
          set_join_field (&join_field_1, string_to_join_field (arg));
          break;

        case MIGHT_BE_J2_ARG:
          joption_count[1]--;
          set_join_field (&join_field_2, string_to_join_field (arg));
          break;

        case MIGHT_BE_O_ARG:
          add_field_list (arg);
          break;
        }
      if (!op0)
        {
          /* The first entry was consumed; shift the second one down.  */
          operand_status[0] = operand_status[1];
          names[0] = names[1];
        }
      n = 1;
    }

  operand_status[n] = *prev_optc_status;
  names[n] = name;
  *nfiles = n + 1;
  /* A further argument after a possible -o argument may itself be
     another piece of the -o field list.  */
  if (*prev_optc_status == MIGHT_BE_O_ARG)
    *optc_status = MIGHT_BE_O_ARG;
}
930 main (int argc, char **argv)
932 int optc_status;
933 int prev_optc_status = MUST_BE_OPERAND;
934 int operand_status[2];
935 int joption_count[2] = { 0, 0 };
936 char *names[2];
937 FILE *fp1, *fp2;
938 int optc;
939 int nfiles = 0;
940 int i;
942 initialize_main (&argc, &argv);
943 set_program_name (argv[0]);
944 setlocale (LC_ALL, "");
945 bindtextdomain (PACKAGE, LOCALEDIR);
946 textdomain (PACKAGE);
947 hard_LC_COLLATE = hard_locale (LC_COLLATE);
949 atexit (close_stdout);
950 atexit (free_spareline);
952 print_pairables = true;
953 seen_unpairable = false;
954 issued_disorder_warning[0] = issued_disorder_warning[1] = false;
955 check_input_order = CHECK_ORDER_DEFAULT;
957 while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:",
958 longopts, NULL))
959 != -1)
961 optc_status = MUST_BE_OPERAND;
963 switch (optc)
965 case 'v':
966 print_pairables = false;
967 /* Fall through. */
969 case 'a':
971 unsigned long int val;
972 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
973 || (val != 1 && val != 2))
974 error (EXIT_FAILURE, 0,
975 _("invalid field number: %s"), quote (optarg));
976 if (val == 1)
977 print_unpairables_1 = true;
978 else
979 print_unpairables_2 = true;
981 break;
983 case 'e':
984 if (empty_filler && ! STREQ (empty_filler, optarg))
985 error (EXIT_FAILURE, 0,
986 _("conflicting empty-field replacement strings"));
987 empty_filler = optarg;
988 break;
990 case 'i':
991 ignore_case = true;
992 break;
994 case '1':
995 set_join_field (&join_field_1, string_to_join_field (optarg));
996 break;
998 case '2':
999 set_join_field (&join_field_2, string_to_join_field (optarg));
1000 break;
1002 case 'j':
1003 if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1]
1004 && optarg == argv[optind - 1] + 2)
1006 /* The argument was either "-j1" or "-j2". */
1007 bool is_j2 = (optarg[0] == '2');
1008 joption_count[is_j2]++;
1009 optc_status = MIGHT_BE_J1_ARG + is_j2;
1011 else
1013 set_join_field (&join_field_1, string_to_join_field (optarg));
1014 set_join_field (&join_field_2, join_field_1);
1016 break;
1018 case 'o':
1019 add_field_list (optarg);
1020 optc_status = MIGHT_BE_O_ARG;
1021 break;
1023 case 't':
1025 unsigned char newtab = optarg[0];
1026 if (! newtab)
1027 error (EXIT_FAILURE, 0, _("empty tab"));
1028 if (optarg[1])
1030 if (STREQ (optarg, "\\0"))
1031 newtab = '\0';
1032 else
1033 error (EXIT_FAILURE, 0, _("multi-character tab %s"),
1034 quote (optarg));
1036 if (0 <= tab && tab != newtab)
1037 error (EXIT_FAILURE, 0, _("incompatible tabs"));
1038 tab = newtab;
1040 break;
1042 case NOCHECK_ORDER_OPTION:
1043 check_input_order = CHECK_ORDER_DISABLED;
1044 break;
1046 case CHECK_ORDER_OPTION:
1047 check_input_order = CHECK_ORDER_ENABLED;
1048 break;
1050 case 1: /* Non-option argument. */
1051 add_file_name (optarg, names, operand_status, joption_count,
1052 &nfiles, &prev_optc_status, &optc_status);
1053 break;
1055 case_GETOPT_HELP_CHAR;
1057 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1059 default:
1060 usage (EXIT_FAILURE);
1063 prev_optc_status = optc_status;
1066 /* Process any operands after "--". */
1067 prev_optc_status = MUST_BE_OPERAND;
1068 while (optind < argc)
1069 add_file_name (argv[optind++], names, operand_status, joption_count,
1070 &nfiles, &prev_optc_status, &optc_status);
1072 if (nfiles != 2)
1074 if (nfiles == 0)
1075 error (0, 0, _("missing operand"));
1076 else
1077 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1078 usage (EXIT_FAILURE);
1081 /* If "-j1" was specified and it turns out not to have had an argument,
1082 treat it as "-j 1". Likewise for -j2. */
1083 for (i = 0; i < 2; i++)
1084 if (joption_count[i] != 0)
1086 set_join_field (&join_field_1, i);
1087 set_join_field (&join_field_2, i);
1090 if (join_field_1 == SIZE_MAX)
1091 join_field_1 = 0;
1092 if (join_field_2 == SIZE_MAX)
1093 join_field_2 = 0;
1095 fp1 = STREQ (names[0], "-") ? stdin : fopen (names[0], "r");
1096 if (!fp1)
1097 error (EXIT_FAILURE, errno, "%s", names[0]);
1098 fp2 = STREQ (names[1], "-") ? stdin : fopen (names[1], "r");
1099 if (!fp2)
1100 error (EXIT_FAILURE, errno, "%s", names[1]);
1101 if (fp1 == fp2)
1102 error (EXIT_FAILURE, errno, _("both files cannot be standard input"));
1103 join (fp1, fp2);
1105 if (fclose (fp1) != 0)
1106 error (EXIT_FAILURE, errno, "%s", names[0]);
1107 if (fclose (fp2) != 0)
1108 error (EXIT_FAILURE, errno, "%s", names[1]);
1110 if (issued_disorder_warning[0] || issued_disorder_warning[1])
1111 exit (EXIT_FAILURE);
1112 else
1113 exit (EXIT_SUCCESS);