1 /* join - join lines of two files on a common field
2 Copyright (C) 1991-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>.
17 Written by Mike Haertel, mike@gnu.ai.mit.edu. */
21 #include <sys/types.h>
27 #include "hard-locale.h"
28 #include "linebuffer.h"
30 #include "memcasecmp.h"
32 #include "skipchars.h"
38 /* The official name of this program (e.g., no 'g' prefix). */
39 #define PROGRAM_NAME "join"
41 #define AUTHORS proper_name ("Mike Haertel")
43 #define join system_join
45 #define SWAPLINES(a, b) do { \
46 struct line *tmp = a; \
51 /* An element of the list identifying which fields to print for each
55 /* File number: 0, 1, or 2. 0 means use the join field.
56 1 means use the first file argument, 2 the second. */
59 /* Field index (zero-based), specified only when FILE is 1 or 2. */
65 /* A field of a line. */
68 char *beg
; /* First character in field. */
69 idx_t len
; /* The length of the field. */
72 /* A line read from an input file. */
75 struct linebuffer buf
; /* The line itself. */
76 idx_t nfields
; /* Number of elements in 'fields'. */
77 idx_t nfields_allocated
; /* Number of elements allocated for 'fields'. */
81 /* One or more consecutive lines read from a file that all have the
82 same join field value. */
85 idx_t count
; /* Elements used in 'lines'. */
86 idx_t alloc
; /* Elements allocated in 'lines'. */
90 /* The previous line read from each file. */
91 static struct line
*prevline
[2] = {nullptr, nullptr};
93 /* The number of lines read from each file. */
94 static uintmax_t line_no
[2] = {0, 0};
96 /* The input file names. */
97 static char *g_names
[2];
99 /* This provides an extra line buffer for each file. We need these if we
100 try to read two consecutive lines into the same buffer, since we don't
101 want to overwrite the previous buffer before we check order. */
102 static struct line
*spareline
[2] = {nullptr, nullptr};
104 /* True if the LC_COLLATE locale is hard. */
105 static bool hard_LC_COLLATE
;
107 /* If true, print unpairable lines from file 1 or file 2, respectively. */
108 static bool print_unpairables_1
, print_unpairables_2
;
110 /* If nonzero, print pairable lines. */
111 static bool print_pairables
;
113 /* If nonzero, we have seen at least one unpairable line. */
114 static bool seen_unpairable
;
116 /* True if we have already warned about disorder in the corresponding file. */
117 static bool issued_disorder_warning
[2];
119 /* Empty output field filler. */
120 static char const *empty_filler
;
122 /* Whether to ensure the same number of fields are output from each line. */
123 static bool autoformat
;
124 /* The number of fields to output for each line.
125 Only significant when autoformat is true. */
126 static idx_t autocount_1
;
127 static idx_t autocount_2
;
129 /* Field to join on; -1 means they haven't been determined yet. */
130 static ptrdiff_t join_field_1
= -1;
131 static ptrdiff_t join_field_2
= -1;
133 /* List of fields to print. */
134 static struct outlist outlist_head
;
136 /* Last element in 'outlist', where a new element can be added. */
137 static struct outlist
*outlist_end
= &outlist_head
;
139 /* Tab character (or encoding error) separating fields. If TAB.len == 0,
140 fields are separated by any nonempty string of blanks, otherwise by
141 exactly one tab character (or encoding error) equal to TAB. */
144 /* The output separator to use, and its length in bytes. */
145 static char const *output_separator
= " ";
146 static idx_t output_seplen
= 1;
148 /* How to check that the input is correctly ordered: CHECK_ORDER_DEFAULT, CHECK_ORDER_ENABLED (--check-order), or CHECK_ORDER_DISABLED (--nocheck-order). */
158 CHECK_ORDER_OPTION
= CHAR_MAX
+ 1,
159 NOCHECK_ORDER_OPTION
,
164 static struct option
const longopts
[] =
166 {"ignore-case", no_argument
, nullptr, 'i'},
167 {"check-order", no_argument
, nullptr, CHECK_ORDER_OPTION
},
168 {"nocheck-order", no_argument
, nullptr, NOCHECK_ORDER_OPTION
},
169 {"zero-terminated", no_argument
, nullptr, 'z'},
170 {"header", no_argument
, nullptr, HEADER_LINE_OPTION
},
171 {GETOPT_HELP_OPTION_DECL
},
172 {GETOPT_VERSION_OPTION_DECL
},
173 {nullptr, 0, nullptr, 0}
176 /* Used to print non-joining lines */
177 static struct line uni_blank
;
179 /* If nonzero, ignore case when comparing join fields. */
180 static bool ignore_case
;
182 /* If nonzero, treat the first line of each file as column headers --
183 join them without checking for ordering */
184 static bool join_header_lines
;
186 /* The character marking end of line. Default to \n. */
187 static char eolchar
= '\n';
192 if (status
!= EXIT_SUCCESS
)
197 Usage: %s [OPTION]... FILE1 FILE2\n\
201 For each pair of input lines with identical join fields, write a line to\n\
202 standard output. The default join field is the first, delimited by blanks.\
207 When FILE1 or FILE2 (not both) is -, read standard input.\n\
211 -a FILENUM also print unpairable lines from file FILENUM, where\n\
212 FILENUM is 1 or 2, corresponding to FILE1 or FILE2\n\
215 -e STRING replace missing (empty) input fields with STRING;\n\
216 I.e., missing fields specified with '-12jo' options\
220 -i, --ignore-case ignore differences in case when comparing fields\n\
221 -j FIELD equivalent to '-1 FIELD -2 FIELD'\n\
222 -o FORMAT obey FORMAT while constructing output line\n\
223 -t CHAR use CHAR as input and output field separator\n\
226 -v FILENUM like -a FILENUM, but suppress joined output lines\n\
227 -1 FIELD join on this FIELD of file 1\n\
228 -2 FIELD join on this FIELD of file 2\n\
229 --check-order check that the input is correctly sorted, even\n\
230 if all input lines are pairable\n\
231 --nocheck-order do not check that the input is correctly sorted\n\
232 --header treat the first line in each file as field headers,\n\
233 print them without trying to pair them\n\
236 -z, --zero-terminated line delimiter is NUL, not newline\n\
238 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
239 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
242 Unless -t CHAR is given, leading blanks separate fields and are ignored,\n\
243 else fields are separated by CHAR. Any FIELD is a field number counted\n\
244 from 1. FORMAT is one or more comma or blank separated specifications,\n\
245 each being 'FILENUM.FIELD' or '0'. Default FORMAT outputs the join field,\n\
246 the remaining fields from FILE1, the remaining fields from FILE2, all\n\
247 separated by CHAR. If FORMAT is the keyword 'auto', then the first\n\
248 line of each file determines the number of fields output for each line.\n\
250 Important: FILE1 and FILE2 must be sorted on the join fields.\n\
251 E.g., use \"sort -k 1b,1\" if 'join' has no options,\n\
252 or use \"join -t ''\" if 'sort' has no options.\n\
253 Comparisons honor the rules specified by 'LC_COLLATE'.\n\
254 If the input is not sorted and some lines cannot be joined, a\n\
255 warning message will be given.\n\
257 emit_ancillary_info (PROGRAM_NAME
);
262 /* Record a field in LINE, with location FIELD and size LEN. */
265 extract_field (struct line
*line
, char *field
, idx_t len
)
267 if (line
->nfields
>= line
->nfields_allocated
)
268 line
->fields
= xpalloc (line
->fields
, &line
->nfields_allocated
, 1,
269 -1, sizeof *line
->fields
);
270 line
->fields
[line
->nfields
].beg
= field
;
271 line
->fields
[line
->nfields
].len
= len
;
278 return mcel_cmp (g
, tab
) == 0;
282 newline_or_blank (mcel_t g
)
284 return g
.ch
== '\n' || c32isblank (g
.ch
);
287 /* Fill in the 'fields' structure in LINE. */
290 xfields (struct line
*line
)
292 char *ptr
= line
->buf
.buffer
;
293 char const *lim
= ptr
+ line
->buf
.length
- 1;
299 while ((ptr
= skip_buf_matching (ptr
, lim
, newline_or_blank
, true)) < lim
)
301 char *sep
= skip_buf_matching (ptr
, lim
, newline_or_blank
, false);
302 extract_field (line
, ptr
, sep
- ptr
);
309 (sep
= skip_buf_matching (ptr
, lim
, eq_tab
, false)) < lim
;
310 ptr
= sep
+ mcel_scan (sep
, lim
).len
)
311 extract_field (line
, ptr
, sep
- ptr
);
313 extract_field (line
, ptr
, lim
- ptr
);
318 freeline (struct line
*line
)
323 line
->fields
= nullptr;
324 free (line
->buf
.buffer
);
325 line
->buf
.buffer
= nullptr;
328 /* Return <0 if the join field in LINE1 compares less than the one in LINE2;
329 >0 if it compares greater; 0 if it compares equal.
330 Report an error and exit if the comparison fails.
331 Use join fields JF_1 and JF_2 respectively. */
334 keycmp (struct line
const *line1
, struct line
const *line2
,
335 idx_t jf_1
, idx_t jf_2
)
337 /* Start of field to compare in each file. */
342 idx_t len2
; /* Length of fields to compare. */
345 if (jf_1
< line1
->nfields
)
347 beg1
= line1
->fields
[jf_1
].beg
;
348 len1
= line1
->fields
[jf_1
].len
;
356 if (jf_2
< line2
->nfields
)
358 beg2
= line2
->fields
[jf_2
].beg
;
359 len2
= line2
->fields
[jf_2
].len
;
368 return len2
== 0 ? 0 : -1;
374 /* FIXME: ignore_case does not work with NLS (in particular,
375 with multibyte chars). */
376 diff
= memcasecmp (beg1
, beg2
, MIN (len1
, len2
));
381 return xmemcoll (beg1
, len1
, beg2
, len2
);
382 diff
= memcmp (beg1
, beg2
, MIN (len1
, len2
));
387 return (len1
> len2
) - (len1
< len2
);
390 /* Check that successive input lines PREV and CURRENT from input file
391 WHATFILE are presented in order, unless the user may be relying on
392 the GNU extension that input lines may be out of order if no input
393 lines are unpairable.
395 If the user specified --nocheck-order, the check is not made.
396 If the user specified --check-order, the problem is fatal.
397 Otherwise (the default), the message is simply a warning.
399 A message is printed at most once per input file. */
402 check_order (const struct line
*prev
,
403 const struct line
*current
,
406 if (check_input_order
!= CHECK_ORDER_DISABLED
407 && ((check_input_order
== CHECK_ORDER_ENABLED
) || seen_unpairable
))
409 if (!issued_disorder_warning
[whatfile
- 1])
411 idx_t join_field
= whatfile
== 1 ? join_field_1
: join_field_2
;
412 if (keycmp (prev
, current
, join_field
, join_field
) > 0)
414 /* Exclude any trailing newline. */
415 idx_t len
= current
->buf
.length
;
416 if (0 < len
&& current
->buf
.buffer
[len
- 1] == '\n')
419 /* If the offending line is longer than INT_MAX, output
420 only the first INT_MAX bytes in this diagnostic. */
421 len
= MIN (INT_MAX
, len
);
423 error ((check_input_order
== CHECK_ORDER_ENABLED
425 0, _("%s:%ju: is not sorted: %.*s"),
426 g_names
[whatfile
- 1], line_no
[whatfile
- 1],
427 (int) len
, current
->buf
.buffer
);
429 /* If we get to here, the message was merely a warning.
430 Arrange to issue it only once per file. */
431 issued_disorder_warning
[whatfile
- 1] = true;
438 reset_line (struct line
*line
)
444 init_linep (struct line
**linep
)
446 struct line
*line
= xzalloc (sizeof *line
);
451 /* Read a line from FP into LINE and split it into fields.
452 Return true if successful. */
455 get_line (FILE *fp
, struct line
**linep
, int which
)
457 struct line
*line
= *linep
;
459 if (line
== prevline
[which
- 1])
461 SWAPLINES (line
, spareline
[which
- 1]);
468 line
= init_linep (linep
);
470 if (! readlinebuffer_delim (&line
->buf
, fp
, eolchar
))
473 error (EXIT_FAILURE
, errno
, _("read error"));
477 ++line_no
[which
- 1];
481 if (prevline
[which
- 1])
482 check_order (prevline
[which
- 1], line
, which
);
484 prevline
[which
- 1] = line
;
489 free_spareline (void)
491 for (idx_t i
= 0; i
< ARRAY_CARDINALITY (spareline
); i
++)
495 freeline (spareline
[i
]);
502 initseq (struct seq
*seq
)
506 seq
->lines
= nullptr;
509 /* Read a line from FP and add it to SEQ. Return true if successful. */
512 getseq (FILE *fp
, struct seq
*seq
, int whichfile
)
514 if (seq
->count
== seq
->alloc
)
516 seq
->lines
= xpalloc (seq
->lines
, &seq
->alloc
, 1, -1, sizeof *seq
->lines
);
517 for (idx_t i
= seq
->count
; i
< seq
->alloc
; i
++)
518 seq
->lines
[i
] = nullptr;
521 if (get_line (fp
, &seq
->lines
[seq
->count
], whichfile
))
529 /* Read a line from FP and add it to SEQ, as the first item if FIRST is
530 true, else as the next. */
532 advance_seq (FILE *fp
, struct seq
*seq
, bool first
, int whichfile
)
537 return getseq (fp
, seq
, whichfile
);
541 delseq (struct seq
*seq
)
543 for (idx_t i
= 0; i
< seq
->alloc
; i
++)
545 freeline (seq
->lines
[i
]);
546 free (seq
->lines
[i
]);
552 /* Print field N of LINE if it exists and is nonempty, otherwise
553 'empty_filler' if it is nonempty. */
556 prfield (idx_t n
, struct line
const *line
)
558 if (n
< line
->nfields
)
560 idx_t len
= line
->fields
[n
].len
;
562 fwrite (line
->fields
[n
].beg
, 1, len
, stdout
);
563 else if (empty_filler
)
564 fputs (empty_filler
, stdout
);
566 else if (empty_filler
)
567 fputs (empty_filler
, stdout
);
570 /* Output all the fields in line, other than the join field. */
573 prfields (struct line
const *line
, idx_t join_field
, idx_t autocount
)
576 idx_t nfields
= autoformat
? autocount
: line
->nfields
;
578 for (i
= 0; i
< join_field
&& i
< nfields
; ++i
)
580 fwrite (output_separator
, 1, output_seplen
, stdout
);
583 for (i
= join_field
+ 1; i
< nfields
; ++i
)
585 fwrite (output_separator
, 1, output_seplen
, stdout
);
590 /* Print the join of LINE1 and LINE2. */
593 prjoin (struct line
const *line1
, struct line
const *line2
)
595 const struct outlist
*outlist
;
597 struct line
const *line
;
599 outlist
= outlist_head
.next
;
602 const struct outlist
*o
;
609 if (line1
== &uni_blank
)
612 field
= join_field_2
;
617 field
= join_field_1
;
622 line
= (o
->file
== 1 ? line1
: line2
);
625 prfield (field
, line
);
629 fwrite (output_separator
, 1, output_seplen
, stdout
);
635 if (line1
== &uni_blank
)
638 field
= join_field_2
;
643 field
= join_field_1
;
646 /* Output the join field. */
647 prfield (field
, line
);
649 /* Output other fields. */
650 prfields (line1
, join_field_1
, autocount_1
);
651 prfields (line2
, join_field_2
, autocount_2
);
660 /* Print the join of the files in FP1 and FP2. */
663 join (FILE *fp1
, FILE *fp2
)
665 struct seq seq1
, seq2
;
669 fadvise (fp1
, FADVISE_SEQUENTIAL
);
670 fadvise (fp2
, FADVISE_SEQUENTIAL
);
672 /* Read the first line of each file. */
674 getseq (fp1
, &seq1
, 1);
676 getseq (fp2
, &seq2
, 2);
680 autocount_1
= seq1
.count
? seq1
.lines
[0]->nfields
: 0;
681 autocount_2
= seq2
.count
? seq2
.lines
[0]->nfields
: 0;
684 if (join_header_lines
&& (seq1
.count
|| seq2
.count
))
686 struct line
const *hline1
= seq1
.count
? seq1
.lines
[0] : &uni_blank
;
687 struct line
const *hline2
= seq2
.count
? seq2
.lines
[0] : &uni_blank
;
688 prjoin (hline1
, hline2
);
689 prevline
[0] = nullptr;
690 prevline
[1] = nullptr;
692 advance_seq (fp1
, &seq1
, true, 1);
694 advance_seq (fp2
, &seq2
, true, 2);
697 while (seq1
.count
&& seq2
.count
)
699 diff
= keycmp (seq1
.lines
[0], seq2
.lines
[0],
700 join_field_1
, join_field_2
);
703 if (print_unpairables_1
)
704 prjoin (seq1
.lines
[0], &uni_blank
);
705 advance_seq (fp1
, &seq1
, true, 1);
706 seen_unpairable
= true;
711 if (print_unpairables_2
)
712 prjoin (&uni_blank
, seq2
.lines
[0]);
713 advance_seq (fp2
, &seq2
, true, 2);
714 seen_unpairable
= true;
718 /* Keep reading lines from file1 as long as they continue to
719 match the current line from file2. */
722 if (!advance_seq (fp1
, &seq1
, false, 1))
728 while (!keycmp (seq1
.lines
[seq1
.count
- 1], seq2
.lines
[0],
729 join_field_1
, join_field_2
));
731 /* Keep reading lines from file2 as long as they continue to
732 match the current line from file1. */
735 if (!advance_seq (fp2
, &seq2
, false, 2))
741 while (!keycmp (seq1
.lines
[0], seq2
.lines
[seq2
.count
- 1],
742 join_field_1
, join_field_2
));
746 for (idx_t i
= 0; i
< seq1
.count
- 1; ++i
)
749 for (j
= 0; j
< seq2
.count
- 1; ++j
)
750 prjoin (seq1
.lines
[i
], seq2
.lines
[j
]);
756 SWAPLINES (seq1
.lines
[0], seq1
.lines
[seq1
.count
- 1]);
764 SWAPLINES (seq2
.lines
[0], seq2
.lines
[seq2
.count
- 1]);
771 /* If the user did not specify --nocheck-order, then we read the
772 tail ends of both inputs to verify that they are in order. We
773 skip the rest of the tail once we have issued a warning for that
774 file, unless we actually need to print the unpairable lines. */
775 struct line
*line
= nullptr;
776 bool checktail
= false;
778 if (check_input_order
!= CHECK_ORDER_DISABLED
779 && !(issued_disorder_warning
[0] && issued_disorder_warning
[1]))
782 if ((print_unpairables_1
|| checktail
) && seq1
.count
)
784 if (print_unpairables_1
)
785 prjoin (seq1
.lines
[0], &uni_blank
);
787 seen_unpairable
= true;
788 while (get_line (fp1
, &line
, 1))
790 if (print_unpairables_1
)
791 prjoin (line
, &uni_blank
);
792 if (issued_disorder_warning
[0] && !print_unpairables_1
)
797 if ((print_unpairables_2
|| checktail
) && seq2
.count
)
799 if (print_unpairables_2
)
800 prjoin (&uni_blank
, seq2
.lines
[0]);
802 seen_unpairable
= true;
803 while (get_line (fp2
, &line
, 2))
805 if (print_unpairables_2
)
806 prjoin (&uni_blank
, line
);
807 if (issued_disorder_warning
[1] && !print_unpairables_2
)
819 /* Add a field spec for field FIELD of file FILE to 'outlist'. */
822 add_field (int file
, idx_t field
)
826 affirm (file
== 0 || file
== 1 || file
== 2);
827 affirm (file
!= 0 || field
== 0);
829 o
= xmalloc (sizeof *o
);
834 /* Add to the end of the list so the fields are in the right order. */
835 outlist_end
->next
= o
;
839 /* Convert a string of decimal digits, STR (the 1-based join field number),
840 to an integral value. Upon successful conversion, return one less
841 (the zero-based field number). Silently convert too-large values
842 to PTRDIFF_MAX. Otherwise, if a value cannot be converted, give a
843 diagnostic and exit. */
846 string_to_join_field (char const *str
)
850 strtol_error s_err
= xstrtoimax (str
, nullptr, 10, &val
, "");
851 if (s_err
== LONGINT_OVERFLOW
|| (s_err
== LONGINT_OK
&& PTRDIFF_MAX
< val
))
853 else if (s_err
!= LONGINT_OK
|| val
<= 0)
854 error (EXIT_FAILURE
, 0, _("invalid field number: %s"), quote (str
));
859 /* Convert a single field specifier string, S, to a *FILE_INDEX, *FIELD_INDEX
860 pair. In S, the field index string is 1-based; *FIELD_INDEX is zero-based.
861 If S is valid, return true. Otherwise, give a diagnostic and exit. */
864 decode_field_spec (char const *s
, int *file_index
, idx_t
*field_index
)
866 /* The first character must be 0, 1, or 2. */
872 /* '0' must be all alone -- no '.FIELD'. */
873 error (EXIT_FAILURE
, 0, _("invalid field specifier: %s"), quote (s
));
882 error (EXIT_FAILURE
, 0, _("invalid field specifier: %s"), quote (s
));
883 *file_index
= s
[0] - '0';
884 *field_index
= string_to_join_field (s
+ 2);
888 error (EXIT_FAILURE
, 0,
889 _("invalid file number in field spec: %s"), quote (s
));
894 comma_or_blank (mcel_t g
)
896 return g
.ch
== ',' || c32isblank (g
.ch
);
899 /* Add the comma or blank separated field spec(s) in STR to 'outlist'. */
902 add_field_list (char *str
)
910 char const *spec_item
= p
;
911 p
= skip_str_matching (spec_item
, comma_or_blank
, false);
914 mcel_t g
= mcel_scanz (p
);
918 decode_field_spec (spec_item
, &file_index
, &field_index
);
919 add_field (file_index
, field_index
);
924 /* Set the join field *VAR to VAL, but report an error if *VAR is set
925 more than once to incompatible values. */
928 set_join_field (ptrdiff_t *var
, idx_t val
)
930 if (0 <= *var
&& *var
!= val
)
931 error (EXIT_FAILURE
, 0,
932 _("incompatible join fields %td, %td"), *var
, val
);
936 /* Status of command-line arguments. */
940 /* This argument must be an operand, i.e., one of the files to be
944 /* This might be the argument of the preceding -j1 or -j2 option,
945 or it might be an operand. */
949 /* This might be the argument of the preceding -o option, or it might be
954 /* Add NAME to the array of input file NAMES with operand statuses
955 OPERAND_STATUS; currently there are NFILES names in the list. */
958 add_file_name (char *name
, char *names
[2],
959 int operand_status
[2], int joption_count
[2], int *nfiles
,
960 int *prev_optc_status
, int *optc_status
)
966 bool op0
= (operand_status
[0] == MUST_BE_OPERAND
);
967 char *arg
= names
[op0
];
968 switch (operand_status
[op0
])
970 case MUST_BE_OPERAND
:
971 error (0, 0, _("extra operand %s"), quoteaf (name
));
972 usage (EXIT_FAILURE
);
974 case MIGHT_BE_J1_ARG
:
976 set_join_field (&join_field_1
, string_to_join_field (arg
));
979 case MIGHT_BE_J2_ARG
:
981 set_join_field (&join_field_2
, string_to_join_field (arg
));
985 add_field_list (arg
);
990 operand_status
[0] = operand_status
[1];
996 operand_status
[n
] = *prev_optc_status
;
999 if (*prev_optc_status
== MIGHT_BE_O_ARG
)
1000 *optc_status
= MIGHT_BE_O_ARG
;
1004 main (int argc
, char **argv
)
1007 int prev_optc_status
= MUST_BE_OPERAND
;
1008 int operand_status
[2];
1009 int joption_count
[2] = { 0, 0 };
1015 initialize_main (&argc
, &argv
);
1016 set_program_name (argv
[0]);
1017 setlocale (LC_ALL
, "");
1018 bindtextdomain (PACKAGE
, LOCALEDIR
);
1019 textdomain (PACKAGE
);
1020 hard_LC_COLLATE
= hard_locale (LC_COLLATE
);
1022 atexit (close_stdout
);
1023 atexit (free_spareline
);
1025 print_pairables
= true;
1026 seen_unpairable
= false;
1027 issued_disorder_warning
[0] = issued_disorder_warning
[1] = false;
1028 check_input_order
= CHECK_ORDER_DEFAULT
;
1030 while ((optc
= getopt_long (argc
, argv
, "-a:e:i1:2:j:o:t:v:z",
1034 optc_status
= MUST_BE_OPERAND
;
1039 print_pairables
= false;
1045 if (xstrtol (optarg
, nullptr, 10, &val
, "") != LONGINT_OK
1046 || (val
!= 1 && val
!= 2))
1047 error (EXIT_FAILURE
, 0,
1048 _("invalid file number: %s"), quote (optarg
));
1050 print_unpairables_1
= true;
1052 print_unpairables_2
= true;
1057 if (empty_filler
&& ! STREQ (empty_filler
, optarg
))
1058 error (EXIT_FAILURE
, 0,
1059 _("conflicting empty-field replacement strings"));
1060 empty_filler
= optarg
;
1068 set_join_field (&join_field_1
, string_to_join_field (optarg
));
1072 set_join_field (&join_field_2
, string_to_join_field (optarg
));
1076 if ((optarg
[0] == '1' || optarg
[0] == '2') && !optarg
[1]
1077 && optarg
== argv
[optind
- 1] + 2)
1079 /* The argument was either "-j1" or "-j2". */
1080 bool is_j2
= (optarg
[0] == '2');
1081 joption_count
[is_j2
]++;
1082 optc_status
= MIGHT_BE_J1_ARG
+ is_j2
;
1086 set_join_field (&join_field_1
, string_to_join_field (optarg
));
1087 set_join_field (&join_field_2
, join_field_1
);
1092 if (STREQ (optarg
, "auto"))
1096 add_field_list (optarg
);
1097 optc_status
= MIGHT_BE_O_ARG
;
1106 /* '' => process the whole line. */
1107 newtab
= mcel_ch ('\n', 1);
1108 /* output_separator does not matter. */
1110 else if (STREQ (optarg
, "\\0"))
1112 newtab
= mcel_ch ('\0', 1);
1113 output_separator
= "";
1117 newtab
= mcel_scanz (optarg
);
1118 if (optarg
[newtab
.len
])
1119 error (EXIT_FAILURE
, 0, _("multi-character tab %s"),
1121 output_separator
= optarg
;
1123 if (tab
.len
&& mcel_cmp (tab
, newtab
) != 0)
1124 error (EXIT_FAILURE
, 0, _("incompatible tabs"));
1126 output_seplen
= newtab
.len
;
1134 case NOCHECK_ORDER_OPTION
:
1135 check_input_order
= CHECK_ORDER_DISABLED
;
1138 case CHECK_ORDER_OPTION
:
1139 check_input_order
= CHECK_ORDER_ENABLED
;
1142 case 1: /* Non-option argument. */
1143 add_file_name (optarg
, g_names
, operand_status
, joption_count
,
1144 &nfiles
, &prev_optc_status
, &optc_status
);
1147 case HEADER_LINE_OPTION
:
1148 join_header_lines
= true;
1151 case_GETOPT_HELP_CHAR
;
1153 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
1156 usage (EXIT_FAILURE
);
1159 prev_optc_status
= optc_status
;
1162 /* Process any operands after "--". */
1163 prev_optc_status
= MUST_BE_OPERAND
;
1164 while (optind
< argc
)
1165 add_file_name (argv
[optind
++], g_names
, operand_status
, joption_count
,
1166 &nfiles
, &prev_optc_status
, &optc_status
);
1171 error (0, 0, _("missing operand"));
1173 error (0, 0, _("missing operand after %s"), quote (argv
[argc
- 1]));
1174 usage (EXIT_FAILURE
);
1177 /* If "-j1" was specified and it turns out not to have had an argument,
1178 treat it as "-j 1". Likewise for -j2. */
1179 for (i
= 0; i
< 2; i
++)
1180 if (joption_count
[i
] != 0)
1182 set_join_field (&join_field_1
, i
);
1183 set_join_field (&join_field_2
, i
);
1186 if (join_field_1
< 0)
1188 if (join_field_2
< 0)
1191 fp1
= STREQ (g_names
[0], "-") ? stdin
: fopen (g_names
[0], "r");
1193 error (EXIT_FAILURE
, errno
, "%s", quotef (g_names
[0]));
1194 fp2
= STREQ (g_names
[1], "-") ? stdin
: fopen (g_names
[1], "r");
1196 error (EXIT_FAILURE
, errno
, "%s", quotef (g_names
[1]));
1198 error (EXIT_FAILURE
, errno
, _("both files cannot be standard input"));
1201 if (fclose (fp1
) != 0)
1202 error (EXIT_FAILURE
, errno
, "%s", quotef (g_names
[0]));
1203 if (fclose (fp2
) != 0)
1204 error (EXIT_FAILURE
, errno
, "%s", quotef (g_names
[1]));
1206 if (issued_disorder_warning
[0] || issued_disorder_warning
[1])
1207 error (EXIT_FAILURE
, 0, _("input is not in sorted order"));
1209 return EXIT_SUCCESS
;