1 /* uniq -- remove duplicate lines from a sorted file
2 Copyright (C) 86, 91, 1995-1998, 1999 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Richard Stallman and David MacKenzie. */
24 #include <sys/types.h>
27 #include "linebuffer.h"
30 #include "memcasecmp.h"
/* The official name of this program (e.g., no `g' prefix).  */
#define PROGRAM_NAME "uniq"

/* Author credit string.  */
#define AUTHORS "Richard Stallman and David MacKenzie"

/* Exchange the two `struct linebuffer *' lvalues A and B.
   Wrapped in do/while (0) so the macro behaves as a single statement,
   including inside an unbraced if/else.  A and B are each evaluated
   twice, so they must be free of side effects.  */
#define SWAP_LINES(A, B)			\
  do						\
    {						\
      struct linebuffer *swap_tmp_ = (A);	\
      (A) = (B);				\
      (B) = swap_tmp_;				\
    }						\
  while (0)
/* The name this program was run with (main stores argv[0] here).  */
char *program_name;

/* Number of fields to skip on each line when doing comparisons.  */
static int skip_fields;

/* Number of chars to skip after skipping any fields.  */
static int skip_chars;

/* Number of chars to compare; if 0, compare the whole lines.  */
static int check_chars;
/* The ways an output line can be prefixed with its occurrence count.  */
enum countmode
{
  count_occurrences,		/* -c Print count before output lines. */
  count_none			/* Default.  Do not print counts. */
};

/* Whether and how to precede the output lines with a count of the number of
   times they occurred in the input.  */
static enum countmode countmode;
/* The selectable policies for which input lines reach the output.  */
enum output_mode
{
  output_repeated,		/* -d Only lines that are repeated. */
  output_all_repeated,		/* -D All lines that are repeated. */
  output_unique,		/* -u Only lines that are not repeated. */
  output_all			/* Default.  Print first copy of each line. */
};

/* Which lines to output.  The default, output_all, is not the zero
   enumerator, so this must be assigned explicitly before use.  */
static enum output_mode mode;
/* If nonzero, ignore case when comparing.  */
static int ignore_case;
83 static struct option
const longopts
[] =
85 {"count", no_argument
, NULL
, 'c'},
86 {"repeated", no_argument
, NULL
, 'd'},
87 {"all-repeated", no_argument
, NULL
, 'D'},
88 {"ignore-case", no_argument
, NULL
, 'i'},
89 {"unique", no_argument
, NULL
, 'u'},
90 {"skip-fields", required_argument
, NULL
, 'f'},
91 {"skip-chars", required_argument
, NULL
, 's'},
92 {"check-chars", required_argument
, NULL
, 'w'},
93 {GETOPT_HELP_OPTION_DECL
},
94 {GETOPT_VERSION_OPTION_DECL
},
102 fprintf (stderr
, _("Try `%s --help' for more information.\n"),
107 Usage: %s [OPTION]... [INPUT [OUTPUT]]\n\
111 Discard all but one of successive identical lines from INPUT (or\n\
112 standard input), writing to OUTPUT (or standard output).\n\
114 -c, --count prefix lines by the number of occurrences\n\
115 -d, --repeated only print duplicate lines\n\
116 -D, --all-repeated print all duplicate lines\n\
117 -f, --skip-fields=N avoid comparing the first N fields\n\
118 -i, --ignore-case ignore differences in case when comparing\n\
119 -s, --skip-chars=N avoid comparing the first N characters\n\
120 -u, --unique only print unique lines\n\
121 -w, --check-chars=N compare no more than N characters in lines\n\
124 --help display this help and exit\n\
125 --version output version information and exit\n\
127 A field is a run of whitespace, then non-whitespace characters.\n\
128 Fields are skipped before chars.\n\
130 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
132 exit (status
== 0 ? EXIT_SUCCESS
: EXIT_FAILURE
);
135 /* Given a linebuffer LINE,
136 return a pointer to the beginning of the line's field to be compared. */
139 find_field (const struct linebuffer
*line
)
142 register char *lp
= line
->buffer
;
143 register size_t size
= line
->length
;
144 register size_t i
= 0;
146 for (count
= 0; count
< skip_fields
&& i
< size
; count
++)
148 while (i
< size
&& ISBLANK (lp
[i
]))
150 while (i
< size
&& !ISBLANK (lp
[i
]))
154 for (count
= 0; count
< skip_chars
&& i
< size
; count
++)
160 /* Return zero if two strings OLD and NEW match, nonzero if not.
161 OLD and NEW point not to the beginnings of the lines
162 but rather to the beginnings of the fields to compare.
163 OLDLEN and NEWLEN are their lengths. */
166 different (const char *old
, const char *new, size_t oldlen
, size_t newlen
)
172 if (oldlen
> check_chars
)
173 oldlen
= check_chars
;
174 if (newlen
> check_chars
)
175 newlen
= check_chars
;
178 /* Use an if-statement here rather than a function variable to
179 avoid portability hassles of getting a non-conflicting declaration
182 order
= memcasecmp (old
, new, MIN (oldlen
, newlen
));
184 order
= memcmp (old
, new, MIN (oldlen
, newlen
));
187 return oldlen
- newlen
;
191 /* Output the line in linebuffer LINE to stream STREAM
192 provided that the switches say it should be output.
193 If requested, print the number of times it occurred, as well;
194 LINECOUNT + 1 is the number of times that the line occurred. */
197 writeline (const struct linebuffer
*line
, FILE *stream
, int linecount
)
199 if ((mode
== output_unique
&& linecount
!= 0)
200 || (mode
== output_repeated
&& linecount
== 0)
201 || (mode
== output_all_repeated
&& linecount
== 0))
204 if (countmode
== count_occurrences
)
205 fprintf (stream
, "%7d\t", linecount
+ 1);
207 fwrite (line
->buffer
, sizeof (char), line
->length
, stream
);
210 /* Process input file INFILE with output to OUTFILE.
211 If either is "-", use the standard I/O stream for it instead. */
214 check_file (const char *infile
, const char *outfile
)
218 struct linebuffer lb1
, lb2
;
219 struct linebuffer
*thisline
, *prevline
;
221 if (STREQ (infile
, "-"))
224 istream
= fopen (infile
, "r");
226 error (EXIT_FAILURE
, errno
, "%s", infile
);
228 if (STREQ (outfile
, "-"))
231 ostream
= fopen (outfile
, "w");
233 error (EXIT_FAILURE
, errno
, "%s", outfile
);
238 initbuffer (thisline
);
239 initbuffer (prevline
);
241 /* The duplication in the following `if' and `else' blocks is an
242 optimization to distinguish the common case (in which none of
243 the following options has been specified: --count, -repeated,
244 --all-repeated, --unique) from the others. In the common case,
245 this optimization lets uniq output each different line right away,
246 without waiting to see if the next one is different. */
248 if (mode
== output_all
&& countmode
== count_none
)
250 char *prevfield
IF_LINT (= NULL
);
251 size_t prevlen
IF_LINT (= 0);
253 while (!feof (istream
))
257 if (readline (thisline
, istream
) == 0)
259 thisfield
= find_field (thisline
);
260 thislen
= thisline
->length
- (thisfield
- thisline
->buffer
);
261 if (prevline
->length
== 0
262 || different (thisfield
, prevfield
, thislen
, prevlen
))
264 fwrite (thisline
->buffer
, sizeof (char),
265 thisline
->length
, ostream
);
267 SWAP_LINES (prevline
, thisline
);
268 prevfield
= thisfield
;
279 if (readline (prevline
, istream
) == 0)
281 prevfield
= find_field (prevline
);
282 prevlen
= prevline
->length
- (prevfield
- prevline
->buffer
);
284 while (!feof (istream
))
289 if (readline (thisline
, istream
) == 0)
291 thisfield
= find_field (thisline
);
292 thislen
= thisline
->length
- (thisfield
- thisline
->buffer
);
293 match
= !different (thisfield
, prevfield
, thislen
, prevlen
);
298 if (!match
|| mode
== output_all_repeated
)
300 writeline (prevline
, ostream
, match_count
);
301 SWAP_LINES (prevline
, thisline
);
302 prevfield
= thisfield
;
309 writeline (prevline
, ostream
, match_count
);
313 if (ferror (istream
) || fclose (istream
) == EOF
)
314 error (EXIT_FAILURE
, errno
, _("error reading %s"), infile
);
316 if (ferror (ostream
) || fclose (ostream
) == EOF
)
317 error (EXIT_FAILURE
, errno
, _("error writing %s"), outfile
);
324 main (int argc
, char **argv
)
327 char *infile
= "-", *outfile
= "-";
329 program_name
= argv
[0];
330 setlocale (LC_ALL
, "");
331 bindtextdomain (PACKAGE
, LOCALEDIR
);
332 textdomain (PACKAGE
);
338 countmode
= count_none
;
340 while ((optc
= getopt_long (argc
, argv
, "0123456789cdDf:is:uw:", longopts
,
358 skip_fields
= skip_fields
* 10 + optc
- '0';
362 countmode
= count_occurrences
;
366 mode
= output_repeated
;
370 mode
= output_all_repeated
;
373 case 'f': /* Like '-#'. */
376 if (xstrtol (optarg
, NULL
, 10, &tmp_long
, "") != LONGINT_OK
377 || tmp_long
<= 0 || tmp_long
> INT_MAX
)
378 error (EXIT_FAILURE
, 0,
379 _("invalid number of fields to skip: `%s'"),
381 skip_fields
= (int) tmp_long
;
389 case 's': /* Like '+#'. */
392 if (xstrtol (optarg
, NULL
, 10, &tmp_long
, "") != LONGINT_OK
393 || tmp_long
<= 0 || tmp_long
> INT_MAX
)
394 error (EXIT_FAILURE
, 0,
395 _("invalid number of bytes to skip: `%s'"),
397 skip_chars
= (int) tmp_long
;
402 mode
= output_unique
;
408 if (xstrtol (optarg
, NULL
, 10, &tmp_long
, "") != LONGINT_OK
409 || tmp_long
<= 0 || tmp_long
> INT_MAX
)
410 error (EXIT_FAILURE
, 0,
411 _("invalid number of bytes to compare: `%s'"),
413 check_chars
= (int) tmp_long
;
417 case_GETOPT_HELP_CHAR
;
419 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
426 if (optind
>= 2 && !STREQ (argv
[optind
- 1], "--"))
428 /* Interpret non-option arguments with leading `+' only
429 if we haven't seen `--'. */
430 while (optind
< argc
&& argv
[optind
][0] == '+')
432 char *opt_str
= argv
[optind
++];
434 if (xstrtol (opt_str
, NULL
, 10, &tmp_long
, "") != LONGINT_OK
435 || tmp_long
<= 0 || tmp_long
> INT_MAX
)
436 error (EXIT_FAILURE
, 0,
437 _("invalid number of bytes to compare: `%s'"),
439 skip_chars
= (int) tmp_long
;
444 infile
= argv
[optind
++];
447 outfile
= argv
[optind
++];
451 error (0, 0, _("too many arguments"));
455 if (countmode
== count_occurrences
&& mode
== output_all_repeated
)
458 _("printing all duplicated lines and repeat counts is meaningless"));
462 check_file (infile
, outfile
);