1 /* $NetBSD: uniq.c,v 1.6 2014/06/21 17:48:07 christos Exp $ */
4 * Copyright (c) 2007 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
31 #include <sys/cdefs.h>
32 __RCSID("$NetBSD: uniq.c,v 1.6 2014/06/21 17:48:07 christos Exp $");
45 static const HASHINFO hinfo
= {
54 static int comp(const char *, char **, size_t *);
57 * Preserve only unique content lines in a file. Input lines that have
58 * content [alphanumeric characters before a comment] are white-space
59 * normalized and have their comments removed. Then they are placed
60 * in a hash table, and only the first instance of them is printed.
61 * Comment lines without any alphanumeric content are always printed
62 * since they are there to make the file "pretty". Comment lines with
63 * alphanumeric content are also placed into the hash table and only
67 uniq(const char *fname
)
71 static const DBT data
= { NULL
, 0 };
76 if ((db
= dbopen(NULL
, O_RDWR
, 0, DB_HASH
, &hinfo
)) == NULL
)
77 err(1, "Cannot create in memory database");
79 fp
= efopen(fname
, "r");
80 while ((line
= fgetln(fp
, &len
)) != NULL
) {
83 if (!comp(line
, &compline
, &complen
)) {
84 (void)fprintf(stdout
, "%*.*s", (int)len
, (int)len
,
90 switch ((db
->put
)(db
, &key
, &data
, R_NOOVERWRITE
)) {
92 (void)fprintf(stdout
, "%*.*s", (int)len
, (int)len
,
105 (void)fflush(stdout
);
110 * normalize whitespace in the original line and place a new string
111 * with whitespace converted to a single space in compline. If the line
112 * contains just comments, we preserve them. If it contains data and
113 * comments, we kill the comments. Return 1 if the line had actual
114 * contents, or 0 if it was just a comment without alphanumeric characters.
117 comp(const char *origline
, char **compline
, size_t *len
)
119 const unsigned char *p
;
122 size_t l
= *len
, complen
;
123 int hasalnum
, iscomment
;
125 /* Eat leading space */
126 for (p
= (const unsigned char *)origline
; l
&& *p
&& isspace(*p
);
129 cline
= emalloc(l
+ 1);
130 (void)memcpy(cline
, p
, l
);
139 for (q
= (unsigned char *)cline
; l
&& *p
; p
++, l
--) {
141 if (complen
&& isspace(q
[-1]))
146 if (!iscomment
&& *p
== '#') {
151 hasalnum
|= isalnum(*p
);
157 /* Eat trailing space */
158 while (complen
&& isspace(q
[-1])) {