Sync with manuals from netbsd-8 branch.
[minix3.git] / usr.bin / sort / init.c
blob59ff619eb0acc612a095f1d23b3556206d105752
1 /* $NetBSD: init.c,v 1.29 2013/10/18 20:47:06 christos Exp $ */
3 /*-
4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5 * All rights reserved.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Ben Harris and Jaromir Dolecek.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
32 /*-
33 * Copyright (c) 1993
34 * The Regents of the University of California. All rights reserved.
36 * This code is derived from software contributed to Berkeley by
37 * Peter McIlroy.
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
64 #include "sort.h"
66 __RCSID("$NetBSD: init.c,v 1.29 2013/10/18 20:47:06 christos Exp $");
68 #include <ctype.h>
69 #include <string.h>
71 static void insertcol(struct field *);
72 static const char *setcolumn(const char *, struct field *);
75 * masks of ignored characters.
77 static u_char dtable[NBINS], itable[NBINS];
80 * parsed key options
82 struct coldesc *clist = NULL;
83 int ncols = 0;
86 * clist (list of columns which correspond to one or more icol or tcol)
87 * is in increasing order of columns.
88 * Fields are kept in increasing order of fields.
91 /*
92 * keep clist in order--inserts a column in a sorted array
94 static void
95 insertcol(struct field *field)
97 int i;
98 struct coldesc *p;
100 /* Make space for new item */
101 p = realloc(clist, (ncols + 2) * sizeof(*clist));
102 if (!p)
103 err(1, "realloc");
104 clist = p;
105 memset(&clist[ncols], 0, sizeof(clist[ncols]));
107 for (i = 0; i < ncols; i++)
108 if (field->icol.num <= clist[i].num)
109 break;
110 if (field->icol.num != clist[i].num) {
111 memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
112 clist[i].num = field->icol.num;
113 ncols++;
115 if (field->tcol.num && field->tcol.num != field->icol.num) {
116 for (i = 0; i < ncols; i++)
117 if (field->tcol.num <= clist[i].num)
118 break;
119 if (field->tcol.num != clist[i].num) {
120 memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
121 clist[i].num = field->tcol.num;
122 ncols++;
128 * matches fields with the appropriate columns--n^2 but who cares?
130 void
131 fldreset(struct field *fldtab)
133 int i;
135 fldtab[0].tcol.p = clist + ncols - 1;
136 for (++fldtab; fldtab->icol.num; ++fldtab) {
137 for (i = 0; fldtab->icol.num != clist[i].num; i++)
139 fldtab->icol.p = clist + i;
140 if (!fldtab->tcol.num)
141 continue;
142 for (i = 0; fldtab->tcol.num != clist[i].num; i++)
144 fldtab->tcol.p = clist + i;
149 * interprets a column in a -k field
151 static const char *
152 setcolumn(const char *pos, struct field *cur_fld)
154 struct column *col;
155 char *npos;
156 int tmp;
157 col = cur_fld->icol.num ? (&cur_fld->tcol) : (&cur_fld->icol);
158 col->num = (int) strtol(pos, &npos, 10);
159 pos = npos;
160 if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
161 errx(2, "field numbers must be positive");
162 if (*pos == '.') {
163 if (!col->num)
164 errx(2, "cannot indent end of line");
165 ++pos;
166 col->indent = (int) strtol(pos, &npos, 10);
167 pos = npos;
168 if (&cur_fld->icol == col)
169 col->indent--;
170 if (col->indent < 0)
171 errx(2, "illegal offset");
173 for(; (tmp = optval(*pos, cur_fld->tcol.num)); pos++)
174 cur_fld->flags |= tmp;
175 if (cur_fld->icol.num == 0)
176 cur_fld->icol.num = 1;
177 return (pos);
181 setfield(const char *pos, struct field *cur_fld, int gflag)
183 cur_fld->mask = NULL;
185 pos = setcolumn(pos, cur_fld);
186 if (*pos == '\0') /* key extends to EOL. */
187 cur_fld->tcol.num = 0;
188 else {
189 if (*pos != ',')
190 errx(2, "illegal field descriptor");
191 setcolumn((++pos), cur_fld);
193 if (!cur_fld->flags)
194 cur_fld->flags = gflag;
195 if (REVERSE)
196 /* A local 'r' doesn't invert the global one */
197 cur_fld->flags &= ~R;
199 /* Assign appropriate mask table and weight table. */
200 cur_fld->weights = weight_tables[cur_fld->flags & (R | F)];
201 if (cur_fld->flags & I)
202 cur_fld->mask = itable;
203 else if (cur_fld->flags & D)
204 cur_fld->mask = dtable;
206 cur_fld->flags |= (gflag & (BI | BT));
207 if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */
208 cur_fld->flags &= ~BT;
210 if (cur_fld->tcol.num
211 && !(!(cur_fld->flags & BI) && cur_fld->flags & BT)
212 && (cur_fld->tcol.num <= cur_fld->icol.num
213 /* indent if 0 -> end of field, i.e. okay */
214 && cur_fld->tcol.indent != 0
215 && cur_fld->tcol.indent < cur_fld->icol.indent))
216 errx(2, "fields out of order");
218 insertcol(cur_fld);
219 return (cur_fld->tcol.num);
223 optval(int desc, int tcolflag)
225 switch(desc) {
226 case 'b':
227 if (!tcolflag)
228 return BI;
229 else
230 return BT;
231 case 'd': return D;
232 case 'f': return F;
233 case 'i': return I;
234 case 'l': return L;
235 case 'n': return N;
236 #if defined(__minix)
237 case 'x': return X;
238 #endif /* defined(__minix) */
239 case 'r': return R;
240 default: return 0;
245 * Return true if the options found in ARG, according to the getopt
246 * spec in OPTS, require an additional argv word as an option
247 * argument.
249 static int
250 options_need_argument(const char *arg, const char *opts)
252 size_t pos;
253 const char *s;
255 /*assert(arg[0] == '-');*/
257 pos = 1;
258 while (arg[pos]) {
259 s = strchr(opts, arg[pos]);
260 if (s == NULL) {
261 /* invalid option */
262 return 0;
264 if (s[1] == ':') {
265 /* option requires argument */
266 if (arg[pos+1] == '\0') {
267 /* no argument in this arg */
268 return 1;
270 else {
271 /* argument is in this arg; no more options */
272 return 0;
275 pos++;
277 return 0;
281 * Replace historic +SPEC arguments with appropriate -kSPEC.
283 * The form can be either a single +SPEC or a pair +SPEC -SPEC.
284 * The following -SPEC is not recognized unless it follows
285 * immediately.
287 void
288 fixit(int *argc, char **argv, const char *opts)
290 int i, j, sawplus;
291 char *vpos, *tpos, spec[20];
292 int col, indent;
294 sawplus = 0;
295 for (i = 1; i < *argc; i++) {
297 * This loop must stop exactly where getopt will stop.
298 * Otherwise it turns e.g. "sort x +3" into "sort x
299 * -k4.1", which will croak if +3 was in fact really a
300 * file name. In order to do this reliably we need to
301 * be able to identify argv words that are option
302 * arguments.
305 if (!strcmp(argv[i], "--")) {
306 /* End of options; stop. */
307 break;
310 if (argv[i][0] == '+') {
311 /* +POS argument */
312 sawplus = 1;
313 } else if (argv[i][0] == '-' && sawplus &&
314 isdigit((unsigned char)argv[i][1])) {
315 /* -POS argument */
316 sawplus = 0;
317 } else if (argv[i][0] == '-') {
318 /* other option */
319 sawplus = 0;
320 if (options_need_argument(argv[i], opts)) {
321 /* skip over the argument */
322 i++;
324 continue;
325 } else {
326 /* not an option at all; stop */
327 sawplus = 0;
328 break;
332 * At this point argv[i] is an old-style spec. The
333 * sawplus flag used by the above loop logic also
334 * tells us if it's a +SPEC or -SPEC.
337 /* parse spec */
338 tpos = argv[i]+1;
339 col = (int)strtol(tpos, &tpos, 10);
340 if (*tpos == '.') {
341 ++tpos;
342 indent = (int) strtol(tpos, &tpos, 10);
343 } else
344 indent = 0;
345 /* tpos now points to the optional flags */
348 * In the traditional form, x.0 means beginning of line;
349 * in the new form, x.0 means end of line. Adjust the
350 * value of INDENT accordingly.
352 if (sawplus) {
353 /* +POS */
354 col += 1;
355 indent += 1;
356 } else {
357 /* -POS */
358 if (indent > 0)
359 col += 1;
362 /* make the new style spec */
363 (void)snprintf(spec, sizeof(spec), "%d.%d%s", col, indent,
364 tpos);
366 if (sawplus) {
367 /* Replace the +POS argument with new-style -kSPEC */
368 asprintf(&vpos, "-k%s", spec);
369 argv[i] = vpos;
370 } else {
372 * Append the spec to the one from the
373 * preceding +POS argument, and remove the
374 * current argv element entirely.
376 asprintf(&vpos, "%s,%s", argv[i-1], spec);
377 free(argv[i-1]);
378 argv[i-1] = vpos;
379 for (j=i; j < *argc; j++)
380 argv[j] = argv[j+1];
381 *argc -= 1;
382 i--;
388 * ascii, Rascii, Ftable, and RFtable map
390 * Sorting 'weight' tables.
391 * Convert 'ascii' characters into their sort order.
392 * The 'F' variants fold lower case to upper equivalent
393 * The 'R' variants are for reverse sorting.
395 * The record separator (REC_D) never needs a weight, this frees one
396 * byte value as an 'end of key' marker. This must be 0 for normal
397 * weight tables, and 0xff for reverse weight tables - and is used
398 * to terminate keys so that short keys sort before (after if reverse)
399 * longer keys.
401 * The field separator has a normal weight - although it cannot occur
402 * within a key unless it is the default (space+tab).
404 * All other bytes map to the appropriate value for the sort order.
405 * Numeric sorts don't need any tables, they are reversed by negation.
407 * Global reverse sorts are done by writing the sorted keys in reverse
408 * order - the sort itself is stil forwards.
409 * This means that weights are only ever used when generating keys, any
410 * sort of the original data bytes is always forwards and unweighted.
412 * Note: this is only good for ASCII sorting. For different LC 's,
413 * all bets are off.
415 * itable[] and dtable[] are the masks for -i (ignore non-printables)
416 * and -d (only sort blank and alphanumerics).
418 void
419 settables(void)
421 int i;
422 int next_weight = 1;
423 int rev_weight = 254;
425 ascii[REC_D] = 0;
426 Rascii[REC_D] = 255;
427 Ftable[REC_D] = 0;
428 RFtable[REC_D] = 255;
430 for (i = 0; i < 256; i++) {
431 if (i == REC_D)
432 continue;
433 ascii[i] = next_weight;
434 Rascii[i] = rev_weight;
435 if (Ftable[i] == 0) {
436 Ftable[i] = next_weight;
437 RFtable[i] = rev_weight;
438 Ftable[tolower(i)] = next_weight;
439 RFtable[tolower(i)] = rev_weight;
441 next_weight++;
442 rev_weight--;
444 if (i == '\n' || isprint(i))
445 itable[i] = 1;
447 if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
448 dtable[i] = 1;