import tput from NetBSD
[minix3.git] / usr.bin / sort / init.c
blob8884d0b4081cf934928f7fc0080af292f9f489fa
1 /* $NetBSD: init.c,v 1.28 2010/12/18 23:09:48 christos Exp $ */
3 /*-
4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5 * All rights reserved.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Ben Harris and Jaromir Dolecek.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
32 /*-
33 * Copyright (c) 1993
34 * The Regents of the University of California. All rights reserved.
36 * This code is derived from software contributed to Berkeley by
37 * Peter McIlroy.
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
64 #include "sort.h"
66 __RCSID("$NetBSD: init.c,v 1.28 2010/12/18 23:09:48 christos Exp $");
68 #include <ctype.h>
69 #include <string.h>
71 static void insertcol(struct field *);
72 static const char *setcolumn(const char *, struct field *);
75 * masks of ignored characters.
77 static u_char dtable[NBINS], itable[NBINS];
80 * parsed key options
82 struct coldesc *clist = NULL;
83 int ncols = 0;
86 * clist (list of columns which correspond to one or more icol or tcol)
87 * is in increasing order of columns.
88 * Fields are kept in increasing order of fields.
91 /*
92 * keep clist in order--inserts a column in a sorted array
94 static void
95 insertcol(struct field *field)
97 int i;
98 struct coldesc *p;
100 /* Make space for new item */
101 p = realloc(clist, (ncols + 2) * sizeof(*clist));
102 if (!p)
103 err(1, "realloc");
104 clist = p;
105 memset(&clist[ncols], 0, sizeof(clist[ncols]));
107 for (i = 0; i < ncols; i++)
108 if (field->icol.num <= clist[i].num)
109 break;
110 if (field->icol.num != clist[i].num) {
111 memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
112 clist[i].num = field->icol.num;
113 ncols++;
115 if (field->tcol.num && field->tcol.num != field->icol.num) {
116 for (i = 0; i < ncols; i++)
117 if (field->tcol.num <= clist[i].num)
118 break;
119 if (field->tcol.num != clist[i].num) {
120 memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
121 clist[i].num = field->tcol.num;
122 ncols++;
128 * matches fields with the appropriate columns--n^2 but who cares?
130 void
131 fldreset(struct field *fldtab)
133 int i;
135 fldtab[0].tcol.p = clist + ncols - 1;
136 for (++fldtab; fldtab->icol.num; ++fldtab) {
137 for (i = 0; fldtab->icol.num != clist[i].num; i++)
139 fldtab->icol.p = clist + i;
140 if (!fldtab->tcol.num)
141 continue;
142 for (i = 0; fldtab->tcol.num != clist[i].num; i++)
144 fldtab->tcol.p = clist + i;
149 * interprets a column in a -k field
151 static const char *
152 setcolumn(const char *pos, struct field *cur_fld)
154 struct column *col;
155 char *npos;
156 int tmp;
157 col = cur_fld->icol.num ? (&cur_fld->tcol) : (&cur_fld->icol);
158 col->num = (int) strtol(pos, &npos, 10);
159 pos = npos;
160 if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
161 errx(2, "field numbers must be positive");
162 if (*pos == '.') {
163 if (!col->num)
164 errx(2, "cannot indent end of line");
165 ++pos;
166 col->indent = (int) strtol(pos, &npos, 10);
167 pos = npos;
168 if (&cur_fld->icol == col)
169 col->indent--;
170 if (col->indent < 0)
171 errx(2, "illegal offset");
173 for(; (tmp = optval(*pos, cur_fld->tcol.num)); pos++)
174 cur_fld->flags |= tmp;
175 if (cur_fld->icol.num == 0)
176 cur_fld->icol.num = 1;
177 return (pos);
181 setfield(const char *pos, struct field *cur_fld, int gflag)
183 cur_fld->mask = NULL;
185 pos = setcolumn(pos, cur_fld);
186 if (*pos == '\0') /* key extends to EOL. */
187 cur_fld->tcol.num = 0;
188 else {
189 if (*pos != ',')
190 errx(2, "illegal field descriptor");
191 setcolumn((++pos), cur_fld);
193 if (!cur_fld->flags)
194 cur_fld->flags = gflag;
195 if (REVERSE)
196 /* A local 'r' doesn't invert the global one */
197 cur_fld->flags &= ~R;
199 /* Assign appropriate mask table and weight table. */
200 cur_fld->weights = weight_tables[cur_fld->flags & (R | F)];
201 if (cur_fld->flags & I)
202 cur_fld->mask = itable;
203 else if (cur_fld->flags & D)
204 cur_fld->mask = dtable;
206 cur_fld->flags |= (gflag & (BI | BT));
207 if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */
208 cur_fld->flags &= ~BT;
210 if (cur_fld->tcol.num
211 && !(!(cur_fld->flags & BI) && cur_fld->flags & BT)
212 && (cur_fld->tcol.num <= cur_fld->icol.num
213 /* indent if 0 -> end of field, i.e. okay */
214 && cur_fld->tcol.indent != 0
215 && cur_fld->tcol.indent < cur_fld->icol.indent))
216 errx(2, "fields out of order");
218 insertcol(cur_fld);
219 return (cur_fld->tcol.num);
223 optval(int desc, int tcolflag)
225 switch(desc) {
226 case 'b':
227 if (!tcolflag)
228 return BI;
229 else
230 return BT;
231 case 'd': return D;
232 case 'f': return F;
233 case 'i': return I;
234 case 'l': return L;
235 case 'n': return N;
236 #ifdef __minix
237 case 'x': return X;
238 #endif
239 case 'r': return R;
240 default: return 0;
245 * Return true if the options found in ARG, according to the getopt
246 * spec in OPTS, require an additional argv word as an option
247 * argument.
249 static int
250 options_need_argument(const char *arg, const char *opts)
252 size_t pos;
253 const char *s;
255 /*assert(arg[0] == '-');*/
257 pos = 1;
258 while (arg[pos]) {
259 s = strchr(opts, arg[pos]);
260 if (s == NULL) {
261 /* invalid option */
262 return 0;
264 if (s[1] == ':') {
265 /* option requires argument */
266 if (arg[pos+1] == '\0') {
267 /* no argument in this arg */
268 return 1;
270 else {
271 /* argument is in this arg; no more options */
272 return 0;
275 pos++;
277 return 0;
281 * Replace historic +SPEC arguments with appropriate -kSPEC.
283 * The form can be either a single +SPEC or a pair +SPEC -SPEC.
284 * The following -SPEC is not recognized unless it follows
285 * immediately.
287 void
288 fixit(int *argc, char **argv, const char *opts)
290 int i, j, sawplus;
291 char *vpos, *tpos, spec[20];
292 int col, indent;
293 size_t sz;
295 sawplus = 0;
296 for (i = 1; i < *argc; i++) {
298 * This loop must stop exactly where getopt will stop.
299 * Otherwise it turns e.g. "sort x +3" into "sort x
300 * -k4.1", which will croak if +3 was in fact really a
301 * file name. In order to do this reliably we need to
302 * be able to identify argv words that are option
303 * arguments.
306 if (!strcmp(argv[i], "--")) {
307 /* End of options; stop. */
308 break;
311 if (argv[i][0] == '+') {
312 /* +POS argument */
313 sawplus = 1;
314 } else if (argv[i][0] == '-' && sawplus &&
315 isdigit((unsigned char)argv[i][1])) {
316 /* -POS argument */
317 sawplus = 0;
318 } else if (argv[i][0] == '-') {
319 /* other option */
320 sawplus = 0;
321 if (options_need_argument(argv[i], opts)) {
322 /* skip over the argument */
323 i++;
325 continue;
326 } else {
327 /* not an option at all; stop */
328 sawplus = 0;
329 break;
333 * At this point argv[i] is an old-style spec. The
334 * sawplus flag used by the above loop logic also
335 * tells us if it's a +SPEC or -SPEC.
338 /* parse spec */
339 tpos = argv[i]+1;
340 col = (int)strtol(tpos, &tpos, 10);
341 if (*tpos == '.') {
342 ++tpos;
343 indent = (int) strtol(tpos, &tpos, 10);
344 } else
345 indent = 0;
346 /* tpos now points to the optional flags */
349 * In the traditional form, x.0 means beginning of line;
350 * in the new form, x.0 means end of line. Adjust the
351 * value of INDENT accordingly.
353 if (sawplus) {
354 /* +POS */
355 col += 1;
356 indent += 1;
357 } else {
358 /* -POS */
359 if (indent > 0)
360 col += 1;
363 /* make the new style spec */
364 sz = snprintf(spec, sizeof(spec), "%d.%d%s", col, indent,
365 tpos);
367 if (sawplus) {
368 /* Replace the +POS argument with new-style -kSPEC */
369 asprintf(&vpos, "-k%s", spec);
370 argv[i] = vpos;
371 } else {
373 * Append the spec to the one from the
374 * preceding +POS argument, and remove the
375 * current argv element entirely.
377 asprintf(&vpos, "%s,%s", argv[i-1], spec);
378 free(argv[i-1]);
379 argv[i-1] = vpos;
380 for (j=i; j < *argc; j++)
381 argv[j] = argv[j+1];
382 *argc -= 1;
383 i--;
389 * ascii, Rascii, Ftable, and RFtable map
391 * Sorting 'weight' tables.
392 * Convert 'ascii' characters into their sort order.
393 * The 'F' variants fold lower case to upper equivalent
394 * The 'R' variants are for reverse sorting.
396 * The record separator (REC_D) never needs a weight, this frees one
397 * byte value as an 'end of key' marker. This must be 0 for normal
398 * weight tables, and 0xff for reverse weight tables - and is used
399 * to terminate keys so that short keys sort before (after if reverse)
400 * longer keys.
402 * The field separator has a normal weight - although it cannot occur
403 * within a key unless it is the default (space+tab).
405 * All other bytes map to the appropriate value for the sort order.
406 * Numeric sorts don't need any tables, they are reversed by negation.
408 * Global reverse sorts are done by writing the sorted keys in reverse
409 * order - the sort itself is stil forwards.
410 * This means that weights are only ever used when generating keys, any
411 * sort of the original data bytes is always forwards and unweighted.
413 * Note: this is only good for ASCII sorting. For different LC 's,
414 * all bets are off.
416 * itable[] and dtable[] are the masks for -i (ignore non-printables)
417 * and -d (only sort blank and alphanumerics).
419 void
420 settables(void)
422 int i;
423 int next_weight = 1;
424 int rev_weight = 254;
426 ascii[REC_D] = 0;
427 Rascii[REC_D] = 255;
428 Ftable[REC_D] = 0;
429 RFtable[REC_D] = 255;
431 for (i = 0; i < 256; i++) {
432 if (i == REC_D)
433 continue;
434 ascii[i] = next_weight;
435 Rascii[i] = rev_weight;
436 if (Ftable[i] == 0) {
437 Ftable[i] = next_weight;
438 RFtable[i] = rev_weight;
439 Ftable[tolower(i)] = next_weight;
440 RFtable[tolower(i)] = rev_weight;
442 next_weight++;
443 rev_weight--;
445 if (i == '\n' || isprint(i))
446 itable[i] = 1;
448 if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
449 dtable[i] = 1;