Sync with manuals from netbsd-8 branch.
[minix3.git] / usr.bin / sort / fields.c
blob6ed6cf9de779b4dcec28ca23aef2d990df4645f0
1 /* $NetBSD: fields.c,v 1.33 2013/01/20 10:12:58 apb Exp $ */
3 /*-
4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5 * All rights reserved.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Ben Harris and Jaromir Dolecek.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
32 /*-
33 * Copyright (c) 1993
34 * The Regents of the University of California. All rights reserved.
36 * This code is derived from software contributed to Berkeley by
37 * Peter McIlroy.
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
64 /* Subroutines to generate sort keys. */
66 #include "sort.h"
68 __RCSID("$NetBSD: fields.c,v 1.33 2013/01/20 10:12:58 apb Exp $");
70 #define SKIP_BLANKS(ptr) { \
71 if (BLANK & d_mask[*(ptr)]) \
72 while (BLANK & d_mask[*(++(ptr))]); \
75 #define NEXTCOL(pos) { \
76 if (!SEP_FLAG) \
77 while (BLANK & l_d_mask[*(++pos)]); \
78 while ((*(pos+1) != '\0') && !((FLD_D | REC_D_F) & l_d_mask[*++pos]));\
81 static u_char *enterfield(u_char *, const u_char *, struct field *, int);
82 static u_char *number(u_char *, const u_char *, u_char *, u_char *, int);
83 static u_char *length(u_char *, const u_char *, u_char *, u_char *, int);
85 #if defined(__minix)
86 static u_char *numhex(u_char *, const u_char *, u_char *, u_char *, int);
87 #endif /* defined(__minix) */
89 #define DECIMAL_POINT '.'
92 * constructs sort key with leading recheader, followed by the key,
93 * followed by the original line.
95 length_t
96 enterkey(RECHEADER *keybuf, const u_char *keybuf_end, u_char *line_data,
97 size_t line_size, struct field fieldtable[])
98 /* keybuf: pointer to start of key */
100 int i;
101 u_char *l_d_mask;
102 u_char *lineend, *pos;
103 const u_char *endkey;
104 u_char *keypos;
105 struct coldesc *clpos;
106 int col = 1;
107 struct field *ftpos;
109 l_d_mask = d_mask;
110 pos = line_data - 1;
111 lineend = line_data + line_size-1;
112 /* don't include rec_delimiter */
114 for (i = 0; i < ncols; i++) {
115 clpos = clist + i;
116 for (; (col < clpos->num) && (pos < lineend); col++) {
117 NEXTCOL(pos);
119 if (pos >= lineend)
120 break;
121 clpos->start = SEP_FLAG ? pos + 1 : pos;
122 NEXTCOL(pos);
123 clpos->end = pos;
124 col++;
125 if (pos >= lineend) {
126 clpos->end = lineend;
127 i++;
128 break;
131 for (; i <= ncols; i++)
132 clist[i].start = clist[i].end = lineend;
133 if (clist[0].start < line_data)
134 clist[0].start++;
137 * We write the sort keys (concatenated) followed by the
138 * original line data (for output) as the 'keybuf' data.
139 * keybuf->length is the number of key bytes + data bytes.
140 * keybuf->offset is the number of key bytes.
141 * We add a record separator weight after the key in case
142 * (as is usual) we need to preserve the order of equal lines,
143 * and for 'sort -u'.
144 * The key itself will have had the correct weight applied.
146 keypos = keybuf->data;
147 endkey = keybuf_end - line_size - 1;
148 if (endkey <= keypos)
149 /* No room for any key bytes */
150 return 1;
152 for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++) {
153 if ((keypos = enterfield(keypos, endkey, ftpos,
154 fieldtable->flags)) == NULL)
155 return (1);
158 keybuf->offset = keypos - keybuf->data;
159 keybuf->length = keybuf->offset + line_size;
162 * Posix requires that equal keys be further sorted by the
163 * entire original record.
164 * NetBSD has (at least for some time) kept equal keys in
165 * their original order.
166 * For 'sort -u' posix_sort is unset.
168 keybuf->keylen = posix_sort ? keybuf->length : keybuf->offset;
170 memcpy(keypos, line_data, line_size);
171 return (0);
175 * constructs a field (as defined by -k) within a key
177 static u_char *
178 enterfield(u_char *tablepos, const u_char *endkey, struct field *cur_fld,
179 int gflags)
181 u_char *start, *end, *lineend, *mask, *lweight;
182 struct column icol, tcol;
183 u_int flags;
185 icol = cur_fld->icol;
186 tcol = cur_fld->tcol;
187 flags = cur_fld->flags;
188 start = icol.p->start;
189 lineend = clist[ncols].end;
190 if (flags & BI)
191 SKIP_BLANKS(start);
192 start += icol.indent;
193 start = min(start, lineend);
195 if (!tcol.num)
196 end = lineend;
197 else {
198 if (tcol.indent) {
199 end = tcol.p->start;
200 if (flags & BT)
201 SKIP_BLANKS(end);
202 end += tcol.indent;
203 end = min(end, lineend);
204 } else
205 end = tcol.p->end;
208 if (flags & L)
209 return length(tablepos, endkey, start, end, flags);
210 if (flags & N)
211 return number(tablepos, endkey, start, end, flags);
212 #if defined(__minix)
213 if (flags & X)
214 return numhex(tablepos, endkey, start, end, flags);
215 #endif /* defined(__minix) */
217 /* Bound check space - assuming nothing is skipped */
218 if (tablepos + (end - start) + 1 >= endkey)
219 return NULL;
221 mask = cur_fld->mask;
222 lweight = cur_fld->weights;
223 for (; start < end; start++) {
224 if (!mask || mask[*start]) {
225 *tablepos++ = lweight[*start];
228 /* Add extra byte (absent from lweight) to sort short keys correctly */
229 *tablepos++ = lweight[REC_D];
230 return tablepos;
234 * Numbers are converted to a floating point format (exponent & mantissa)
235 * so that they compare correctly as sequence of unsigned bytes.
236 * Bytes 0x00 and 0xff are used to terminate positive and negative numbers
237 * to ensure that 0.123 sorts after 0.12 and -0.123 sorts before -0.12.
239 * The first byte contain the overall sign, exponent sign and some of the
240 * exponent. These have to be ordered (-ve value, decreasing exponent),
241 * zero, (+ve value, increasing exponent).
243 * The first byte is 0x80 for zero, 0xc0 for +ve with exponent 0.
244 * -ve values are the 1's compliments (so 0x7f isn't used!).
246 * This only leaves 63 byte values for +ve exponents - which isn't enough.
247 * The largest 4 exponent values are used to hold a byte count of the
248 * number of following bytes that contain 8 exponent bits per byte,
249 * This lets us sort exponents from -2^31 to +2^31.
251 * The mantissa is stored 2 digits per byte offset by 0x40, for negative
252 * numbers the order must be reversed (they are bit inverted).
254 * Reverse sorts are done by inverting the sign of the number.
256 #define MAX_EXP_ENC ((int)sizeof(int))
258 static u_char *
259 number(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend,
260 int reverse)
262 int exponent = -1;
263 int had_dp = 0;
264 u_char *tline;
265 char ch;
266 unsigned int val;
267 u_char *last_nz_pos;
268 u_char negate;
270 if (reverse & R)
271 negate = 0xff;
272 else
273 negate = 0;
275 /* Give ourselves space for the key terminator */
276 bufend--;
278 /* Ensure we have enough space for the exponent */
279 if (pos + 1 + MAX_EXP_ENC > bufend)
280 return (NULL);
282 SKIP_BLANKS(line);
283 if (*line == '-') { /* set the sign */
284 negate ^= 0xff;
285 line++;
286 } else if (*line == '+') {
287 line++;
290 /* eat initial zeroes */
291 for (; *line == '0' && line < lineend; line++)
292 continue;
294 /* calculate exponents */
295 if (*line == DECIMAL_POINT) {
296 /* Decimal fraction */
297 had_dp = 1;
298 while (*++line == '0' && line < lineend)
299 exponent--;
300 } else {
301 /* Large (absolute) value, count digits */
302 for (tline = line; *tline >= '0' &&
303 *tline <= '9' && tline < lineend; tline++)
304 exponent++;
307 /* If the first/next character isn't a digit, value is zero */
308 if (*line < '1' || *line > '9' || line >= lineend) {
309 /* This may be "0", "0.00", "000" or "fubar" but sorts as 0 */
310 /* XXX what about NaN, NAN, inf and INF */
311 *pos++ = 0x80;
312 return pos;
315 /* Maybe here we should allow for e+12 (etc) */
317 if (exponent < 0x40 - MAX_EXP_ENC && -exponent < 0x40 - MAX_EXP_ENC) {
318 /* Value ok for simple encoding */
319 /* exponent 0 is 0xc0 for +ve numbers and 0x40 for -ve ones */
320 exponent += 0xc0;
321 *pos++ = negate ^ exponent;
322 } else {
323 /* Out or range for a single byte */
324 int c, t;
325 t = exponent > 0 ? exponent : -exponent;
326 /* Count how many 8-bit bytes are needed */
327 for (c = 0; ; c++) {
328 t >>= 8;
329 if (t == 0)
330 break;
332 /* 'c' better be 0..3 here - but probably 0..1 */
333 /* Offset just outside valid range */
334 t = c + 0x40 - MAX_EXP_ENC;
335 if (exponent < 0)
336 t = -t;
337 *pos++ = negate ^ (t + 0xc0);
338 /* now add each byte, most significant first */
339 for (; c >= 0; c--)
340 *pos++ = negate ^ (exponent >> (c * 8));
343 /* Finally add mantissa, 2 digits per byte */
344 for (last_nz_pos = pos; line < lineend; ) {
345 if (pos >= bufend)
346 return NULL;
347 ch = *line++;
348 val = (ch - '0') * 10;
349 if (val > 90) {
350 if (ch == DECIMAL_POINT && !had_dp) {
351 had_dp = 1;
352 continue;
354 break;
356 while (line < lineend) {
357 ch = *line++;
358 if (ch == DECIMAL_POINT && !had_dp) {
359 had_dp = 1;
360 continue;
362 if (ch < '0' || ch > '9')
363 line = lineend;
364 else
365 val += ch - '0';
366 break;
368 *pos++ = negate ^ (val + 0x40);
369 if (val != 0)
370 last_nz_pos = pos;
373 /* Add key terminator, deleting any trailing "00" */
374 *last_nz_pos++ = negate;
376 return (last_nz_pos);
379 static u_char *
380 length(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend,
381 int flag)
383 u_char buf[32];
384 int l;
385 SKIP_BLANKS(line);
386 l = snprintf((char *)buf, sizeof(buf), "%td", lineend - line);
387 return number(pos, bufend, buf, buf + l, flag);
390 #if defined(__minix)
391 static u_char *
392 numhex(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend,
393 int flag)
395 u_char buf[32];
396 int64_t n = 0;
397 int l;
398 SKIP_BLANKS(line);
399 sscanf((const char *) pos, "%llx", &n);
400 l = snprintf((char *)buf, sizeof(buf), "%lld", n);
401 return number(pos, bufend, buf, buf + l, flag);
403 #endif /* defined(__minix) */