1 /* $NetBSD: fields.c,v 1.33 2013/01/20 10:12:58 apb Exp $ */
4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Ben Harris and Jaromir Dolecek.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
34 * The Regents of the University of California. All rights reserved.
36 * This code is derived from software contributed to Berkeley by
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 /* Subroutines to generate sort keys. */
68 __RCSID("$NetBSD: fields.c,v 1.33 2013/01/20 10:12:58 apb Exp $");
70 #define SKIP_BLANKS(ptr) { \
71 if (BLANK & d_mask[*(ptr)]) \
72 while (BLANK & d_mask[*(++(ptr))]); \
75 #define NEXTCOL(pos) { \
77 while (BLANK & l_d_mask[*(++pos)]); \
78 while ((*(pos+1) != '\0') && !((FLD_D | REC_D_F) & l_d_mask[*++pos]));\
81 static u_char
*enterfield(u_char
*, const u_char
*, struct field
*, int);
82 static u_char
*number(u_char
*, const u_char
*, u_char
*, u_char
*, int);
83 static u_char
*length(u_char
*, const u_char
*, u_char
*, u_char
*, int);
86 static u_char
*numhex(u_char
*, const u_char
*, u_char
*, u_char
*, int);
87 #endif /* defined(__minix) */
89 #define DECIMAL_POINT '.'
92 * constructs sort key with leading recheader, followed by the key,
93 * followed by the original line.
96 enterkey(RECHEADER
*keybuf
, const u_char
*keybuf_end
, u_char
*line_data
,
97 size_t line_size
, struct field fieldtable
[])
98 /* keybuf: pointer to start of key */
102 u_char
*lineend
, *pos
;
103 const u_char
*endkey
;
105 struct coldesc
*clpos
;
111 lineend
= line_data
+ line_size
-1;
112 /* don't include rec_delimiter */
114 for (i
= 0; i
< ncols
; i
++) {
116 for (; (col
< clpos
->num
) && (pos
< lineend
); col
++) {
121 clpos
->start
= SEP_FLAG
? pos
+ 1 : pos
;
125 if (pos
>= lineend
) {
126 clpos
->end
= lineend
;
131 for (; i
<= ncols
; i
++)
132 clist
[i
].start
= clist
[i
].end
= lineend
;
133 if (clist
[0].start
< line_data
)
137 * We write the sort keys (concatenated) followed by the
138 * original line data (for output) as the 'keybuf' data.
139 * keybuf->length is the number of key bytes + data bytes.
140 * keybuf->offset is the number of key bytes.
141 * We add a record separator weight after the key in case
142 * (as is usual) we need to preserve the order of equal lines,
144 * The key itself will have had the correct weight applied.
146 keypos
= keybuf
->data
;
147 endkey
= keybuf_end
- line_size
- 1;
148 if (endkey
<= keypos
)
149 /* No room for any key bytes */
152 for (ftpos
= fieldtable
+ 1; ftpos
->icol
.num
; ftpos
++) {
153 if ((keypos
= enterfield(keypos
, endkey
, ftpos
,
154 fieldtable
->flags
)) == NULL
)
158 keybuf
->offset
= keypos
- keybuf
->data
;
159 keybuf
->length
= keybuf
->offset
+ line_size
;
162 * Posix requires that equal keys be further sorted by the
163 * entire original record.
164 * NetBSD has (at least for some time) kept equal keys in
165 * their original order.
166 * For 'sort -u' posix_sort is unset.
168 keybuf
->keylen
= posix_sort
? keybuf
->length
: keybuf
->offset
;
170 memcpy(keypos
, line_data
, line_size
);
175 * constructs a field (as defined by -k) within a key
178 enterfield(u_char
*tablepos
, const u_char
*endkey
, struct field
*cur_fld
,
181 u_char
*start
, *end
, *lineend
, *mask
, *lweight
;
182 struct column icol
, tcol
;
185 icol
= cur_fld
->icol
;
186 tcol
= cur_fld
->tcol
;
187 flags
= cur_fld
->flags
;
188 start
= icol
.p
->start
;
189 lineend
= clist
[ncols
].end
;
192 start
+= icol
.indent
;
193 start
= min(start
, lineend
);
203 end
= min(end
, lineend
);
209 return length(tablepos
, endkey
, start
, end
, flags
);
211 return number(tablepos
, endkey
, start
, end
, flags
);
214 return numhex(tablepos
, endkey
, start
, end
, flags
);
215 #endif /* defined(__minix) */
217 /* Bound check space - assuming nothing is skipped */
218 if (tablepos
+ (end
- start
) + 1 >= endkey
)
221 mask
= cur_fld
->mask
;
222 lweight
= cur_fld
->weights
;
223 for (; start
< end
; start
++) {
224 if (!mask
|| mask
[*start
]) {
225 *tablepos
++ = lweight
[*start
];
228 /* Add extra byte (absent from lweight) to sort short keys correctly */
229 *tablepos
++ = lweight
[REC_D
];
234 * Numbers are converted to a floating point format (exponent & mantissa)
235 * so that they compare correctly as sequence of unsigned bytes.
236 * Bytes 0x00 and 0xff are used to terminate positive and negative numbers
237 * to ensure that 0.123 sorts after 0.12 and -0.123 sorts before -0.12.
239 * The first byte contain the overall sign, exponent sign and some of the
240 * exponent. These have to be ordered (-ve value, decreasing exponent),
241 * zero, (+ve value, increasing exponent).
243 * The first byte is 0x80 for zero, 0xc0 for +ve with exponent 0.
244 * -ve values are the 1's compliments (so 0x7f isn't used!).
246 * This only leaves 63 byte values for +ve exponents - which isn't enough.
247 * The largest 4 exponent values are used to hold a byte count of the
248 * number of following bytes that contain 8 exponent bits per byte,
249 * This lets us sort exponents from -2^31 to +2^31.
251 * The mantissa is stored 2 digits per byte offset by 0x40, for negative
252 * numbers the order must be reversed (they are bit inverted).
254 * Reverse sorts are done by inverting the sign of the number.
256 #define MAX_EXP_ENC ((int)sizeof(int))
259 number(u_char
*pos
, const u_char
*bufend
, u_char
*line
, u_char
*lineend
,
275 /* Give ourselves space for the key terminator */
278 /* Ensure we have enough space for the exponent */
279 if (pos
+ 1 + MAX_EXP_ENC
> bufend
)
283 if (*line
== '-') { /* set the sign */
286 } else if (*line
== '+') {
290 /* eat initial zeroes */
291 for (; *line
== '0' && line
< lineend
; line
++)
294 /* calculate exponents */
295 if (*line
== DECIMAL_POINT
) {
296 /* Decimal fraction */
298 while (*++line
== '0' && line
< lineend
)
301 /* Large (absolute) value, count digits */
302 for (tline
= line
; *tline
>= '0' &&
303 *tline
<= '9' && tline
< lineend
; tline
++)
307 /* If the first/next character isn't a digit, value is zero */
308 if (*line
< '1' || *line
> '9' || line
>= lineend
) {
309 /* This may be "0", "0.00", "000" or "fubar" but sorts as 0 */
310 /* XXX what about NaN, NAN, inf and INF */
315 /* Maybe here we should allow for e+12 (etc) */
317 if (exponent
< 0x40 - MAX_EXP_ENC
&& -exponent
< 0x40 - MAX_EXP_ENC
) {
318 /* Value ok for simple encoding */
319 /* exponent 0 is 0xc0 for +ve numbers and 0x40 for -ve ones */
321 *pos
++ = negate
^ exponent
;
323 /* Out or range for a single byte */
325 t
= exponent
> 0 ? exponent
: -exponent
;
326 /* Count how many 8-bit bytes are needed */
332 /* 'c' better be 0..3 here - but probably 0..1 */
333 /* Offset just outside valid range */
334 t
= c
+ 0x40 - MAX_EXP_ENC
;
337 *pos
++ = negate
^ (t
+ 0xc0);
338 /* now add each byte, most significant first */
340 *pos
++ = negate
^ (exponent
>> (c
* 8));
343 /* Finally add mantissa, 2 digits per byte */
344 for (last_nz_pos
= pos
; line
< lineend
; ) {
348 val
= (ch
- '0') * 10;
350 if (ch
== DECIMAL_POINT
&& !had_dp
) {
356 while (line
< lineend
) {
358 if (ch
== DECIMAL_POINT
&& !had_dp
) {
362 if (ch
< '0' || ch
> '9')
368 *pos
++ = negate
^ (val
+ 0x40);
373 /* Add key terminator, deleting any trailing "00" */
374 *last_nz_pos
++ = negate
;
376 return (last_nz_pos
);
380 length(u_char
*pos
, const u_char
*bufend
, u_char
*line
, u_char
*lineend
,
386 l
= snprintf((char *)buf
, sizeof(buf
), "%td", lineend
- line
);
387 return number(pos
, bufend
, buf
, buf
+ l
, flag
);
392 numhex(u_char
*pos
, const u_char
*bufend
, u_char
*line
, u_char
*lineend
,
399 sscanf((const char *) pos
, "%llx", &n
);
400 l
= snprintf((char *)buf
, sizeof(buf
), "%lld", n
);
401 return number(pos
, bufend
, buf
, buf
+ l
, flag
);
403 #endif /* defined(__minix) */