4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #pragma ident "%Z%%M% %I% %E% SMI"
35 * sort(1) supports two methods for specifying the sort key: the original,
36 * now-obsolete, +n -m form and the POSIX -k n,m form. We refer to the former
37 * as "old specifiers" and the latter as "new specifiers". The options()
38 * function parses the command line arguments given to sort, placing the sort
39 * key specifiers in the internal representation used in fields.c.
41 * Equivalence of specifiers
42 * One of sort(1)'s standard peculiarities is the transformation of the
43 * character offsets and field numbering between the new and old style field
44 * specifications. We simply quote from the Single Unix standard:
50 * undefined when z == 0, U contains b, and -t is set
51 * -k w+1.x+1T,y.0U when z == 0 otherwise
52 * -k w+1.x+1T,y+1.zU when z > 0
54 * Undoubtedly, this seemed logical at the time. (Using only the field head
55 * as the coordinate, as done in the obsolete version, seems much simpler.)
56 * The reverse map is where the key specifier
62 * undefined when z == 0, U contains b, and -t is set
63 * +w-1.x-1T -y.0U when z == 0 otherwise
64 * +w-1.x-1T -y-1.zU when z > 0
66 * in the obsolete syntax. Because the original key specifiers lead to a
67 * simpler implementation, the internal representation of a field in this
68 * implementation of sort is mostly that given by the obsolete syntax.
72 * While a key specifier in the obsolete +m ... -n form is being defined (that
73 * is, before the closing -n is seen), a narrower set of options is permitted.
74 * We specify this smaller set of options in OLD_SPEC_OPTIONS_STRING.
/*
 * OPTIONS_STRING is the option string options() hands to getopt();
 * OLD_SPEC_OPTIONS_STRING is the one parse_old_field_spec() hands to getopt().
 * OPTIONS_OLDSPEC and OPTIONS_STARTSPEC are bit flags tested by
 * parse_field_spec() through its flags argument.
 */
76 #define OPTIONS_STRING "cmuo:T:z:dfiMnrbt:k:S:0123456789"
77 #define OLD_SPEC_OPTIONS_STRING "bdfiMnrcmuo:T:z:t:k:S:"
79 #define OPTIONS_OLDSPEC 0x1 /* else new-style spec */
80 #define OPTIONS_STARTSPEC 0x2 /* else end spec */
87 for (i
= 0; i
< strlen(C
); i
++)
88 if (!isdigit((uchar_t
)C
[i
]))
95 * If a field specified by the -k option or by the +n syntax contains any
96 * modifiers, then the current global field modifiers are not inherited.
/*
 * field_spec_has_modifiers() uses strspn() to measure the leading run of C
 * that consists only of digits, '.' and ',', and compares the length of that
 * run with the caller-supplied length.
 *
 *   C       key specifier text to inspect
 *   length  length the caller considers to be the whole specifier
 *
 * Callers in this file use the result as a truth value and, when it is true,
 * create the new field with FIELD_MODIFIERS_DEFINED set.
 *
 * NOTE(review): the return type and the statements selected by the comparison
 * are missing from this listing.  Presumably the function reports "no
 * modifiers" when the run covers the whole specifier -- confirm against the
 * complete source.
 */
99 field_spec_has_modifiers(char *C
, int length
)
101 int p_nonmodifiers
= strspn(C
, ",.1234567890");
103 if (p_nonmodifiers
== length
)
/*
 * field_apply_all() walks the list that starts at fc and is linked through
 * f_next.  Each field whose f_options does not have FIELD_MODIFIERS_DEFINED
 * set has the bits in flags ORed into its f_options; fields that do have
 * FIELD_MODIFIERS_DEFINED set are left unchanged.
 *
 *   fc     first field of the list; the loop tests the pointer before each
 *          iteration, so a NULL list results in no work
 *   flags  option bits to add
 */
110 field_apply_all(field_t
*fc
, flag_t flags
)
114 for (f
= fc
; f
; f
= f
->f_next
)
115 if ((f
->f_options
& FIELD_MODIFIERS_DEFINED
) == 0)
116 f
->f_options
|= flags
;
/*
 * parse_field_spec() decodes one position of a key specifier, held in the
 * first length characters of C, and records the result in the field F.
 *
 *   F       field that receives the decoded position and modifiers
 *   C       specifier text: digits, an optional '.' followed by more digits,
 *           then any remaining characters
 *   flags   OPTIONS_STARTSPEC selects the start position of the key;
 *           OPTIONS_OLDSPEC marks the obsolete syntax
 *   length  number of characters of C to examine
 *
 * Three indices drive the parse, each capped at length:
 *   p_period     index of the first '.' in C
 *   p_modifiers  length of the leading run of digits and periods
 *   p_boundary   the smaller of the two
 * Digits before p_boundary accumulate into the decimal value field.  When a
 * period precedes p_modifiers, the digits after the period and before
 * p_modifiers accumulate into the decimal value offset.  Characters from
 * p_modifiers up to length are then visited one at a time; the visible
 * assignments set FIELD_DICTIONARY_ORDER, FIELD_FOLD_UPPERCASE,
 * FIELD_IGNORE_NONPRINTABLES or FIELD_REVERSE_COMPARISONS in F->f_options, or
 * set F->f_species to MONTH or NUMERIC.
 *
 * With OPTIONS_STARTSPEC set, field and offset are stored in f_start_field
 * and f_start_offset, and FIELD_IGNORE_BLANKS_START is added when blanks_flag
 * is nonzero.  The remaining statements store them in f_end_field and
 * f_end_offset and add FIELD_IGNORE_BLANKS_END when blanks_flag is nonzero.
 *
 * NOTE(review): this listing is missing the local declarations, the labels
 * that map each modifier character to its assignment, the code that sets
 * blanks_flag and offset_seen, the bodies of the two tests for a specifier
 * that is not OPTIONS_OLDSPEC, the line separating the start and end branches
 * (presumably an else), and the return statement.  Callers assign or discard
 * the return value, but its meaning cannot be determined from here.
 */
120 parse_field_spec(field_t
*F
, char *C
, int flags
, int length
)
122 int p_period
= MIN(length
, strcspn(C
, "."));
123 int p_modifiers
= MIN(length
, strspn(C
, ".1234567890"));
124 int p_boundary
= MIN(p_period
, p_modifiers
);
131 for (i
= 0; i
< p_boundary
; i
++) {
132 if (isdigit((uchar_t
)C
[i
]))
133 field
= (10 * field
) + (C
[i
] - '0');
138 if (p_period
< p_modifiers
) {
139 for (i
= p_period
+ 1; i
< p_modifiers
; i
++) {
140 if (isdigit((uchar_t
)C
[i
])) {
142 offset
= (10 * offset
) + (C
[i
] - '0');
149 if (p_modifiers
< length
) {
150 for (i
= p_modifiers
; i
< length
; i
++) {
156 F
->f_options
|= FIELD_DICTIONARY_ORDER
;
159 F
->f_options
|= FIELD_FOLD_UPPERCASE
;
163 FIELD_IGNORE_NONPRINTABLES
;
166 F
->f_species
= MONTH
;
169 F
->f_species
= NUMERIC
;
173 FIELD_REVERSE_COMPARISONS
;
182 if (flags
& OPTIONS_STARTSPEC
) {
183 F
->f_start_field
= field
;
184 F
->f_start_offset
= offset
;
185 if ((flags
& OPTIONS_OLDSPEC
) != OPTIONS_OLDSPEC
) {
190 F
->f_options
|= blanks_flag
? FIELD_IGNORE_BLANKS_START
: 0;
192 F
->f_end_field
= field
;
193 F
->f_end_offset
= offset
;
194 if ((flags
& OPTIONS_OLDSPEC
) != OPTIONS_OLDSPEC
&&
195 offset_seen
&& offset
!= 0)
197 F
->f_options
|= blanks_flag
? FIELD_IGNORE_BLANKS_END
: 0;
/*
 * parse_new_field_spec() turns the argument of a -k option into a field and,
 * if the field is usable, appends it to S->m_fields_head.
 *
 *   S    sort state whose field chain may receive the new field
 *   arg  specifier text; a ',' separates the start position from the
 *        optional end position
 *
 * Steps visible in this listing:
 *   1. p_comma is the index of the first ',' in arg, capped at strlen(arg).
 *   2. If field_spec_has_modifiers() is true for the whole argument, the
 *      field is created with field_new(NULL) and its f_options is set to
 *      FIELD_MODIFIERS_DEFINED.
 *   3. The text before the comma is parsed with OPTIONS_STARTSPEC; if a comma
 *      was found, the text after it is parsed as the end position (flags 0).
 *   4. A negative f_start_field or f_start_offset produces the "not a
 *      supported field specifier" warning and is clamped to 0 with MAX().
 *   5. The field is added to the chain when f_end_field is -1, when the start
 *      field precedes the end field, or when both are the same field and the
 *      start offset precedes the end offset.  Otherwise, if S->m_verbose is
 *      set, the "illegal field" warning is issued.
 *
 * NOTE(review): the return type, the local declarations, the branch taken
 * when the specifier has no modifiers, whatever is done with p, any condition
 * guarding the first warning, and the return statement are missing from this
 * listing.
 */
204 parse_new_field_spec(sort_t
*S
, char *arg
)
206 int length
= strlen(arg
);
207 int p_comma
= MIN(length
, strcspn(arg
, ","));
212 * New field specifiers do not inherit from the general specifier if
213 * they have any modifiers set. (This is specifically tested in the VSC
214 * test suite, assertion 32 for POSIX.cmd/sort.)
216 if (field_spec_has_modifiers(arg
, length
)) {
217 nF
= field_new(NULL
);
218 nF
->f_options
= FIELD_MODIFIERS_DEFINED
;
222 p
= parse_field_spec(nF
, arg
, OPTIONS_STARTSPEC
, p_comma
);
227 if (p_comma
< length
) {
228 p
= parse_field_spec(nF
, &(arg
[p_comma
+ 1]), 0,
229 strlen(&(arg
[p_comma
+ 1])));
234 if (nF
->f_start_field
< 0 || nF
->f_start_offset
< 0) {
236 warn("-k %s is not a supported field specifier\n", arg
);
238 nF
->f_start_field
= MAX(nF
->f_start_field
, 0);
239 nF
->f_start_offset
= MAX(nF
->f_start_offset
, 0);
242 * If the starting field exceeds a defined ending field, convention
243 * dictates that the field is ignored.
245 if (nF
->f_end_field
== -1 || nF
->f_start_field
< nF
->f_end_field
||
246 (nF
->f_start_field
== nF
->f_end_field
&&
247 nF
->f_start_offset
< nF
->f_end_offset
)) {
248 field_add_to_chain(&(S
->m_fields_head
), nF
);
249 } else if (S
->m_verbose
) {
250 warn("illegal field -k %s omitted", arg
);
255 * parse_old_field_spec() is getopt()-aware; it may modify the values of optind,
256 * optarg, and so forth, to correctly determine the characteristics being
257 * assigned to the current field.
/*
 * parse_old_field_spec() handles a key that starts at argv[optind].  options()
 * calls it only for an argument whose first character is '+', and treats a
 * false return as "this argument is a filename after all".
 *
 *   S     sort state whose field chain receives the completed field
 *   argc  argument count, used to bound the scan and passed to getopt()
 *   argv  argument vector; argv[optind] is the specifier being opened
 *
 * Steps visible in this listing:
 *   1. The text after the leading character is checked with
 *      field_spec_has_modifiers(); if it has modifiers the field is created
 *      with field_new(NULL) and f_options set to FIELD_MODIFIERS_DEFINED.
 *   2. That text is parsed as the start position with
 *      OPTIONS_OLDSPEC | OPTIONS_STARTSPEC.
 *   3. optind is advanced and the following arguments are scanned.  An
 *      argument of at least two characters that begins with '-' followed by a
 *      digit is parsed as the end position (OPTIONS_OLDSPEC only) and the
 *      field is added to S->m_fields_head.
 *   4. Any other argument is passed to getopt() with OLD_SPEC_OPTIONS_STRING.
 *      The visible assignments add FIELD_IGNORE_BLANKS_START,
 *      FIELD_DICTIONARY_ORDER, FIELD_FOLD_UPPERCASE,
 *      FIELD_IGNORE_NONPRINTABLES or FIELD_REVERSE_COMPARISONS to
 *      nF->f_options, or set nF->f_species to MONTH or NUMERIC.  Other paths
 *      add the field to the chain or call die(EMSG_UNKN_OPTION).
 *   5. The field is also added to the chain by the last visible statement.
 *
 * NOTE(review): the return type, local declarations, the length argument of
 * the first parse_field_spec() call, the labels that map option letters to
 * the assignments above, the optind adjustments for options with and without
 * arguments, and every return statement are missing from this listing.
 */
260 parse_old_field_spec(sort_t
*S
, int argc
, char *argv
[])
264 char *arg
= argv
[optind
];
266 if (field_spec_has_modifiers(arg
+ 1, strlen(arg
+ 1))) {
267 nF
= field_new(NULL
);
268 nF
->f_options
= FIELD_MODIFIERS_DEFINED
;
273 p
= parse_field_spec(nF
, arg
+ 1, OPTIONS_OLDSPEC
| OPTIONS_STARTSPEC
,
282 * In the case that getopt() returns '?' (unrecognized option) or EOF
283 * (non-option argument), the field is considered closed.
285 for (arg
= argv
[++optind
]; optind
< argc
; arg
= argv
[optind
]) {
286 if (strlen(arg
) >= 2 && *arg
== '-' &&
287 isdigit(*(uchar_t
*)(arg
+ 1))) {
288 (void) parse_field_spec(nF
, arg
+ 1,
289 OPTIONS_OLDSPEC
, strlen(arg
) - 1);
290 field_add_to_chain(&(S
->m_fields_head
), nF
);
295 if ((c
= getopt(argc
, argv
, OLD_SPEC_OPTIONS_STRING
)) != EOF
) {
298 nF
->f_options
|= FIELD_IGNORE_BLANKS_START
;
301 nF
->f_options
|= FIELD_DICTIONARY_ORDER
;
304 nF
->f_options
|= FIELD_FOLD_UPPERCASE
;
307 nF
->f_options
|= FIELD_IGNORE_NONPRINTABLES
;
310 nF
->f_species
= MONTH
;
313 nF
->f_species
= NUMERIC
;
316 nF
->f_options
|= FIELD_REVERSE_COMPARISONS
;
323 * Options without arguments.
326 field_add_to_chain(&(S
->m_fields_head
), nF
);
336 * Options with arguments.
338 if (optarg
== argv
[optind
- 1] + 2) {
343 field_add_to_chain(&(S
->m_fields_head
), nF
);
347 die(EMSG_UNKN_OPTION
);
355 field_add_to_chain(&(S
->m_fields_head
), nF
);
/*
 * options() consumes the command line and records the outcome in the sort
 * state S.
 *
 *   S     sort state to fill in
 *   argc  argument count
 *   argv  argument vector
 *
 * While optind is below argc, each pass does the following:
 *   - An argument that begins with "-y" is examined first.  When it is
 *     exactly "-y", the next argument is tested with is_number().
 *   - getopt() is called with OPTIONS_STRING.  The visible assignments set
 *     m_check_if_sorted_only, m_unique_lines, m_output_filename,
 *     m_tmpdir_template, m_default_species, m_field_separator and
 *     m_memory_limit, and OR modifier bits into m_field_options.  The
 *     dictionary-order, fold-uppercase, ignore-nonprintables and
 *     reverse-comparison bits are also pushed onto the existing fields with
 *     field_apply_all().  Selecting NUMERIC updates f_species of the fields
 *     in m_fields_head that are tested against FIELD_MODIFIERS_DEFINED.
 *     Selecting MONTH clears FIELD_IGNORE_BLANKS_START from m_field_options.
 *     A -k style argument is handed to parse_new_field_spec().
 *   - The field separator is stored in m_field_separator.sc when
 *     S->m_single_byte_locale is set (the two-character text \t is accepted
 *     as a tab); the other visible path converts it with mbtowc() into
 *     m_field_separator.wc.
 *   - When getopt() returns EOF and the previous argument was "--", every
 *     remaining argument is treated as an input: "-" sets m_input_from_stdin
 *     and stream_add_file_to_chain() is called on m_input_streams.
 *   - Otherwise "-" sets m_input_from_stdin, and an argument that either
 *     does not begin with '+' or is rejected by parse_old_field_spec() is
 *     added to m_input_streams as a filename.
 *
 * After the loop:
 *   - m_input_from_stdin is set when m_input_streams is NULL, and
 *     m_output_to_stdout is set when m_output_filename is NULL.
 *   - With no fields defined, FIELD_IGNORE_BLANKS_START is cleared from
 *     m_field_options and a single field is created with
 *     parse_new_field_spec(S, "1").
 *   - m_entire_line is set when m_field_separator.wc is 0, m_default_species
 *     is ALPHA and m_field_options is 0.
 *
 * NOTE(review): the labels that map option letters to the assignments above
 * are missing from this listing, as are the return type, the local
 * declarations, part of the "-y" test, the arguments of several calls and
 * most closing braces.  The entire-line test appears to sit inside the
 * no-fields branch, but that cannot be confirmed from here.  The memory-limit
 * diagnostic prints with %d; confirm that this matches the type of its
 * argument, which is not visible.
 */
360 options(sort_t
*S
, int argc
, char *argv
[])
365 while (optind
< argc
) {
366 if (strncmp("-y", argv
[optind
], strlen("-y")) == 0) {
368 * The -y [kmem] option violates the standard syntax
369 * outlined in intro(1). we have to be a little fancy
370 * to determine if the next argument is a valid integer.
371 * (note, of course, that the previous sort(1) had no
372 * mechanism to resolve a final
377 * -y 99999, file stdin
379 * Now one can unambiguously use
383 * to distinguish these cases.
385 * That said, we do not use the information passed using
386 * -y option in sort(1); we provide the argument to
387 * preserve compatibility for existing scripts.
389 if (strlen(argv
[optind
]) == strlen("-y") &&
391 is_number(argv
[optind
+ 1]))
397 if ((c
= getopt(argc
, argv
, OPTIONS_STRING
)) != EOF
) {
400 S
->m_check_if_sorted_only
= 1;
408 S
->m_unique_lines
= 1;
412 S
->m_output_filename
= optarg
;
416 S
->m_tmpdir_template
= optarg
;
421 * ignore optarg -- obsolete
426 S
->m_field_options
|= FIELD_DICTIONARY_ORDER
;
427 field_apply_all(S
->m_fields_head
,
428 FIELD_DICTIONARY_ORDER
);
432 S
->m_field_options
|= FIELD_FOLD_UPPERCASE
;
433 field_apply_all(S
->m_fields_head
,
434 FIELD_FOLD_UPPERCASE
);
438 S
->m_field_options
|=
439 FIELD_IGNORE_NONPRINTABLES
;
440 field_apply_all(S
->m_fields_head
,
441 FIELD_IGNORE_NONPRINTABLES
);
445 S
->m_default_species
= MONTH
;
446 S
->m_field_options
&=
447 ~FIELD_IGNORE_BLANKS_START
;
451 S
->m_default_species
= NUMERIC
;
455 for (f
= S
->m_fields_head
; f
;
458 FIELD_MODIFIERS_DEFINED
) ==
460 f
->f_species
= NUMERIC
;
465 S
->m_field_options
|=
466 FIELD_IGNORE_BLANKS_START
|
467 FIELD_IGNORE_BLANKS_END
;
471 S
->m_field_options
|=
472 FIELD_REVERSE_COMPARISONS
;
473 field_apply_all(S
->m_fields_head
,
474 FIELD_REVERSE_COMPARISONS
);
481 if (S
->m_single_byte_locale
) {
483 * Most debuggers can't take tabs as
484 * input arguments, so we provide an
485 * escape sequence to allow testing of
486 * this special case for the DEBUG
489 S
->m_field_separator
.sc
=
491 xstreql(optarg
, "\\t") ? '\t' :
495 (void) mbtowc(&S
->m_field_separator
.wc
,
503 (void) parse_new_field_spec(S
, optarg
);
507 S
->m_memory_limit
= strtomem(optarg
);
509 (void) fprintf(stderr
, CMDNAME
510 ": limiting size to %d bytes\n",
516 * We never take a naked -999; these should always be
517 * associated with a preceding +000.
538 * Go back for next argument.
544 * There are three (interpretable) possibilities for getopt() to
545 * return EOF with arguments on the command line: we have seen
546 * the "end-of-options" token, --, we have encountered the
547 * old-style field definition, +NNN, or we have found a
550 * In the second case, we must also search for the optional -NNN
551 * field terminal definition. (since "+joe", for instance, is
552 * a valid filename, we must handle this pattern as well.) This
553 * is performed by parse_old_field_spec().
555 if (xstreql(argv
[optind
- 1], "--")) {
557 * Process all arguments following end-of-options token
560 while (optind
< argc
) {
561 if (xstreql(argv
[optind
], "-"))
562 S
->m_input_from_stdin
= 1;
564 stream_add_file_to_chain(
565 &(S
->m_input_streams
),
574 if (xstreql(argv
[optind
], "-")) {
575 S
->m_input_from_stdin
= 1;
577 } else if (*(argv
[optind
]) != '+' ||
578 !parse_old_field_spec(S
, argc
, argv
)) {
580 * It's a filename, because it either doesn't
581 * start with '+', or if it did, it wasn't an
582 * actual field specifier.
584 stream_add_file_to_chain(&(S
->m_input_streams
),
591 if (S
->m_input_streams
== NULL
)
592 S
->m_input_from_stdin
= 1;
594 if (S
->m_output_filename
== NULL
)
595 S
->m_output_to_stdout
= 1;
598 * If no fields, then one great field. However, if the -b option was
599 * set globally, be sure to ignore it, as per UNIX98.
601 if (S
->m_fields_head
== NULL
) {
602 S
->m_field_options
&= ~FIELD_IGNORE_BLANKS_START
;
604 (void) parse_new_field_spec(S
, "1");
606 * "Entire line" fast path is only valid if no delimiter has
607 * been set and no modifiers have been applied.
609 if (S
->m_field_separator
.wc
== 0 &&
610 S
->m_default_species
== ALPHA
&&
611 S
->m_field_options
== 0)
612 S
->m_entire_line
= 1;