1 /* vi: set sw=4 ts=4: */
3 * Mini tr implementation for busybox
5 ** Copyright (c) 1987,1997, Prentice Hall All rights reserved.
7 * The name of Prentice Hall may not be used to endorse or promote
8 * products derived from this software without specific prior
11 * Copyright (c) Michiel Huisjes
13 * This version of tr is adapted from Minix tr and was modified
14 * by Erik Andersen <andersen@codepoet.org> to be used in busybox.
16 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
18 /* http://www.opengroup.org/onlinepubs/009695399/utilities/tr.html
19 * TODO: xdigit, graph, print
25 #define TR_OPT_complement (1<<0)
26 #define TR_OPT_delete (1<<1)
27 #define TR_OPT_squeeze_reps (1<<2)
28 /* some "globals" shared across this file */
29 /* these last are pointers to static buffers declared in tr_main */
/*
 * The TR_OPT_* bits above are OR-ed into the flags word in tr_main();
 * convert() tests TR_OPT_delete and TR_OPT_squeeze_reps, tr_main() tests
 * TR_OPT_complement.
 *
 * Roles of the pointers below, as used in this file:
 *   poutput - byte buffer that convert() appends to and writes to stdout
 *   pvector - table indexed by byte value, filled by map()
 *   pinvec  - table indexed by the input byte, tested as a flag in convert()
 *   poutvec - table indexed by the coded byte, tested as a flag in convert()
 * NOTE(review): the statements that point these at the buffers reserved in
 * tr_main() are not visible in this listing -- confirm.
 */
30 static char *poutput
, *pvector
, *pinvec
, *poutvec
;
/*
 * convert() - per-byte filter loop of tr.
 *
 * Visible behaviour: input is read from STDIN_FILENO into bb_common_bufsiz1
 * (up to BUFSIZ bytes per read), each byte c is examined, and the coded byte
 * is appended to poutput, which is written to STDOUT_FILENO.
 *
 * flags: bitmask of TR_OPT_* bits; TR_OPT_delete and TR_OPT_squeeze_reps are
 *        tested here.
 *
 * Declared ATTRIBUTE_NORETURN, so callers should not expect control back.
 * NOTE(review): the loop construct, the assignment of coded and the exit
 * path are not visible in this listing -- confirm against the full file.
 */
32 static void ATTRIBUTE_NORETURN
convert(const smalluint flags
)
34 size_t read_chars
= 0, in_index
= 0, out_index
= 0, c
, coded
, last
= -1;
/*
 * NOTE(review): every variable above is size_t (unsigned), so last = -1
 * wraps to the maximum size_t value; presumably a sentinel meaning that no
 * byte has been emitted yet -- confirm.
 */
37 /* If we're out of input, flush output and read more input. */
38 if (in_index
== read_chars
) {
/* Write the out_index bytes accumulated in poutput. */
40 xwrite(STDOUT_FILENO
, (char *)poutput
, out_index
);
/*
 * NOTE(review): read() returns a signed ssize_t, but the result is stored
 * in the size_t read_chars before the comparison. A return of -1 therefore
 * becomes a very large positive value and the test below can only be true
 * for 0 (end of input); a read error is not caught here. A signed temporary
 * would be needed to detect it.
 */
43 if ((read_chars
= read(STDIN_FILENO
, bb_common_bufsiz1
, BUFSIZ
)) <= 0) {
/*
 * Nothing more was read: write what is left in poutput and report a short
 * or failed write through bb_perror_msg(). The comparison mixes the signed
 * result of write() with the unsigned out_index.
 */
44 if (write(STDOUT_FILENO
, (char *)poutput
, out_index
) != out_index
)
45 bb_perror_msg(bb_msg_write_error
);
/*
 * Fetch the next input byte and advance in_index.
 * NOTE(review): the element type of bb_common_bufsiz1 is not visible here;
 * if it is plain char and char is signed, bytes above 0x7f would be
 * sign-extended into the size_t c before c is used as an index -- confirm.
 */
50 c
= bb_common_bufsiz1
[in_index
++];
/*
 * Delete test: TR_OPT_delete is set and c is flagged in pinvec. The
 * statement guarded by this condition is not visible in this listing;
 * presumably the byte is dropped -- confirm.
 */
52 if ((flags
& TR_OPT_delete
) && pinvec
[c
])
/*
 * Squeeze test: TR_OPT_squeeze_reps is set, the coded byte equals the one
 * remembered in last, and either c is flagged in pinvec or coded is flagged
 * in poutvec. The guarded statement is likewise not visible here.
 */
54 if ((flags
& TR_OPT_squeeze_reps
) && last
== coded
&&
55 (pinvec
[c
] || poutvec
[coded
]))
/* Append coded to the output buffer and remember it in last. */
57 poutput
[out_index
++] = last
= coded
;
/*
 * map() - fill the pvector table from two byte strings.
 *
 * string1, string1_len: bytes used as indexes into pvector, and their count
 * string2, string2_len: bytes stored as values, and their count
 *
 * For every i in [0, string1_len) an entry pvector[string1[i]] is written.
 * Two stores are visible: one that reuses the value held in last, and one
 * that takes the next byte string2[j++] and also records it in last.
 * NOTE(review): the condition selecting between the two stores and the
 * declarations of i, j and last are not visible in this listing; presumably
 * last is reused once string2 has been consumed -- confirm.
 */
62 static void map(unsigned char *string1
, unsigned int string1_len
,
63 unsigned char *string2
, unsigned int string2_len
)
/* Walk string1 with i; j is the read position in string2. */
68 for (j
= 0, i
= 0; i
< string1_len
; i
++) {
/* Store that repeats the previously stored value. */
70 pvector
[string1
[i
]] = last
;
/* Store that consumes one more byte of string2 and remembers it. */
72 pvector
[string1
[i
]] = last
= string2
[j
++];
/*
 * expand() - expand one tr operand into a flat byte list.
 *
 * arg:    operand text to expand
 * buffer: destination for the expanded bytes
 * Returns the number of bytes stored (buffer - buffer_start).
 *
 * NOTE(review): no destination size is passed in, so nothing visible here
 * bounds the writes to buffer -- confirm that callers supply enough room.
 * NOTE(review): several statements of this function (loop head, stores
 * inside the class loops, declaration of j) are not visible in this listing.
 */
76 /* supported constructs:
77 * Ranges, e.g., [0-9] ==> 0123456789
78 * Escapes, e.g., \a ==> Control-G
79 * Character classes, e.g. [:upper:] ==> A ... Z
81 static unsigned int expand(const char *arg
, char *buffer
)
83 char *buffer_start
= buffer
;
84 unsigned i
; /* XXX: FIXME: use unsigned char? */
/*
 * Class names recognised by the lookup further down. Each name has CLO
 * appended (CLO is defined outside the visible lines). The table index plus
 * one is what gets compared against the CLASS_* constants, which is why
 * CLASS_invalid is 0.
 */
87 static const char * const classes
[] = {
88 "alpha"CLO
, "alnum"CLO
, "digit"CLO
, "lower"CLO
, "upper"CLO
, "space"CLO
,
89 "blank"CLO
, "punct"CLO
, "cntrl"CLO
, NULL
91 #define CLASS_invalid 0 /* we increment the retval */
100 #define CLASS_cntrl 9
101 //#define CLASS_xdigit 10
102 //#define CLASS_graph 11
103 //#define CLASS_print 12
/*
 * Escape case: bb_process_escape_sequence() is given the address of arg and
 * its result is stored as the next output byte.
 */
107 *buffer
++ = bb_process_escape_sequence(&arg
);
/* Range case: the character following *arg is a dash. */
108 } else if (*(arg
+1) == '-') {
117 arg
+= 3; /* Skip the assumed a-z */
/* Bracket case: *arg is an opening square bracket. */
118 } else if (*arg
== '[') {
/*
 * Character class, handled only when ENABLE_FEATURE_TR_CLASSES is nonzero
 * and i holds a colon.
 */
121 if (ENABLE_FEATURE_TR_CLASSES
&& i
== ':') {
123 { /* not really pretty.. */
/*
 * Copy at most 7 characters of arg and look the copy up in classes.
 * NOTE(review): tmp comes from xstrndup(); no release of it is visible in
 * this listing -- confirm it is freed.
 */
124 char *tmp
= xstrndup(arg
, 7); // warning: xdigit needs 8, not 7
125 j
= index_in_str_array(classes
, tmp
) + 1;
/* alnum and digit: iterate over the decimal digits. */
128 if (j
== CLASS_alnum
|| j
== CLASS_digit
) {
129 for (i
= '0'; i
<= '9'; i
++)
/* alpha, alnum and upper: iterate over the upper-case letters. */
132 if (j
== CLASS_alpha
|| j
== CLASS_alnum
|| j
== CLASS_upper
) {
133 for (i
= 'A'; i
<= 'Z'; i
++)
/* alpha, alnum and lower: iterate over the lower-case letters. */
136 if (j
== CLASS_alpha
|| j
== CLASS_alnum
|| j
== CLASS_lower
) {
137 for (i
= 'a'; i
<= 'z'; i
++)
/*
 * space and blank share a branch, with an extra nested branch for space
 * only. The bytes emitted by either branch are not visible in this listing.
 */
140 if (j
== CLASS_space
|| j
== CLASS_blank
) {
142 if (j
== CLASS_space
) {
/*
 * punct and cntrl: scan every value from 0 to ASCII and select it with the
 * ctype predicates. punct means printable, not alphanumeric and not
 * whitespace; cntrl means iscntrl().
 */
150 if (j
== CLASS_punct
|| j
== CLASS_cntrl
) {
151 for (i
= 0; i
<= ASCII
; i
++)
152 if ((j
== CLASS_punct
&&
153 isprint(i
) && (!isalnum(i
)) && (!isspace(i
))) ||
154 (j
== CLASS_cntrl
&& iscntrl(i
)))
/* The lookup found no matching class name. */
157 if (j
== CLASS_invalid
) {
/*
 * Equivalence class, handled only when ENABLE_FEATURE_TR_EQUIV is nonzero
 * and i holds an equals sign.
 */
164 if (ENABLE_FEATURE_TR_EQUIV
&& i
== '=') {
166 arg
+= 3; /* Skip the closing =] */
177 arg
++; /* Skip the assumed ']' */
/* Number of bytes written since entry. */
181 return (buffer
- buffer_start
);
/*
 * complement() - rebuild buffer from the values collected in conv.
 *
 * buffer:     byte list, overwritten in place
 * buffer_len: number of bytes currently in buffer
 *
 * Visible logic: for every value i from 0 to ASCII the first buffer_len
 * bytes of buffer are scanned; i & ASCII is appended to the local array
 * conv; finally ix bytes of conv are copied over the start of buffer.
 * tr_main() assigns the return value to input_length.
 * NOTE(review): the test deciding whether i is appended and the return
 * statement are not visible in this listing; presumably only values absent
 * from buffer are kept and ix is returned -- confirm.
 */
184 static int complement(char *buffer
, int buffer_len
)
/* Scratch array for the result, ASCII + 2 bytes long. */
187 char conv
[ASCII
+ 2];
/* Outer loop: candidate byte values. */
190 for (i
= 0; i
<= ASCII
; i
++) {
/* Inner loop: scan the current contents of buffer. */
191 for (j
= 0; j
< buffer_len
; j
++)
/* Append the candidate to conv. */
195 conv
[ix
++] = i
& ASCII
;
/* Overwrite the start of buffer with the ix collected bytes. */
197 memcpy(buffer
, conv
, ix
);
/*
 * tr_main() - entry point of the tr applet (prototype followed by the
 * definition).
 *
 * argc, argv: command line of the applet
 *
 * Steps visible in this listing:
 *  - reserve the buffers output (BUFSIZ bytes) and vector, invec, outvec
 *    (ASCII+1 bytes each);
 *  - if argv[idx] begins with a dash, scan the following letters and OR the
 *    matching TR_OPT_* bits into flags (d selects delete, s selects
 *    squeeze_reps);
 *  - clear invec and outvec for every value from 0 to ASCII;
 *  - expand the first operand into bb_common_bufsiz1 and, when
 *    TR_OPT_complement is set, replace it with its complement;
 *  - if a second operand follows, reject it when empty, expand it into
 *    output and call map();
 *  - flag the bytes of both expanded operands in invec and outvec.
 * NOTE(review): the initial value of idx, the binding of the file-scope
 * pointers, the test that selects TR_OPT_complement and the end of the
 * function are not visible in this listing -- confirm against the full file.
 */
201 int tr_main(int argc
, char **argv
);
202 int tr_main(int argc
, char **argv
)
205 int output_length
= 0, input_length
;
/* Working buffers; sizes as given in the macro arguments. */
209 RESERVE_CONFIG_UBUFFER(output
, BUFSIZ
);
210 RESERVE_CONFIG_BUFFER(vector
, ASCII
+1);
211 RESERVE_CONFIG_BUFFER(invec
, ASCII
+1);
212 RESERVE_CONFIG_BUFFER(outvec
, ASCII
+1);
214 /* ... but make them available globally */
/* Option word: only examined when there is an argument starting with a dash. */
220 if (argc
> 1 && argv
[idx
][0] == '-') {
/* Walk the option letters after the dash, up to the terminating NUL. */
221 for (ptr
= (unsigned char *) &argv
[idx
][1]; *ptr
; ptr
++) {
223 flags
|= TR_OPT_complement
;
224 else if (*ptr
== 'd')
225 flags
|= TR_OPT_delete
;
226 else if (*ptr
== 's')
227 flags
|= TR_OPT_squeeze_reps
;
/* Reset the membership tables. */
233 for (i
= 0; i
<= ASCII
; i
++) {
235 invec
[i
] = outvec
[i
] = FALSE
;
/* First operand present: expand it, which also advances idx. */
238 if (argv
[idx
] != NULL
) {
239 input_length
= expand(argv
[idx
++], bb_common_bufsiz1
);
240 if (flags
& TR_OPT_complement
)
241 input_length
= complement(bb_common_bufsiz1
, input_length
);
/* Second operand present: it must not be the empty string. */
242 if (argv
[idx
] != NULL
) {
243 if (*argv
[idx
] == '\0')
244 bb_error_msg_and_die("STRING2 cannot be empty");
245 output_length
= expand(argv
[idx
], output
);
246 map(bb_common_bufsiz1
, input_length
, output
, output_length
);
/*
 * Flag every byte of the expanded first operand in invec; the cast to
 * unsigned char keeps the index non-negative.
 */
248 for (i
= 0; i
< input_length
; i
++)
249 invec
[(unsigned char)bb_common_bufsiz1
[i
]] = TRUE
;
/*
 * Flag every byte of the expanded second operand in outvec.
 * NOTE(review): no cast is applied to output[i]; presumably the buffer from
 * RESERVE_CONFIG_UBUFFER has an unsigned element type -- confirm.
 */
250 for (i
= 0; i
< output_length
; i
++)
251 outvec
[output
[i
]] = TRUE
;