unstack, sort: cleanup and improvement
[minix.git] / commands / sort / sort.c
blob5481d277932e536d256b6e1490b67230ff383034
1 /* sort - sort a file of lines Author: Michiel Huisjes */
3 /* SYNOPSIS:
4 * sort [-funbirdcmt'x'] [+beg_pos[opts] [-end_pos]] [-o outfile] [file]..
6 * [opts] can be any of
7 * -f : Fold upper case to lower.
8 * -n : Sort to numeric value (optional decimal point) implies -b
9 * -b : Skip leading blanks
10 * -i : Ignore chars outside ASCII range (040 - 0176)
11 * -r : Reverse the sense of comparisons.
12 * -d : Sort to dictionary order. Only letters, digits, comma's and points
13 * are compared.
14 * If any of these flags are used in [opts], then they override all global
15 * ordering for this field.
17 * I/O control flags are:
18 * -u : Print uniq lines only once.
19 * -c : Check if files are sorted in order.
20 * -m : Merge already sorted files.
21 * -o outfile : Name of output file. (Can be one of the input files).
22 * Default is stdout.
23 * - : Take stdin as input.
25 * Fields:
26 * -t'x' : Field separating character is 'x'
27 * +a.b : Start comparing at field 'a' with offset 'b'. A missing 'b' is
28 * taken to be 0.
29 * -a.b : Stop comparing at field 'a' with offset 'b'. A missing 'b' is
30 * taken to be 0.
31 * A missing -a.b means the rest of the line.
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <fcntl.h>
37 #include <signal.h>
38 #include <unistd.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <stdio.h>
42 #include <limits.h>
44 #define OPEN_FILES (OPEN_MAX-4) /* Nr of open files per process */
45 #if __minix_vmd
46 #define MEMORY_SIZE (1024 * 1024)
47 #else
48 #define MEMORY_SIZE ((10 * sizeof(int)) * 1024)
49 #endif
50 /* Total mem_size */
51 #define LINE_SIZE (1024 >> 1) /* Max length of a line */
52 #define IO_SIZE (2 * 1024) /* Size of buffered output */
53 #define STD_OUT 1 /* Fd of terminal */
55 /* Return status of functions */
56 #define OK 0
57 #define ERROR -1
59 /* Compare return values */
60 #define LOWER -1
61 #define SAME 0
62 #define HIGHER 1
64 /* Table definitions. */
65 #define DICT 0x001 /* Alpha, numeric, letters and . */
66 #define ASCII 0x002 /* All between ' ' and '~' */
67 #define BLANK 0x004 /* ' ' and '\t' */
68 #define DIGIT 0x008 /* 0-9 */
69 #define UPPER 0x010 /* A-Z */
71 typedef int BOOL;
73 #define FALSE 0
74 #define TRUE 1
76 typedef struct {
77 int fd; /* Fd of file */
78 char *buffer; /* Buffer for reads */
79 int read_chars; /* Nr of chars actually read in buffer */
80 int cnt; /* Nr of chars taken out of buffer */
81 char *line; /* Contains line currently used */
82 } MERGE;
84 MERGE merge_f[OPEN_FILES]; /* Merge structs */
85 int buf_size; /* Size of core available for each struct */
87 #define FIELDS_LIMIT 10 /* 1 global + 9 user */
88 #define GLOBAL 0
90 typedef struct {
91 int beg_field, beg_pos; /* Begin field + offset */
92 int end_field, end_pos; /* End field + offset. ERROR == EOLN */
93 BOOL reverse; /* TRUE if rev. flag set on this field */
94 BOOL blanks;
95 BOOL dictionary;
96 BOOL fold_case;
97 BOOL ascii;
98 BOOL numeric;
99 BOOL hexmode;
100 } FIELD;
102 /* Field declarations. A total of FILEDS_LIMIT is allowed */
103 FIELD fields[FIELDS_LIMIT];
104 int field_cnt; /* Nr of field actually assigned */
106 /* Various output control flags */
107 BOOL check = FALSE;
108 BOOL only_merge = FALSE;
109 BOOL uniq = FALSE;
111 char *mem_top; /* Mem_top points to lowest pos of memory. */
112 char *cur_pos; /* First free position in mem */
113 char **line_table; /* Pointer to the internal line table */
114 BOOL in_core = TRUE; /* Set if input cannot all be sorted in core */
116 /* Place where temp_files should be made */
117 char temp_files[] = "/tmp/sort.XXXXX.XX";
118 char *output_file; /* Name of output file */
119 int out_fd; /* Fd to output file (could be STD_OUT) */
120 char out_buffer[IO_SIZE]; /* For buffered output */
122 char **argptr; /* Pointer to argv structure */
123 int args_offset; /* Nr of args spilled on options */
124 int args_limit; /* Nr of args given */
126 char separator; /* Char that separates fields */
127 int nr_of_files = 0; /* Nr_of_files to be merged */
128 int disabled; /* Nr of files done */
130 char USAGE[] = "Usage: sort [-funbirdcmt'x'] [+beg_pos [-end_pos]] [-o outfile] [file] ..";
132 /* Forward declarations */
133 int main(int argc, char **argv);
134 void get_opts(char *ptr, FIELD * field);
135 void new_field(FIELD * field, int *offset, BOOL beg_fl);
136 void adjust_options(FIELD * field);
137 void error(BOOL quit, char *message, char *arg);
138 void open_outfile(void);
139 void get_file(int fd, off_t size);
140 int last_line(void);
141 void print_table(int fd);
142 char *file_name(int nr);
143 void mread(int fd, char *address, int bytes);
144 void mwrite(int fd, char *address, int bytes);
145 void sort(void);
146 void sort_table(int nel);
147 void incr(int si, int ei);
148 int cmp_fields(char *el1, char *el2);
149 void build_field(char *dest, FIELD * field, char *src);
150 char *skip_fields(char *str, int nf);
151 int compare(char *el1, char *el2);
152 int cmp(unsigned char *el1, unsigned char *el2, FIELD * field);
153 int digits(char *str1, char *str2, BOOL check_sign);
154 int hexits(char *str1, char *str2);
155 void files_merge(int file_cnt);
156 void merge(int start_file, int limit_file);
157 void put_line(char *line);
158 MERGE * print(MERGE * merg, int file_cnt);
159 int read_line(MERGE * merg);
160 MERGE * skip_lines(MERGE * smallest, int file_cnt);
161 void uniq_lines(MERGE * merg);
162 void check_file(int fd, char *file);
163 int length(char *line);
164 void copy(char *dest, char *src);
165 char *msbrk(int size);
166 void mbrk(char *address);
167 void catch(int dummy);
169 /* Table of all chars. 0 means no special meaning. */
170 char table[256] = {
171 /* '^@' to space */
172 0, 0, 0, 0, 0, 0, 0, 0,
173 0, BLANK | DICT, 0, 0, 0, 0, 0, 0,
174 0, 0, 0, 0, 0, 0, 0, 0,
175 0, 0, 0, 0, 0, 0, 0, 0,
177 /* Space to '0' */
178 BLANK | DICT | ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
179 ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
180 ASCII, ASCII,
182 /* '0' until '9' */
183 DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII,
184 DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII,
185 DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII,
186 DIGIT | DICT | ASCII,
188 /* ASCII from ':' to '@' */
189 ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
191 /* Upper case letters 'A' to 'Z' */
192 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
193 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
194 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
195 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
196 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
197 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
198 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
199 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
200 UPPER | DICT | ASCII, UPPER | DICT | ASCII,
202 /* ASCII from '[' to '`' */
203 ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
205 /* Lower case letters from 'a' to 'z' */
206 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
207 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
208 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
209 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
210 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
211 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
212 DICT | ASCII, DICT | ASCII,
214 /* ASCII from '{' to '~' */
215 ASCII, ASCII, ASCII, ASCII,
217 /* Stuff from -1 to -177 */
218 0, 0, 0, 0, 0, 0, 0, 0, 0,
219 0, 0, 0, 0, 0, 0, 0, 0, 0,
220 0, 0, 0, 0, 0, 0, 0, 0, 0,
221 0, 0, 0, 0, 0, 0, 0, 0, 0,
222 0, 0, 0, 0, 0, 0, 0, 0, 0,
223 0, 0, 0, 0, 0, 0, 0, 0, 0,
224 0, 0, 0, 0, 0, 0, 0, 0, 0,
225 0, 0, 0, 0, 0, 0, 0, 0, 0,
226 0, 0, 0, 0, 0, 0, 0
231 * Get_opts () assigns the options into the field structure as described in ptr.
232 * This field structure could be the GLOBAL one.
234 void get_opts(ptr, field)
235 register char *ptr;
236 register FIELD *field;
238 switch (*ptr) {
239 case 'b': /* Skip leading blanks */
240 field->blanks = TRUE;
241 break;
242 case 'd': /* Dictionary order */
243 field->dictionary = TRUE;
244 break;
245 case 'f': /* Fold upper case to lower */
246 field->fold_case = TRUE;
247 break;
248 case 'i': /* Skip chars outside ' ' '~' */
249 field->ascii = TRUE;
250 break;
251 case 'n': /* Sort on numeric */
252 field->numeric = TRUE;
253 field->blanks = TRUE;
254 break;
255 case 'x':
256 field->hexmode = TRUE;
257 field->blanks = TRUE;
258 break;
259 case 'r': /* Reverse comparisons */
260 field->reverse = TRUE;
261 break;
262 default: /* Illegal options */
263 error(TRUE, USAGE, NULL);
267 /* New_field () assigns a new field as described by the arguments.
268 * A field description is of the form: +a.b[opts] -c.d, where b and d, as well
269 * as -c.d and [opts] are optional. Nr before digit is field nr. Nr after digit
270 * is offset from field.
272 void new_field(field, offset, beg_fl)
273 register FIELD *field; /* Field to assign */
274 int *offset; /* Offset in argv structure */
275 BOOL beg_fl; /* Assign beg or end of field */
277 register char *ptr;
279 ptr = argptr[*offset];
280 *offset += 1; /* Incr offset to next arg */
281 ptr++;
283 if (beg_fl)
284 field->beg_field = atoi(ptr); /* Assign int of first field */
285 else
286 field->end_field = atoi(ptr);
288 while (table[*ptr] & DIGIT) /* Skip all digits */
289 ptr++;
291 if (*ptr == '.') { /* Check for offset */
292 ptr++;
293 if (beg_fl)
294 field->beg_pos = atoi(ptr);
295 else
296 field->end_pos = atoi(ptr);
297 while (table[*ptr] & DIGIT) /* Skip digits */
298 ptr++;
300 if (beg_fl) {
301 while (*ptr != '\0') /* Check options after field */
302 get_opts(ptr++, field);
304 if (beg_fl) { /* Check for end pos */
305 ptr = argptr[*offset];
306 if (ptr && *ptr == '-' && ((table[*(ptr + 1)] & DIGIT) || *(ptr + 1) == '.')) {
307 new_field(field, offset, FALSE);
308 if (field->beg_field > field->end_field)
309 error(TRUE, "End field is before start field!", NULL);
310 } else /* No end pos. */
311 field->end_field = ERROR;
315 int main(argc, argv)
316 int argc;
317 char *argv[];
319 int arg_count = 1; /* Offset in argv */
320 struct stat st;
321 register char *ptr; /* Ptr to *argv in use */
322 register int fd;
323 int pid, pow;
325 argptr = argv;
326 cur_pos = mem_top = msbrk(MEMORY_SIZE); /* Find lowest mem. location */
328 while (arg_count < argc && ((ptr = argv[arg_count])[0] == '-' || *ptr == '+')) {
329 if (*ptr == '-' && *(ptr + 1) == '\0') /* "-" means stdin */
330 break;
331 if (*ptr == '+') { /* Assign field. */
332 if (++field_cnt == FIELDS_LIMIT)
333 error(TRUE, "Too many fields", NULL);
334 new_field(&fields[field_cnt], &arg_count, TRUE);
335 } else { /* Get output options */
336 while (*++ptr) {
337 switch (*ptr) {
338 case 'c': /* Only check file */
339 check = TRUE;
340 break;
341 case 'm': /* Merge (sorted) files */
342 only_merge = TRUE;
343 break;
344 case 'u': /* Only give uniq lines */
345 uniq = TRUE;
346 break;
347 case 'o': /* Name of output file */
348 output_file = argv[++arg_count];
349 break;
350 case 't': /* Field separator */
351 ptr++;
352 separator = *ptr;
353 break;
354 default: /* Sort options */
355 get_opts(ptr, &fields[GLOBAL]);
358 arg_count++;
362 for (fd = 1; fd <= field_cnt; fd++) adjust_options(&fields[fd]);
364 /* Create name of tem_files 'sort.pid.aa' */
365 ptr = &temp_files[10];
366 pid = getpid();
367 pow = 10000;
368 while (pow != 0) {
369 *ptr++ = pid / pow + '0';
370 pid %= pow;
371 pow /= 10;
374 signal(SIGINT, catch);
376 /* Only merge files. Set up */
377 if (only_merge) {
378 args_limit = args_offset = arg_count;
379 while (argv[args_limit] != NULL)
380 args_limit++; /* Find nr of args */
381 files_merge(args_limit - arg_count);
382 exit(0);
384 if (arg_count == argc) { /* No args left. Use stdin */
385 if (check)
386 check_file(0, NULL);
387 else
388 get_file(0, (off_t) 0);
389 } else
390 while (arg_count < argc) { /* Sort or check args */
391 if (strcmp(argv[arg_count], "-") == 0)
392 fd = 0;
393 else if (stat(argv[arg_count], &st) < 0) {
394 error(FALSE, "Cannot find ", argv[arg_count++]);
395 continue;
398 /* Open files */
399 else if ((fd = open(argv[arg_count], O_RDONLY)) < 0) {
400 error(FALSE, "Cannot open ", argv[arg_count++]);
401 continue;
403 if (check)
404 check_file(fd, argv[arg_count]);
405 else /* Get_file reads whole file */
406 get_file(fd, st.st_size);
407 arg_count++;
410 if (check) exit(0);
412 sort(); /* Sort whatever is left */
414 if (nr_of_files == 1) /* Only one file sorted -> don't merge */
415 exit(0);
417 files_merge(nr_of_files);
418 return(0);
421 /* Adjust_options() assigns all global variables set also in the fields
422 * assigned.
424 void adjust_options(field)
425 register FIELD *field;
427 register FIELD *gfield = &fields[GLOBAL];
429 if (gfield->reverse) field->reverse = TRUE;
430 if (gfield->blanks) field->blanks = TRUE;
431 if (gfield->dictionary) field->dictionary = TRUE;
432 if (gfield->fold_case) field->fold_case = TRUE;
433 if (gfield->ascii) field->ascii = TRUE;
434 if (gfield->numeric) field->numeric = TRUE;
437 /* Error () prints the error message on stderr and exits if quit == TRUE. */
438 void error(quit, message, arg)
439 register BOOL quit;
440 register char *message, *arg;
442 write(2, message, strlen(message));
443 if (arg != NULL) write(2, arg, strlen(arg));
444 perror(" ");
445 if (quit) exit(1);
448 /* Open_outfile () assigns to out_fd the fd where the output must go when all
449 * the sorting is done.
451 void open_outfile()
453 if (output_file == NULL)
454 out_fd = STD_OUT;
455 else if ((out_fd = creat(output_file, 0644)) < 0)
456 error(TRUE, "Cannot creat ", output_file);
459 /* Get_file reads the whole file of filedescriptor fd. If the file is too big
460 * to keep in core, a partial sort is done, and the output is stashed somewhere.
462 void get_file(fd, size)
463 int fd; /* Fd of file to read */
464 register off_t size; /* Size of file */
466 register int i;
467 int rest; /* Rest in memory */
468 char save_ch; /* Used in stdin readings */
470 rest = MEMORY_SIZE - (cur_pos - mem_top);
471 if (fd == 0) { /* We're reding stdin */
472 while ((i = read(0, cur_pos, rest)) > 0) {
473 if ((cur_pos - mem_top) + i == MEMORY_SIZE) {
474 in_core = FALSE;
475 i = last_line(); /* End of last line */
476 save_ch = mem_top[i];
477 mem_top[i] = '\0';
478 sort(); /* Sort core */
479 mem_top[i] = save_ch; /* Restore erased char */
480 /* Restore last (half read) line */
481 for (rest = 0; i + rest != MEMORY_SIZE; rest++)
482 mem_top[rest] = mem_top[i + rest];
483 /* Assign current pos. in memory */
484 cur_pos = &mem_top[rest];
485 } else { /* Fits, just assign position in mem. */
486 cur_pos = cur_pos + i;
487 *cur_pos = '\0';
490 /* Calculate rest of mem */
491 rest = MEMORY_SIZE - (cur_pos - mem_top);
495 /* Reading file. Check size */
496 else if (size > rest) { /* Won't fit */
497 mread(fd, cur_pos, rest);
498 in_core = FALSE;
499 i = last_line(); /* Get pos. of last line */
500 mem_top[i] = '\0'; /* Truncate */
501 (void) lseek(fd, (off_t) (i - MEMORY_SIZE), SEEK_CUR); /* Do this next time */
502 size = size - rest - i + MEMORY_SIZE; /* Calculate rest */
503 cur_pos = mem_top; /* Reset mem */
504 sort(); /* Sort core */
505 get_file(fd, size); /* Get rest of file */
506 } else { /* Fits. Just read in */
507 rest = size;
508 mread(fd, cur_pos, rest);
509 cur_pos = cur_pos + rest; /* Reassign cur_pos */
510 *cur_pos = '\0';
511 (void) close(fd); /* File completed */
515 /* Last_line () find the last line in core and retuns the offset from the top
516 * of the memory.
518 int last_line()
520 register int i;
522 for (i = MEMORY_SIZE - 2; i > 0; i--)
523 if (mem_top[i] == '\n') break;
524 return i + 1;
527 /* Print_table prints the line table in the given file_descriptor. If the fd
528 * equals ERROR, it opens a temp_file itself.
530 void print_table(fd)
531 int fd;
533 register char **line_ptr; /* Ptr in line_table */
534 register char *ptr; /* Ptr to line */
535 int index = 0; /* Index in output buffer */
537 if (fd == ERROR) {
538 if ((fd = creat(file_name(nr_of_files), 0644)) < 0)
539 error(TRUE, "Cannot creat ", file_name(nr_of_files));
541 for (line_ptr = line_table; *line_ptr != NULL; line_ptr++) {
542 ptr = *line_ptr;
543 /* Skip all same lines if uniq is set */
544 if (uniq && *(line_ptr + 1) != NULL) {
545 if (compare(ptr, *(line_ptr + 1)) == SAME) continue;
547 do { /* Print line in a buffered way */
548 out_buffer[index++] = *ptr;
549 if (index == IO_SIZE) {
550 mwrite(fd, out_buffer, IO_SIZE);
551 index = 0;
553 } while (*ptr++ != '\n');
555 mwrite(fd, out_buffer, index);/* Flush buffer */
556 (void) close(fd); /* Close file */
557 nr_of_files++; /* Increment nr_of_files to merge */
560 /* File_name () returns the nr argument from the argument list, or a uniq
561 * filename if the nr is too high, or the arguments were not merge files.
563 char *file_name(nr)
564 register int nr;
566 if (only_merge) {
567 if (args_offset + nr < args_limit) return argptr[args_offset + nr];
569 temp_files[16] = nr / 26 + 'a';
570 temp_files[17] = nr % 26 + 'a';
572 return temp_files;
575 /* Mread () performs a normal read (), but checks the return value. */
576 void mread(fd, address, bytes)
577 int fd;
578 char *address;
579 register int bytes;
581 if (read(fd, address, bytes) < 0 && bytes != 0)
582 error(TRUE, "Read error", NULL);
585 /* Mwrite () performs a normal write (), but checks the return value. */
586 void mwrite(fd, address, bytes)
587 int fd;
588 char *address;
589 register int bytes;
591 if (write(fd, address, bytes) != bytes && bytes != 0)
592 error(TRUE, "Write error", NULL);
595 /* Sort () sorts the input in memory starting at mem_top. */
596 void sort()
598 register char *ptr = mem_top;
599 register int count = 0;
601 /* Count number of lines in memory */
602 while (*ptr) {
603 if (*ptr++ == '\n') count++;
606 /* Set up the line table */
607 line_table = (char **) msbrk(count * sizeof(char *) + sizeof(char *));
609 count = 1;
610 ptr = line_table[0] = mem_top;
611 while (*ptr) {
612 if (*ptr++ == '\n') line_table[count++] = ptr;
615 line_table[count - 1] = NULL;
617 /* Sort the line table */
618 sort_table(count - 1);
620 /* Stash output somewhere */
621 if (in_core) {
622 open_outfile();
623 print_table(out_fd);
624 } else
625 print_table(ERROR);
627 /* Free line table */
628 mbrk((char *) line_table);
631 /* Sort_table () sorts the line table consisting of nel elements. */
632 void sort_table(nel)
633 register int nel;
635 char *tmp;
636 register int i;
638 /* Make heap */
639 for (i = (nel >> 1); i >= 1; i--) incr(i, nel);
641 /* Sort from heap */
642 for (i = nel; i > 1; i--) {
643 tmp = line_table[0];
644 line_table[0] = line_table[i - 1];
645 line_table[i - 1] = tmp;
646 incr(1, i - 1);
650 /* Incr () increments the heap. */
651 void incr(si, ei)
652 register int si, ei;
654 char *tmp;
656 while (si <= (ei >> 1)) {
657 si <<= 1;
658 if (si + 1 <= ei && compare(line_table[si - 1], line_table[si]) <= 0)
659 si++;
660 if (compare(line_table[(si >> 1) - 1], line_table[si - 1]) >= 0)
661 return;
662 tmp = line_table[(si >> 1) - 1];
663 line_table[(si >> 1) - 1] = line_table[si - 1];
664 line_table[si - 1] = tmp;
668 /* Cmp_fields builds new lines out of the lines pointed to by el1 and el2 and
669 * puts it into the line1 and line2 arrays. It then calls the cmp () routine
670 * with the field describing the arguments.
672 int cmp_fields(el1, el2)
673 register char *el1, *el2;
675 int i, ret;
676 char line1[LINE_SIZE], line2[LINE_SIZE];
678 for (i = 0; i < field_cnt; i++) { /* Setup line parts */
679 build_field(line1, &fields[i + 1], el1);
680 build_field(line2, &fields[i + 1], el2);
681 if ((ret = cmp((unsigned char *) line1, (unsigned char *) line2,
682 &fields[i + 1])) != SAME)
683 break; /* If equal, try next field */
686 /* Check for reverse flag */
687 if (i != field_cnt && fields[i + 1].reverse) return -ret;
689 /* Else return the last return value of cmp () */
690 return ret;
693 /* Build_field builds a new line from the src as described by the field.
694 * The result is put in dest.
696 void build_field(dest, field, src)
697 char *dest; /* Holds result */
698 register FIELD *field; /* Field description */
699 register char *src; /* Source line */
701 char *begin = src; /* Remember start location */
702 char *last; /* Pointer to end location */
703 int i;
705 /* Skip begin fields */
706 src = skip_fields(src, field->beg_field);
708 /* Skip begin positions */
709 for (i = 0; i < field->beg_pos && *src != '\n'; i++) src++;
711 /* Copy whatever is left */
712 copy(dest, src);
714 /* If end field is assigned truncate (perhaps) the part copied */
715 if (field->end_field != ERROR) { /* Find last field */
716 last = skip_fields(begin, field->end_field);
717 /* Skip positions as given by end fields description */
718 for (i = 0; i < field->end_pos && *last != '\n'; i++) last++;
719 dest[last - src] = '\n';/* Truncate line */
723 /* Skip_fields () skips nf fields of the line pointed to by str. */
724 char *skip_fields(str, nf)
725 register char *str;
726 int nf;
728 while (nf-- > 0) {
729 if (separator == '\0') {/* Means ' ' or '\t' */
730 while (*str != ' ' && *str != '\t' && *str != '\n') str++;
731 while (table[*str] & BLANK) str++;
732 } else {
733 while (*str != separator && *str != '\n') str++;
734 if (*str == separator) str++;
737 return str; /* Return pointer to indicated field */
740 /* Compare is called by all sorting routines. It checks if fields assignments
741 * has been made. if so, it calls cmp_fields (). If not, it calls cmp () and
742 * reversed the return value if the (global) reverse flag is set.
744 int compare(el1, el2)
745 register char *el1, *el2;
747 int ret;
749 if (field_cnt > GLOBAL) return cmp_fields(el1, el2);
751 ret = cmp((unsigned char *) el1, (unsigned char *) el2, &fields[GLOBAL]);
752 return(fields[GLOBAL].reverse) ? -ret : ret;
755 /* Cmp () is the actual compare routine. It compares according to the
756 * description given in the field pointer.
758 int cmp(el1, el2, field)
759 register unsigned char *el1, *el2;
760 FIELD *field;
762 int c1, c2;
764 if (field->blanks) { /* Skip leading blanks */
765 while (table[*el1] & BLANK) el1++;
766 while (table[*el2] & BLANK) el2++;
768 if (field->numeric) /* Compare numeric */
769 return digits((char *) el1, (char *) el2, TRUE);
770 if (field->hexmode) /* Compare hex */
771 return hexits((char *) el1, (char *) el2);
773 for (;;) {
774 while (*el1 == *el2) {
775 if (*el1++ == '\n') /* EOLN on both strings */
776 return SAME;
777 el2++;
779 if (*el1 == '\n') /* EOLN on string one */
780 return LOWER;
781 if (*el2 == '\n') return HIGHER;
782 if (field->ascii) { /* Skip chars outside 040 - 0177 */
783 if ((table[*el1] & ASCII) == 0) {
784 do {
785 el1++;
786 } while ((table[*el1] & ASCII) == 0);
787 continue;
789 if ((table[*el2] & ASCII) == 0) {
790 do {
791 el2++;
792 } while ((table[*el2] & ASCII) == 0);
793 continue;
796 if (field->dictionary) {/* Skip non-dict chars */
797 if ((table[*el1] & DICT) == 0) {
798 do {
799 el1++;
800 } while ((table[*el1] & DICT) == 0);
801 continue;
803 if ((table[*el2] & DICT) == 0) {
804 do {
805 el2++;
806 } while ((table[*el2] & DICT) == 0);
807 continue;
810 if (field->fold_case) { /* Fold upper case to lower */
811 if (table[c1 = *el1++] & UPPER) c1 += 'a' - 'A';
812 if (table[c2 = *el2++] & UPPER) c2 += 'a' - 'A';
813 if (c1 == c2) continue;
814 return c1 - c2;
816 return *el1 - *el2;
819 /* NOTREACHED */
822 int hexits(char *str1, char *str2)
824 unsigned long v1, v2;
825 int r1, r2;
826 r1 = sscanf(str1, "0x%lx", &v1);
827 r2 = sscanf(str2, "0x%lx", &v2);
829 /* ordering based on reasonable hex number */
830 if(r1 == 1 && r2 != 1) return HIGHER;
831 if(r1 != 1 && r2 == 1) return LOWER;
832 if(r1 != 1 && r2 != 1) return SAME;
834 if(v1 > v2) return HIGHER;
835 if(v1 < v2) return LOWER;
837 return SAME;
841 * Digits compares () the two strings that point to a number of digits followed
842 * by an optional decimal point.
844 int digits(str1, str2, check_sign)
845 register char *str1, *str2;
846 BOOL check_sign; /* True if sign must be checked */
848 BOOL negative = FALSE; /* True if negative numbers */
849 int diff, pow, ret;
851 /* Check for optional minus or plus sign */
852 if (check_sign) {
853 if (*str1 == '-') {
854 negative = TRUE;
855 str1++;
856 } else if (*str1 == '+')
857 str1++;
859 if (*str2 == '-') {
860 if (negative == FALSE) return HIGHER;
861 str2++;
862 } else if (negative)
863 return LOWER;
864 else if (*str2 == '+')
865 str2++;
868 /* Keep incrementing as long as digits are available and equal */
869 while ((table[*str1] & DIGIT) && table[*str2] & DIGIT) {
870 if (*str1 != *str2) break;
871 str1++;
872 str2++;
875 /* First check for the decimal point. */
876 if (*str1 == '.' || *str2 == '.') {
877 if (*str1 == '.') {
878 if (*str2 == '.') /* Both. Check decimal part */
879 ret = digits(str1 + 1, str2 + 1, FALSE);
880 else
881 ret = (table[*str2] & DIGIT) ? LOWER : HIGHER;
882 } else
883 ret = (table[*str1] & DIGIT) ? HIGHER : LOWER;
886 /* Now either two digits differ, or unknown char is seen (e.g. end of string) */
887 else if ((table[*str1] & DIGIT) && (table[*str2] & DIGIT)) {
888 diff = *str1 - *str2; /* Basic difference */
889 pow = 0; /* Check power of numbers */
890 while (table[*str1++] & DIGIT) pow++;
891 while (table[*str2++] & DIGIT) pow--;
892 ret = (pow == 0) ? diff : pow;
895 /* Unknown char. Check on which string it occurred */
896 else {
897 if ((table[*str1] & DIGIT) == 0)
898 ret = (table[*str2] & DIGIT) ? LOWER : SAME;
899 else
900 ret = HIGHER;
903 /* Reverse sense of comparisons if negative is true. (-1000 < -1) */
904 return(negative) ? -ret : ret;
907 /* Files_merge () merges all files as indicated by nr_of_files. Merging goes
908 * in numbers of files that can be opened at the same time. (OPEN_FILES)
910 void files_merge(file_cnt)
911 register int file_cnt; /* Nr_of_files to merge */
913 register int i;
914 int limit;
916 for (i = 0; i < file_cnt; i += OPEN_FILES) {
917 /* Merge last files and store in output file */
918 if ((limit = i + OPEN_FILES) >= file_cnt) {
919 open_outfile();
920 limit = file_cnt;
921 } else { /* Merge OPEN_FILES files and store in temp
922 * file */
923 temp_files[16] = file_cnt / 26 + 'a';
924 temp_files[17] = file_cnt % 26 + 'a';
925 if ((out_fd = creat(temp_files, 0644)) < 0)
926 error(TRUE, "Cannot creat ", temp_files);
927 file_cnt++;
929 merge(i, limit);
932 /* Cleanup mess */
933 i = (only_merge) ? args_limit - args_offset : 0;
934 while (i < file_cnt) (void) unlink(file_name(i++));
937 /* Merge () merges the files between start_file and limit_file. */
938 void merge(start_file, limit_file)
939 int start_file, limit_file;
941 register MERGE *smallest; /* Keeps track of smallest line */
942 register int i;
943 int file_cnt = limit_file - start_file; /* Nr of files to merge */
945 /* Calculate size in core available for file_cnt merge structs */
946 buf_size = MEMORY_SIZE / file_cnt - LINE_SIZE;
948 mbrk(mem_top); /* First reset mem to lowest loc. */
949 disabled = 0; /* All files not done yet */
951 /* Set up merge structures. */
952 for (i = start_file; i < limit_file; i++) {
953 smallest = &merge_f[i - start_file];
954 if (!strcmp(file_name(i), "-")) /* File is stdin */
955 smallest->fd = 0;
956 else if ((smallest->fd = open(file_name(i), O_RDONLY)) < 0) {
957 smallest->fd = ERROR;
958 error(FALSE, "Cannot open ", file_name(i));
959 disabled++; /* Done this file */
960 continue;
962 smallest->buffer = msbrk(buf_size);
963 smallest->line = msbrk(LINE_SIZE);
964 smallest->cnt = smallest->read_chars = 0;
965 (void) read_line(smallest); /* Read first line */
968 if (disabled == file_cnt) { /* Couldn't open files */
969 (void) close(out_fd);
970 return;
973 /* Find a merg struct to assign smallest. */
974 for (i = 0; i < file_cnt; i++) {
975 if (merge_f[i].fd != ERROR) {
976 smallest = &merge_f[i];
977 break;
981 /* Loop until all files minus one are done */
982 while (disabled < file_cnt - 1) {
983 if (uniq) /* Skip all same lines */
984 smallest = skip_lines(smallest, file_cnt);
985 else { /* Find smallest line */
986 for (i = 0; i < file_cnt; i++) {
987 if (merge_f[i].fd == ERROR)
988 continue; /* We've had this one */
989 if (compare(merge_f[i].line, smallest->line) < 0)
990 smallest = &merge_f[i];
992 } /* Print line and read next */
993 smallest = print(smallest, file_cnt);
996 if (only_merge && uniq)
997 uniq_lines(smallest); /* Print only uniq lines */
998 else /* Print rest of file */
999 while (print(smallest, file_cnt) != NULL);
1001 put_line(NULL); /* Flush output buffer */
1004 /* Put_line () prints the line into the out_fd filedescriptor. If line equals
1005 * NULL, the out_fd is flushed and closed.
1007 void put_line(line)
1008 register char *line;
1010 static int index = 0; /* Index in out_buffer */
1012 if (line == NULL) { /* Flush and close */
1013 mwrite(out_fd, out_buffer, index);
1014 index = 0;
1015 (void) close(out_fd);
1016 return;
1018 do { /* Fill out_buffer with line */
1019 out_buffer[index++] = *line;
1020 if (index == IO_SIZE) {
1021 mwrite(out_fd, out_buffer, IO_SIZE);
1022 index = 0;
1024 } while (*line++ != '\n');
1028 * Print () prints the line of the merg structure and tries to read another one.
1029 * If this fails, it returns the next merg structure which file_descriptor is
1030 * still open. If none could be found, a NIL structure is returned.
1032 MERGE *print(merg, file_cnt)
1033 register MERGE *merg;
1034 int file_cnt; /* Nr of files that are being merged */
1036 register int i;
1038 put_line(merg->line); /* Print the line */
1040 if (read_line(merg) == ERROR) { /* Read next line */
1041 for (i = 0; i < file_cnt; i++) {
1042 if (merge_f[i].fd != ERROR) {
1043 merg = &merge_f[i];
1044 break;
1047 if (i == file_cnt) /* No more files left */
1048 return NULL;
1050 return merg;
1053 /* Read_line () reads a line from the fd from the merg struct. If the read
1054 * failed, disabled is incremented and the file is closed. Readings are
1055 * done in buf_size bytes.
1056 * Lines longer than LINE_SIZE are silently truncated.
1058 int read_line(merg)
1059 register MERGE *merg;
1061 register char *ptr = merg->line - 1; /* Ptr buf that will hold line */
1063 do {
1064 ptr++;
1065 if (merg->cnt == merg->read_chars) { /* Read new buffer */
1066 if ((merg->read_chars =
1067 read(merg->fd, merg->buffer, buf_size)) <= 0) {
1068 (void) close(merg->fd); /* OOPS */
1069 merg->fd = ERROR;
1070 disabled++;
1071 return ERROR;
1073 merg->cnt = 0;
1075 *ptr = merg->buffer[merg->cnt++]; /* Assign next char of line */
1076 if (ptr - merg->line == LINE_SIZE - 1)
1077 *ptr = '\n'; /* Truncate very long lines */
1078 } while (*ptr != '\n' && *ptr != '\0');
1080 if (*ptr == '\0') /* Add '\n' to last line */
1081 *ptr = '\n';
1082 *++ptr = '\0'; /* Add '\0' */
1083 return OK;
1086 /* Skip_lines () skips all same lines in all the files currently being merged.
1087 * It returns a pointer to the merge struct containing the smallest line.
1089 MERGE *skip_lines(smallest, file_cnt)
1090 register MERGE *smallest;
1091 int file_cnt;
1093 register int i;
1094 int ret;
1096 if (disabled == file_cnt - 1) /* We've had all */
1097 return smallest;
1099 for (i = 0; i < file_cnt; i++) {
1100 if (merge_f[i].fd == ERROR || smallest == &merge_f[i])
1101 continue; /* Don't check same file */
1102 while ((ret = compare(merge_f[i].line, smallest->line)) == 0) {
1103 if (read_line(&merge_f[i]) == ERROR) break; /* EOF */
1105 if (ret < 0) /* Line wasn't smallest. Try again */
1106 return skip_lines(&merge_f[i], file_cnt);
1108 return smallest;
1111 /* Uniq_lines () prints only the uniq lines out of the fd of the merg struct. */
1112 void uniq_lines(merg)
1113 register MERGE *merg;
1115 char lastline[LINE_SIZE]; /* Buffer to hold last line */
1117 for (;;) {
1118 put_line(merg->line); /* Print this line */
1119 copy(lastline, merg->line); /* and save it */
1120 if (read_line(merg) == ERROR) /* Read the next */
1121 return;
1122 /* Keep reading until lines duffer */
1123 while (compare(lastline, merg->line) == SAME)
1124 if (read_line(merg) == ERROR) return;
1127 /* NOTREACHED */
1131 * Check_file () checks if a file is sorted in order according to the arguments
1132 * given in main ().
1134 void check_file(fd, file)
1135 int fd;
1136 char *file;
1138 register MERGE *merg; /* 1 file only */
1139 char lastline[LINE_SIZE]; /* Save last line */
1140 register int ret; /* ret status of compare */
1142 if (fd == 0) file = "stdin";
1143 merg = (MERGE *) mem_top; /* Assign MERGE structure */
1144 merg->buffer = mem_top + sizeof(MERGE);
1145 merg->line = msbrk(LINE_SIZE);
1146 merg->cnt = merg->read_chars = 0;
1147 merg->fd = fd;
1148 buf_size = MEMORY_SIZE - sizeof(MERGE);
1150 if (read_line(merg) == ERROR) /* Read first line */
1151 return;
1152 copy(lastline, merg->line); /* and save it */
1154 for (;;) {
1155 if (read_line(merg) == ERROR) /* EOF reached */
1156 break;
1157 if ((ret = compare(lastline, merg->line)) > 0) {
1158 error(FALSE, "Disorder in file ", file);
1159 write(2, merg->line, length(merg->line));
1160 break;
1161 } else if (ret < 0) /* Copy if lines not equal */
1162 copy(lastline, merg->line);
1163 else if (uniq) {
1164 error(FALSE, "Non uniq line in file ", file);
1165 write(2, merg->line, length(merg->line));
1166 break;
1170 mbrk(mem_top); /* Reset mem */
1173 /* Length () returns the length of the argument line including the linefeed. */
1174 int length(line)
1175 register char *line;
1177 register int i = 1; /* Add linefeed */
1179 while (*line++ != '\n') i++;
1180 return i;
1183 /* Copy () copies the src line into the dest line including linefeed. */
1184 void copy(dest, src)
1185 register char *dest, *src;
1187 while ((*dest++ = *src++) != '\n');
1190 /* Msbrk() does a sbrk() and checks the return value. */
1191 char *msbrk(size)
1192 register int size;
1194 register char *address;
1196 if ((address = sbrk(size)) == (char *) -1)
1197 error(TRUE, "Not enough memory. Use chmem to allocate more", NULL);
1198 return address;
1201 /* Mbrk() does a brk() and checks the return value. */
1202 void mbrk(address)
1203 char *address;
1205 if (brk(address) == -1) error(TRUE, "Cannot reset memory", NULL);
1208 void catch(dummy)
1209 int dummy; /* to satisfy the prototype */
1211 register int i;
1213 signal(SIGINT, SIG_IGN);
1214 only_merge = FALSE;
1215 for (i = 0; i < 26; i++) (void) unlink(file_name(i));
1216 exit(2);