1 /* sort - sort a file of lines Author: Michiel Huisjes */
4 * sort [-funbirdcmt'x'] [+beg_pos[opts] [-end_pos]] [-o outfile] [file]..
7 * -f : Fold upper case to lower.
8 * -n : Sort to numeric value (optional decimal point) implies -b
9 * -b : Skip leading blanks
10 * -i : Ignore chars outside ASCII range (040 - 0176)
11 * -r : Reverse the sense of comparisons.
12 * -d : Sort to dictionary order. Only letters, digits, comma's and points
14 * If any of these flags are used in [opts], then they override all global
15 * ordering for this field.
17 * I/O control flags are:
18 * -u : Print uniq lines only once.
19 * -c : Check if files are sorted in order.
20 * -m : Merge already sorted files.
21 * -o outfile : Name of output file. (Can be one of the input files).
23 * - : Take stdin as input.
26 * -t'x' : Field separating character is 'x'
27 * +a.b : Start comparing at field 'a' with offset 'b'. A missing 'b' is
29 * -a.b : Stop comparing at field 'a' with offset 'b'. A missing 'b' is
31 * A missing -a.b means the rest of the line.
34 #include <sys/types.h>
44 #define OPEN_FILES (OPEN_MAX-4) /* Nr of open files per process */
46 #define MEMORY_SIZE (1024 * 1024)
48 #define MEMORY_SIZE ((10 * sizeof(int)) * 1024)
51 #define LINE_SIZE (1024 >> 1) /* Max length of a line */
52 #define IO_SIZE (2 * 1024) /* Size of buffered output */
53 #define STD_OUT 1 /* Fd of terminal */
55 /* Return status of functions */
59 /* Compare return values */
64 /* Table definitions. */
65 #define DICT 0x001 /* Alpha, numeric, letters and . */
66 #define ASCII 0x002 /* All between ' ' and '~' */
67 #define BLANK 0x004 /* ' ' and '\t' */
68 #define DIGIT 0x008 /* 0-9 */
69 #define UPPER 0x010 /* A-Z */
77 int fd
; /* Fd of file */
78 char *buffer
; /* Buffer for reads */
79 int read_chars
; /* Nr of chars actually read in buffer */
80 int cnt
; /* Nr of chars taken out of buffer */
81 char *line
; /* Contains line currently used */
84 MERGE merge_f
[OPEN_FILES
]; /* Merge structs */
85 int buf_size
; /* Size of core available for each struct */
87 #define FIELDS_LIMIT 10 /* 1 global + 9 user */
91 int beg_field
, beg_pos
; /* Begin field + offset */
92 int end_field
, end_pos
; /* End field + offset. ERROR == EOLN */
93 BOOL reverse
; /* TRUE if rev. flag set on this field */
102 /* Field declarations. A total of FILEDS_LIMIT is allowed */
103 FIELD fields
[FIELDS_LIMIT
];
104 int field_cnt
; /* Nr of field actually assigned */
106 /* Various output control flags */
108 BOOL only_merge
= FALSE
;
111 char *mem_top
; /* Mem_top points to lowest pos of memory. */
112 char *cur_pos
; /* First free position in mem */
113 char **line_table
; /* Pointer to the internal line table */
114 BOOL in_core
= TRUE
; /* Set if input cannot all be sorted in core */
116 /* Place where temp_files should be made */
117 char temp_files
[] = "/tmp/sort.XXXXX.XX";
118 char *output_file
; /* Name of output file */
119 int out_fd
; /* Fd to output file (could be STD_OUT) */
120 char out_buffer
[IO_SIZE
]; /* For buffered output */
122 char **argptr
; /* Pointer to argv structure */
123 int args_offset
; /* Nr of args spilled on options */
124 int args_limit
; /* Nr of args given */
126 char separator
; /* Char that separates fields */
127 int nr_of_files
= 0; /* Nr_of_files to be merged */
128 int disabled
; /* Nr of files done */
130 char USAGE
[] = "Usage: sort [-funbirdcmt'x'] [+beg_pos [-end_pos]] [-o outfile] [file] ..";
132 /* Forward declarations */
133 int main(int argc
, char **argv
);
134 void get_opts(char *ptr
, FIELD
* field
);
135 void new_field(FIELD
* field
, int *offset
, BOOL beg_fl
);
136 void adjust_options(FIELD
* field
);
137 void error(BOOL quit
, char *message
, char *arg
);
138 void open_outfile(void);
139 void get_file(int fd
, off_t size
);
141 void print_table(int fd
);
142 char *file_name(int nr
);
143 void mread(int fd
, char *address
, int bytes
);
144 void mwrite(int fd
, char *address
, int bytes
);
146 void sort_table(int nel
);
147 void incr(int si
, int ei
);
148 int cmp_fields(char *el1
, char *el2
);
149 void build_field(char *dest
, FIELD
* field
, char *src
);
150 char *skip_fields(char *str
, int nf
);
151 int compare(char *el1
, char *el2
);
152 int cmp(unsigned char *el1
, unsigned char *el2
, FIELD
* field
);
153 int digits(char *str1
, char *str2
, BOOL check_sign
);
154 int hexits(char *str1
, char *str2
);
155 void files_merge(int file_cnt
);
156 void merge(int start_file
, int limit_file
);
157 void put_line(char *line
);
158 MERGE
* print(MERGE
* merg
, int file_cnt
);
159 int read_line(MERGE
* merg
);
160 MERGE
* skip_lines(MERGE
* smallest
, int file_cnt
);
161 void uniq_lines(MERGE
* merg
);
162 void check_file(int fd
, char *file
);
163 int length(char *line
);
164 void copy(char *dest
, char *src
);
165 char *msbrk(int size
);
166 void mbrk(char *address
);
167 void catch(int dummy
);
169 /* Table of all chars. 0 means no special meaning. */
172 0, 0, 0, 0, 0, 0, 0, 0,
173 0, BLANK
| DICT
, 0, 0, 0, 0, 0, 0,
174 0, 0, 0, 0, 0, 0, 0, 0,
175 0, 0, 0, 0, 0, 0, 0, 0,
178 BLANK
| DICT
| ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
,
179 ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
,
183 DIGIT
| DICT
| ASCII
, DIGIT
| DICT
| ASCII
, DIGIT
| DICT
| ASCII
,
184 DIGIT
| DICT
| ASCII
, DIGIT
| DICT
| ASCII
, DIGIT
| DICT
| ASCII
,
185 DIGIT
| DICT
| ASCII
, DIGIT
| DICT
| ASCII
, DIGIT
| DICT
| ASCII
,
186 DIGIT
| DICT
| ASCII
,
188 /* ASCII from ':' to '@' */
189 ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
,
191 /* Upper case letters 'A' to 'Z' */
192 UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
,
193 UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
,
194 UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
,
195 UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
,
196 UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
,
197 UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
,
198 UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
,
199 UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
,
200 UPPER
| DICT
| ASCII
, UPPER
| DICT
| ASCII
,
202 /* ASCII from '[' to '`' */
203 ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
,
205 /* Lower case letters from 'a' to 'z' */
206 DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
,
207 DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
,
208 DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
,
209 DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
,
210 DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
,
211 DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
, DICT
| ASCII
,
212 DICT
| ASCII
, DICT
| ASCII
,
214 /* ASCII from '{' to '~' */
215 ASCII
, ASCII
, ASCII
, ASCII
,
217 /* Stuff from -1 to -177 */
218 0, 0, 0, 0, 0, 0, 0, 0, 0,
219 0, 0, 0, 0, 0, 0, 0, 0, 0,
220 0, 0, 0, 0, 0, 0, 0, 0, 0,
221 0, 0, 0, 0, 0, 0, 0, 0, 0,
222 0, 0, 0, 0, 0, 0, 0, 0, 0,
223 0, 0, 0, 0, 0, 0, 0, 0, 0,
224 0, 0, 0, 0, 0, 0, 0, 0, 0,
225 0, 0, 0, 0, 0, 0, 0, 0, 0,
231 * Get_opts () assigns the options into the field structure as described in ptr.
232 * This field structure could be the GLOBAL one.
234 void get_opts(ptr
, field
)
236 register FIELD
*field
;
239 case 'b': /* Skip leading blanks */
240 field
->blanks
= TRUE
;
242 case 'd': /* Dictionary order */
243 field
->dictionary
= TRUE
;
245 case 'f': /* Fold upper case to lower */
246 field
->fold_case
= TRUE
;
248 case 'i': /* Skip chars outside ' ' '~' */
251 case 'n': /* Sort on numeric */
252 field
->numeric
= TRUE
;
253 field
->blanks
= TRUE
;
256 field
->hexmode
= TRUE
;
257 field
->blanks
= TRUE
;
259 case 'r': /* Reverse comparisons */
260 field
->reverse
= TRUE
;
262 default: /* Illegal options */
263 error(TRUE
, USAGE
, NULL
);
267 /* New_field () assigns a new field as described by the arguments.
268 * A field description is of the form: +a.b[opts] -c.d, where b and d, as well
269 * as -c.d and [opts] are optional. Nr before digit is field nr. Nr after digit
270 * is offset from field.
272 void new_field(field
, offset
, beg_fl
)
273 register FIELD
*field
; /* Field to assign */
274 int *offset
; /* Offset in argv structure */
275 BOOL beg_fl
; /* Assign beg or end of field */
279 ptr
= argptr
[*offset
];
280 *offset
+= 1; /* Incr offset to next arg */
284 field
->beg_field
= atoi(ptr
); /* Assign int of first field */
286 field
->end_field
= atoi(ptr
);
288 while (table
[*ptr
] & DIGIT
) /* Skip all digits */
291 if (*ptr
== '.') { /* Check for offset */
294 field
->beg_pos
= atoi(ptr
);
296 field
->end_pos
= atoi(ptr
);
297 while (table
[*ptr
] & DIGIT
) /* Skip digits */
301 while (*ptr
!= '\0') /* Check options after field */
302 get_opts(ptr
++, field
);
304 if (beg_fl
) { /* Check for end pos */
305 ptr
= argptr
[*offset
];
306 if (ptr
&& *ptr
== '-' && ((table
[*(ptr
+ 1)] & DIGIT
) || *(ptr
+ 1) == '.')) {
307 new_field(field
, offset
, FALSE
);
308 if (field
->beg_field
> field
->end_field
)
309 error(TRUE
, "End field is before start field!", NULL
);
310 } else /* No end pos. */
311 field
->end_field
= ERROR
;
319 int arg_count
= 1; /* Offset in argv */
321 register char *ptr
; /* Ptr to *argv in use */
326 cur_pos
= mem_top
= msbrk(MEMORY_SIZE
); /* Find lowest mem. location */
328 while (arg_count
< argc
&& ((ptr
= argv
[arg_count
])[0] == '-' || *ptr
== '+')) {
329 if (*ptr
== '-' && *(ptr
+ 1) == '\0') /* "-" means stdin */
331 if (*ptr
== '+') { /* Assign field. */
332 if (++field_cnt
== FIELDS_LIMIT
)
333 error(TRUE
, "Too many fields", NULL
);
334 new_field(&fields
[field_cnt
], &arg_count
, TRUE
);
335 } else { /* Get output options */
338 case 'c': /* Only check file */
341 case 'm': /* Merge (sorted) files */
344 case 'u': /* Only give uniq lines */
347 case 'o': /* Name of output file */
348 output_file
= argv
[++arg_count
];
350 case 't': /* Field separator */
354 default: /* Sort options */
355 get_opts(ptr
, &fields
[GLOBAL
]);
362 for (fd
= 1; fd
<= field_cnt
; fd
++) adjust_options(&fields
[fd
]);
364 /* Create name of tem_files 'sort.pid.aa' */
365 ptr
= &temp_files
[10];
369 *ptr
++ = pid
/ pow
+ '0';
374 signal(SIGINT
, catch);
376 /* Only merge files. Set up */
378 args_limit
= args_offset
= arg_count
;
379 while (argv
[args_limit
] != NULL
)
380 args_limit
++; /* Find nr of args */
381 files_merge(args_limit
- arg_count
);
384 if (arg_count
== argc
) { /* No args left. Use stdin */
388 get_file(0, (off_t
) 0);
390 while (arg_count
< argc
) { /* Sort or check args */
391 if (strcmp(argv
[arg_count
], "-") == 0)
393 else if (stat(argv
[arg_count
], &st
) < 0) {
394 error(FALSE
, "Cannot find ", argv
[arg_count
++]);
399 else if ((fd
= open(argv
[arg_count
], O_RDONLY
)) < 0) {
400 error(FALSE
, "Cannot open ", argv
[arg_count
++]);
404 check_file(fd
, argv
[arg_count
]);
405 else /* Get_file reads whole file */
406 get_file(fd
, st
.st_size
);
412 sort(); /* Sort whatever is left */
414 if (nr_of_files
== 1) /* Only one file sorted -> don't merge */
417 files_merge(nr_of_files
);
421 /* Adjust_options() assigns all global variables set also in the fields
424 void adjust_options(field
)
425 register FIELD
*field
;
427 register FIELD
*gfield
= &fields
[GLOBAL
];
429 if (gfield
->reverse
) field
->reverse
= TRUE
;
430 if (gfield
->blanks
) field
->blanks
= TRUE
;
431 if (gfield
->dictionary
) field
->dictionary
= TRUE
;
432 if (gfield
->fold_case
) field
->fold_case
= TRUE
;
433 if (gfield
->ascii
) field
->ascii
= TRUE
;
434 if (gfield
->numeric
) field
->numeric
= TRUE
;
437 /* Error () prints the error message on stderr and exits if quit == TRUE. */
438 void error(quit
, message
, arg
)
440 register char *message
, *arg
;
442 write(2, message
, strlen(message
));
443 if (arg
!= NULL
) write(2, arg
, strlen(arg
));
448 /* Open_outfile () assigns to out_fd the fd where the output must go when all
449 * the sorting is done.
453 if (output_file
== NULL
)
455 else if ((out_fd
= creat(output_file
, 0644)) < 0)
456 error(TRUE
, "Cannot creat ", output_file
);
459 /* Get_file reads the whole file of filedescriptor fd. If the file is too big
460 * to keep in core, a partial sort is done, and the output is stashed somewhere.
462 void get_file(fd
, size
)
463 int fd
; /* Fd of file to read */
464 register off_t size
; /* Size of file */
467 int rest
; /* Rest in memory */
468 char save_ch
; /* Used in stdin readings */
470 rest
= MEMORY_SIZE
- (cur_pos
- mem_top
);
471 if (fd
== 0) { /* We're reding stdin */
472 while ((i
= read(0, cur_pos
, rest
)) > 0) {
473 if ((cur_pos
- mem_top
) + i
== MEMORY_SIZE
) {
475 i
= last_line(); /* End of last line */
476 save_ch
= mem_top
[i
];
478 sort(); /* Sort core */
479 mem_top
[i
] = save_ch
; /* Restore erased char */
480 /* Restore last (half read) line */
481 for (rest
= 0; i
+ rest
!= MEMORY_SIZE
; rest
++)
482 mem_top
[rest
] = mem_top
[i
+ rest
];
483 /* Assign current pos. in memory */
484 cur_pos
= &mem_top
[rest
];
485 } else { /* Fits, just assign position in mem. */
486 cur_pos
= cur_pos
+ i
;
490 /* Calculate rest of mem */
491 rest
= MEMORY_SIZE
- (cur_pos
- mem_top
);
495 /* Reading file. Check size */
496 else if (size
> rest
) { /* Won't fit */
497 mread(fd
, cur_pos
, rest
);
499 i
= last_line(); /* Get pos. of last line */
500 mem_top
[i
] = '\0'; /* Truncate */
501 (void) lseek(fd
, (off_t
) (i
- MEMORY_SIZE
), SEEK_CUR
); /* Do this next time */
502 size
= size
- rest
- i
+ MEMORY_SIZE
; /* Calculate rest */
503 cur_pos
= mem_top
; /* Reset mem */
504 sort(); /* Sort core */
505 get_file(fd
, size
); /* Get rest of file */
506 } else { /* Fits. Just read in */
508 mread(fd
, cur_pos
, rest
);
509 cur_pos
= cur_pos
+ rest
; /* Reassign cur_pos */
511 (void) close(fd
); /* File completed */
515 /* Last_line () find the last line in core and retuns the offset from the top
522 for (i
= MEMORY_SIZE
- 2; i
> 0; i
--)
523 if (mem_top
[i
] == '\n') break;
527 /* Print_table prints the line table in the given file_descriptor. If the fd
528 * equals ERROR, it opens a temp_file itself.
533 register char **line_ptr
; /* Ptr in line_table */
534 register char *ptr
; /* Ptr to line */
535 int index
= 0; /* Index in output buffer */
538 if ((fd
= creat(file_name(nr_of_files
), 0644)) < 0)
539 error(TRUE
, "Cannot creat ", file_name(nr_of_files
));
541 for (line_ptr
= line_table
; *line_ptr
!= NULL
; line_ptr
++) {
543 /* Skip all same lines if uniq is set */
544 if (uniq
&& *(line_ptr
+ 1) != NULL
) {
545 if (compare(ptr
, *(line_ptr
+ 1)) == SAME
) continue;
547 do { /* Print line in a buffered way */
548 out_buffer
[index
++] = *ptr
;
549 if (index
== IO_SIZE
) {
550 mwrite(fd
, out_buffer
, IO_SIZE
);
553 } while (*ptr
++ != '\n');
555 mwrite(fd
, out_buffer
, index
);/* Flush buffer */
556 (void) close(fd
); /* Close file */
557 nr_of_files
++; /* Increment nr_of_files to merge */
560 /* File_name () returns the nr argument from the argument list, or a uniq
561 * filename if the nr is too high, or the arguments were not merge files.
567 if (args_offset
+ nr
< args_limit
) return argptr
[args_offset
+ nr
];
569 temp_files
[16] = nr
/ 26 + 'a';
570 temp_files
[17] = nr
% 26 + 'a';
575 /* Mread () performs a normal read (), but checks the return value. */
576 void mread(fd
, address
, bytes
)
581 if (read(fd
, address
, bytes
) < 0 && bytes
!= 0)
582 error(TRUE
, "Read error", NULL
);
585 /* Mwrite () performs a normal write (), but checks the return value. */
586 void mwrite(fd
, address
, bytes
)
591 if (write(fd
, address
, bytes
) != bytes
&& bytes
!= 0)
592 error(TRUE
, "Write error", NULL
);
595 /* Sort () sorts the input in memory starting at mem_top. */
598 register char *ptr
= mem_top
;
599 register int count
= 0;
601 /* Count number of lines in memory */
603 if (*ptr
++ == '\n') count
++;
606 /* Set up the line table */
607 line_table
= (char **) msbrk(count
* sizeof(char *) + sizeof(char *));
610 ptr
= line_table
[0] = mem_top
;
612 if (*ptr
++ == '\n') line_table
[count
++] = ptr
;
615 line_table
[count
- 1] = NULL
;
617 /* Sort the line table */
618 sort_table(count
- 1);
620 /* Stash output somewhere */
627 /* Free line table */
628 mbrk((char *) line_table
);
631 /* Sort_table () sorts the line table consisting of nel elements. */
639 for (i
= (nel
>> 1); i
>= 1; i
--) incr(i
, nel
);
642 for (i
= nel
; i
> 1; i
--) {
644 line_table
[0] = line_table
[i
- 1];
645 line_table
[i
- 1] = tmp
;
650 /* Incr () increments the heap. */
656 while (si
<= (ei
>> 1)) {
658 if (si
+ 1 <= ei
&& compare(line_table
[si
- 1], line_table
[si
]) <= 0)
660 if (compare(line_table
[(si
>> 1) - 1], line_table
[si
- 1]) >= 0)
662 tmp
= line_table
[(si
>> 1) - 1];
663 line_table
[(si
>> 1) - 1] = line_table
[si
- 1];
664 line_table
[si
- 1] = tmp
;
668 /* Cmp_fields builds new lines out of the lines pointed to by el1 and el2 and
669 * puts it into the line1 and line2 arrays. It then calls the cmp () routine
670 * with the field describing the arguments.
672 int cmp_fields(el1
, el2
)
673 register char *el1
, *el2
;
676 char line1
[LINE_SIZE
], line2
[LINE_SIZE
];
678 for (i
= 0; i
< field_cnt
; i
++) { /* Setup line parts */
679 build_field(line1
, &fields
[i
+ 1], el1
);
680 build_field(line2
, &fields
[i
+ 1], el2
);
681 if ((ret
= cmp((unsigned char *) line1
, (unsigned char *) line2
,
682 &fields
[i
+ 1])) != SAME
)
683 break; /* If equal, try next field */
686 /* Check for reverse flag */
687 if (i
!= field_cnt
&& fields
[i
+ 1].reverse
) return -ret
;
689 /* Else return the last return value of cmp () */
693 /* Build_field builds a new line from the src as described by the field.
694 * The result is put in dest.
696 void build_field(dest
, field
, src
)
697 char *dest
; /* Holds result */
698 register FIELD
*field
; /* Field description */
699 register char *src
; /* Source line */
701 char *begin
= src
; /* Remember start location */
702 char *last
; /* Pointer to end location */
705 /* Skip begin fields */
706 src
= skip_fields(src
, field
->beg_field
);
708 /* Skip begin positions */
709 for (i
= 0; i
< field
->beg_pos
&& *src
!= '\n'; i
++) src
++;
711 /* Copy whatever is left */
714 /* If end field is assigned truncate (perhaps) the part copied */
715 if (field
->end_field
!= ERROR
) { /* Find last field */
716 last
= skip_fields(begin
, field
->end_field
);
717 /* Skip positions as given by end fields description */
718 for (i
= 0; i
< field
->end_pos
&& *last
!= '\n'; i
++) last
++;
719 dest
[last
- src
] = '\n';/* Truncate line */
723 /* Skip_fields () skips nf fields of the line pointed to by str. */
724 char *skip_fields(str
, nf
)
729 if (separator
== '\0') {/* Means ' ' or '\t' */
730 while (*str
!= ' ' && *str
!= '\t' && *str
!= '\n') str
++;
731 while (table
[*str
] & BLANK
) str
++;
733 while (*str
!= separator
&& *str
!= '\n') str
++;
734 if (*str
== separator
) str
++;
737 return str
; /* Return pointer to indicated field */
740 /* Compare is called by all sorting routines. It checks if fields assignments
741 * has been made. if so, it calls cmp_fields (). If not, it calls cmp () and
742 * reversed the return value if the (global) reverse flag is set.
744 int compare(el1
, el2
)
745 register char *el1
, *el2
;
749 if (field_cnt
> GLOBAL
) return cmp_fields(el1
, el2
);
751 ret
= cmp((unsigned char *) el1
, (unsigned char *) el2
, &fields
[GLOBAL
]);
752 return(fields
[GLOBAL
].reverse
) ? -ret
: ret
;
755 /* Cmp () is the actual compare routine. It compares according to the
756 * description given in the field pointer.
758 int cmp(el1
, el2
, field
)
759 register unsigned char *el1
, *el2
;
764 if (field
->blanks
) { /* Skip leading blanks */
765 while (table
[*el1
] & BLANK
) el1
++;
766 while (table
[*el2
] & BLANK
) el2
++;
768 if (field
->numeric
) /* Compare numeric */
769 return digits((char *) el1
, (char *) el2
, TRUE
);
770 if (field
->hexmode
) /* Compare hex */
771 return hexits((char *) el1
, (char *) el2
);
774 while (*el1
== *el2
) {
775 if (*el1
++ == '\n') /* EOLN on both strings */
779 if (*el1
== '\n') /* EOLN on string one */
781 if (*el2
== '\n') return HIGHER
;
782 if (field
->ascii
) { /* Skip chars outside 040 - 0177 */
783 if ((table
[*el1
] & ASCII
) == 0) {
786 } while ((table
[*el1
] & ASCII
) == 0);
789 if ((table
[*el2
] & ASCII
) == 0) {
792 } while ((table
[*el2
] & ASCII
) == 0);
796 if (field
->dictionary
) {/* Skip non-dict chars */
797 if ((table
[*el1
] & DICT
) == 0) {
800 } while ((table
[*el1
] & DICT
) == 0);
803 if ((table
[*el2
] & DICT
) == 0) {
806 } while ((table
[*el2
] & DICT
) == 0);
810 if (field
->fold_case
) { /* Fold upper case to lower */
811 if (table
[c1
= *el1
++] & UPPER
) c1
+= 'a' - 'A';
812 if (table
[c2
= *el2
++] & UPPER
) c2
+= 'a' - 'A';
813 if (c1
== c2
) continue;
822 int hexits(char *str1
, char *str2
)
824 unsigned long v1
, v2
;
826 r1
= sscanf(str1
, "0x%lx", &v1
);
827 r2
= sscanf(str2
, "0x%lx", &v2
);
829 /* ordering based on reasonable hex number */
830 if(r1
== 1 && r2
!= 1) return HIGHER
;
831 if(r1
!= 1 && r2
== 1) return LOWER
;
832 if(r1
!= 1 && r2
!= 1) return SAME
;
834 if(v1
> v2
) return HIGHER
;
835 if(v1
< v2
) return LOWER
;
841 * Digits compares () the two strings that point to a number of digits followed
842 * by an optional decimal point.
844 int digits(str1
, str2
, check_sign
)
845 register char *str1
, *str2
;
846 BOOL check_sign
; /* True if sign must be checked */
848 BOOL negative
= FALSE
; /* True if negative numbers */
851 /* Check for optional minus or plus sign */
856 } else if (*str1
== '+')
860 if (negative
== FALSE
) return HIGHER
;
864 else if (*str2
== '+')
868 /* Keep incrementing as long as digits are available and equal */
869 while ((table
[*str1
] & DIGIT
) && table
[*str2
] & DIGIT
) {
870 if (*str1
!= *str2
) break;
875 /* First check for the decimal point. */
876 if (*str1
== '.' || *str2
== '.') {
878 if (*str2
== '.') /* Both. Check decimal part */
879 ret
= digits(str1
+ 1, str2
+ 1, FALSE
);
881 ret
= (table
[*str2
] & DIGIT
) ? LOWER
: HIGHER
;
883 ret
= (table
[*str1
] & DIGIT
) ? HIGHER
: LOWER
;
886 /* Now either two digits differ, or unknown char is seen (e.g. end of string) */
887 else if ((table
[*str1
] & DIGIT
) && (table
[*str2
] & DIGIT
)) {
888 diff
= *str1
- *str2
; /* Basic difference */
889 pow
= 0; /* Check power of numbers */
890 while (table
[*str1
++] & DIGIT
) pow
++;
891 while (table
[*str2
++] & DIGIT
) pow
--;
892 ret
= (pow
== 0) ? diff
: pow
;
895 /* Unknown char. Check on which string it occurred */
897 if ((table
[*str1
] & DIGIT
) == 0)
898 ret
= (table
[*str2
] & DIGIT
) ? LOWER
: SAME
;
903 /* Reverse sense of comparisons if negative is true. (-1000 < -1) */
904 return(negative
) ? -ret
: ret
;
907 /* Files_merge () merges all files as indicated by nr_of_files. Merging goes
908 * in numbers of files that can be opened at the same time. (OPEN_FILES)
910 void files_merge(file_cnt
)
911 register int file_cnt
; /* Nr_of_files to merge */
916 for (i
= 0; i
< file_cnt
; i
+= OPEN_FILES
) {
917 /* Merge last files and store in output file */
918 if ((limit
= i
+ OPEN_FILES
) >= file_cnt
) {
921 } else { /* Merge OPEN_FILES files and store in temp
923 temp_files
[16] = file_cnt
/ 26 + 'a';
924 temp_files
[17] = file_cnt
% 26 + 'a';
925 if ((out_fd
= creat(temp_files
, 0644)) < 0)
926 error(TRUE
, "Cannot creat ", temp_files
);
933 i
= (only_merge
) ? args_limit
- args_offset
: 0;
934 while (i
< file_cnt
) (void) unlink(file_name(i
++));
937 /* Merge () merges the files between start_file and limit_file. */
938 void merge(start_file
, limit_file
)
939 int start_file
, limit_file
;
941 register MERGE
*smallest
; /* Keeps track of smallest line */
943 int file_cnt
= limit_file
- start_file
; /* Nr of files to merge */
945 /* Calculate size in core available for file_cnt merge structs */
946 buf_size
= MEMORY_SIZE
/ file_cnt
- LINE_SIZE
;
948 mbrk(mem_top
); /* First reset mem to lowest loc. */
949 disabled
= 0; /* All files not done yet */
951 /* Set up merge structures. */
952 for (i
= start_file
; i
< limit_file
; i
++) {
953 smallest
= &merge_f
[i
- start_file
];
954 if (!strcmp(file_name(i
), "-")) /* File is stdin */
956 else if ((smallest
->fd
= open(file_name(i
), O_RDONLY
)) < 0) {
957 smallest
->fd
= ERROR
;
958 error(FALSE
, "Cannot open ", file_name(i
));
959 disabled
++; /* Done this file */
962 smallest
->buffer
= msbrk(buf_size
);
963 smallest
->line
= msbrk(LINE_SIZE
);
964 smallest
->cnt
= smallest
->read_chars
= 0;
965 (void) read_line(smallest
); /* Read first line */
968 if (disabled
== file_cnt
) { /* Couldn't open files */
969 (void) close(out_fd
);
973 /* Find a merg struct to assign smallest. */
974 for (i
= 0; i
< file_cnt
; i
++) {
975 if (merge_f
[i
].fd
!= ERROR
) {
976 smallest
= &merge_f
[i
];
981 /* Loop until all files minus one are done */
982 while (disabled
< file_cnt
- 1) {
983 if (uniq
) /* Skip all same lines */
984 smallest
= skip_lines(smallest
, file_cnt
);
985 else { /* Find smallest line */
986 for (i
= 0; i
< file_cnt
; i
++) {
987 if (merge_f
[i
].fd
== ERROR
)
988 continue; /* We've had this one */
989 if (compare(merge_f
[i
].line
, smallest
->line
) < 0)
990 smallest
= &merge_f
[i
];
992 } /* Print line and read next */
993 smallest
= print(smallest
, file_cnt
);
996 if (only_merge
&& uniq
)
997 uniq_lines(smallest
); /* Print only uniq lines */
998 else /* Print rest of file */
999 while (print(smallest
, file_cnt
) != NULL
);
1001 put_line(NULL
); /* Flush output buffer */
1004 /* Put_line () prints the line into the out_fd filedescriptor. If line equals
1005 * NULL, the out_fd is flushed and closed.
1008 register char *line
;
1010 static int index
= 0; /* Index in out_buffer */
1012 if (line
== NULL
) { /* Flush and close */
1013 mwrite(out_fd
, out_buffer
, index
);
1015 (void) close(out_fd
);
1018 do { /* Fill out_buffer with line */
1019 out_buffer
[index
++] = *line
;
1020 if (index
== IO_SIZE
) {
1021 mwrite(out_fd
, out_buffer
, IO_SIZE
);
1024 } while (*line
++ != '\n');
1028 * Print () prints the line of the merg structure and tries to read another one.
1029 * If this fails, it returns the next merg structure which file_descriptor is
1030 * still open. If none could be found, a NIL structure is returned.
1032 MERGE
*print(merg
, file_cnt
)
1033 register MERGE
*merg
;
1034 int file_cnt
; /* Nr of files that are being merged */
1038 put_line(merg
->line
); /* Print the line */
1040 if (read_line(merg
) == ERROR
) { /* Read next line */
1041 for (i
= 0; i
< file_cnt
; i
++) {
1042 if (merge_f
[i
].fd
!= ERROR
) {
1047 if (i
== file_cnt
) /* No more files left */
1053 /* Read_line () reads a line from the fd from the merg struct. If the read
1054 * failed, disabled is incremented and the file is closed. Readings are
1055 * done in buf_size bytes.
1056 * Lines longer than LINE_SIZE are silently truncated.
1059 register MERGE
*merg
;
1061 register char *ptr
= merg
->line
- 1; /* Ptr buf that will hold line */
1065 if (merg
->cnt
== merg
->read_chars
) { /* Read new buffer */
1066 if ((merg
->read_chars
=
1067 read(merg
->fd
, merg
->buffer
, buf_size
)) <= 0) {
1068 (void) close(merg
->fd
); /* OOPS */
1075 *ptr
= merg
->buffer
[merg
->cnt
++]; /* Assign next char of line */
1076 if (ptr
- merg
->line
== LINE_SIZE
- 1)
1077 *ptr
= '\n'; /* Truncate very long lines */
1078 } while (*ptr
!= '\n' && *ptr
!= '\0');
1080 if (*ptr
== '\0') /* Add '\n' to last line */
1082 *++ptr
= '\0'; /* Add '\0' */
1086 /* Skip_lines () skips all same lines in all the files currently being merged.
1087 * It returns a pointer to the merge struct containing the smallest line.
1089 MERGE
*skip_lines(smallest
, file_cnt
)
1090 register MERGE
*smallest
;
1096 if (disabled
== file_cnt
- 1) /* We've had all */
1099 for (i
= 0; i
< file_cnt
; i
++) {
1100 if (merge_f
[i
].fd
== ERROR
|| smallest
== &merge_f
[i
])
1101 continue; /* Don't check same file */
1102 while ((ret
= compare(merge_f
[i
].line
, smallest
->line
)) == 0) {
1103 if (read_line(&merge_f
[i
]) == ERROR
) break; /* EOF */
1105 if (ret
< 0) /* Line wasn't smallest. Try again */
1106 return skip_lines(&merge_f
[i
], file_cnt
);
1111 /* Uniq_lines () prints only the uniq lines out of the fd of the merg struct. */
1112 void uniq_lines(merg
)
1113 register MERGE
*merg
;
1115 char lastline
[LINE_SIZE
]; /* Buffer to hold last line */
1118 put_line(merg
->line
); /* Print this line */
1119 copy(lastline
, merg
->line
); /* and save it */
1120 if (read_line(merg
) == ERROR
) /* Read the next */
1122 /* Keep reading until lines duffer */
1123 while (compare(lastline
, merg
->line
) == SAME
)
1124 if (read_line(merg
) == ERROR
) return;
1131 * Check_file () checks if a file is sorted in order according to the arguments
1134 void check_file(fd
, file
)
1138 register MERGE
*merg
; /* 1 file only */
1139 char lastline
[LINE_SIZE
]; /* Save last line */
1140 register int ret
; /* ret status of compare */
1142 if (fd
== 0) file
= "stdin";
1143 merg
= (MERGE
*) mem_top
; /* Assign MERGE structure */
1144 merg
->buffer
= mem_top
+ sizeof(MERGE
);
1145 merg
->line
= msbrk(LINE_SIZE
);
1146 merg
->cnt
= merg
->read_chars
= 0;
1148 buf_size
= MEMORY_SIZE
- sizeof(MERGE
);
1150 if (read_line(merg
) == ERROR
) /* Read first line */
1152 copy(lastline
, merg
->line
); /* and save it */
1155 if (read_line(merg
) == ERROR
) /* EOF reached */
1157 if ((ret
= compare(lastline
, merg
->line
)) > 0) {
1158 error(FALSE
, "Disorder in file ", file
);
1159 write(2, merg
->line
, length(merg
->line
));
1161 } else if (ret
< 0) /* Copy if lines not equal */
1162 copy(lastline
, merg
->line
);
1164 error(FALSE
, "Non uniq line in file ", file
);
1165 write(2, merg
->line
, length(merg
->line
));
1170 mbrk(mem_top
); /* Reset mem */
1173 /* Length () returns the length of the argument line including the linefeed. */
1175 register char *line
;
1177 register int i
= 1; /* Add linefeed */
1179 while (*line
++ != '\n') i
++;
1183 /* Copy () copies the src line into the dest line including linefeed. */
1184 void copy(dest
, src
)
1185 register char *dest
, *src
;
1187 while ((*dest
++ = *src
++) != '\n');
1190 /* Msbrk() does a sbrk() and checks the return value. */
1194 register char *address
;
1196 if ((address
= sbrk(size
)) == (char *) -1)
1197 error(TRUE
, "Not enough memory. Use chmem to allocate more", NULL
);
1201 /* Mbrk() does a brk() and checks the return value. */
1205 if (brk(address
) == -1) error(TRUE
, "Cannot reset memory", NULL
);
1209 int dummy
; /* to satisfy the prototype */
1213 signal(SIGINT
, SIG_IGN
);
1215 for (i
= 0; i
< 26; i
++) (void) unlink(file_name(i
));