4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
30 * csplit - Context or line file splitter
31 * Compile: cc -O -s -o csplit csplit.c
52 #define LINSIZ LINE_MAX /* POSIX.2 - read lines LINE_MAX long */
56 char linbuf
[LINSIZ
]; /* Input line buffer: filled by fgets() in getaline(), written out by flush() */
58 char tmpbuf
[BUFSIZ
]; /* Temporary buffer used while copying stdin into infile */
59 char file
[8192] = "xx"; /* Output file name buffer: default prefix "xx"; getfile() appends the numeric suffix */
60 char *targ
; /* Arg ptr for error messages */
62 FILE *infile
, *outfile
; /* I/O file streams: input being split, output file currently being written */
63 int silent
, keep
, create
; /* Flags: -s(ilent), -k(eep), (create) */
65 int fiwidth
= 2; /* File index width (output file names): digits in the suffix, set by -n */
68 offset_t offset
; /* Regular expression offset value, passed to findline() */
69 offset_t curline
; /* Current line in input file; main() starts it at 1 */
72 * These defines are needed for regexp handling(see regexp(7))
/*
 * PERROR ignores its argument x and reports the argument currently being
 * processed (targ) through fatal().
 * NOTE(review): the expansion already ends in ';', so a use written as
 * "PERROR(n);" produces an extra empty statement.  Confirm that this is
 * harmless at every expansion site before changing the macro.
 */
74 #define PERROR(x) fatal("%s: Illegal Regular Expression\n", targ);
76 static int asc_to_ll(char *, long long *); /* ascii string -> long long; ERR on illegal input */
77 static void closefile(void); /* print byte count of, and close, the output file */
78 static void fatal(char *, char *); /* print error message, clean up, exit */
79 static offset_t
findline(char *, offset_t
); /* line number referenced by a compiled R.E. and offset */
80 static void flush(void); /* fputs linbuf to outfile */
81 static FILE *getfile(void); /* open the next numbered output file */
82 static char *getaline(int); /* fgets one line of infile into linbuf */
83 static void line_arg(char *); /* handle a line number argument */
84 static void num_arg(char *, int); /* handle a repeat argument */
85 static void re_arg(char *); /* handle a regular expression argument */
87 static void to_line(offset_t
); /* create a split file up to the given line */
88 static void usage(void); /* print the usage message */
91 main(int argc
, char **argv
)
96 (void) setlocale(LC_ALL
, "");
97 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
98 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
100 (void) textdomain(TEXT_DOMAIN
);
102 while ((ch
= getopt(argc
, argv
, "skf:n:")) != EOF
) {
105 (void) strcpy(file
, optarg
);
106 if ((ptr
= strrchr(optarg
, '/')) == NULL
)
112 case 'n': /* POSIX.2 */
113 for (ptr
= optarg
; *ptr
!= NULL
; ptr
++)
114 if (!isdigit((int)*ptr
))
115 fatal("-n num\n", NULL
);
116 fiwidth
= atoi(optarg
);
129 argv
= &argv
[optind
];
131 if (argc
<= 1 || errflg
)
134 if (strcmp(*argv
, "-") == 0) {
137 while (fread(tmpbuf
, 1, BUFSIZ
, stdin
) != 0) {
138 if (fwrite(tmpbuf
, 1, BUFSIZ
, infile
) == 0)
139 if (errno
== ENOSPC
) {
140 (void) fprintf(stderr
, "csplit: ");
141 (void) fprintf(stderr
, gettext(
142 "No space left on device\n"));
145 (void) fprintf(stderr
, "csplit: ");
146 (void) fprintf(stderr
, gettext(
147 "Bad write to temporary "
152 /* clear the buffer to get correct size when writing buffer */
154 (void) memset(tmpbuf
, '\0', sizeof (tmpbuf
));
157 } else if ((infile
= fopen(*argv
, "r")) == NULL
)
158 fatal("Cannot open %s\n", *argv
);
160 curline
= (offset_t
)1;
161 (void) signal(SIGINT
, sig
);
164 * The following for loop handles the different argument types.
165 * A switch is performed on the first character of the argument
166 * and each case calls the appropriate argument handling routine.
169 for (; *argv
; ++argv
) {
183 num_arg(*argv
, mode
);
199 * asc_to_ll takes an ascii argument(str) and converts it to a long long(plc)
200 * It returns ERR if it encounters an illegal character. The reason that asc_to_ll
201 * does not return an answer(long long) is that any value for the long
202 * long is legal, and this version of asc_to_ll detects error strings.
206 asc_to_ll(char *str
, long long *plc
)
224 for (; *str
!= NULL
; str
++)
225 if (*str
>= '0' && *str
<= '9')
226 *plc
= *plc
* 10 + *str
- '0';
231 return (TRUE
); /* not error */
235 * Closefile prints the byte count of the file created (via fseeko
236 * and ftello) if the create flag is on and the silent flag is not on.
237 * If the create flag is on, closefile then closes the file (fclose).
243 if (!silent
&& create
) {
244 (void) fseeko(outfile
, (offset_t
)0, SEEK_END
);
245 (void) fprintf(stdout
, "%lld\n", (offset_t
)ftello(outfile
));
248 (void) fclose(outfile
);
252 * Fatal handles error messages and cleanup.
253 * Because "arg" can be the global file, and the cleanup processing
254 * uses the global file, the error message is printed first. If the
255 * "keep" flag is not set, fatal unlinks all created files. If the
256 * "keep" flag is set, fatal closes the current file(if there is one).
257 * Fatal exits with a value of 1.
261 fatal(char *string
, char *arg
)
266 (void) fprintf(stderr
, "csplit: ");
268 /* gettext dynamically replaces string */
270 (void) fprintf(stderr
, gettext(string
), arg
);
273 (void) fclose(outfile
);
274 for (fls
= file
; *fls
!= '\0'; fls
++)
277 for (num
= atoi(fls
); num
>= 0; num
--) {
278 (void) sprintf(fls
, "%.*d", fiwidth
, num
);
289 * Findline returns the line number referenced by the current argument.
290 * Its arguments are a pointer to the compiled regular expression(expr),
291 * and an offset(oset). The variable lncnt is used to count the number
292 * of lines searched. First the current stream location is saved via
293 * ftello(), and getaline is called so that R.E. searching starts at the
294 * line after the previously referenced line. The while loop checks
295 * that there are more lines(error if none), bumps the line count, and
296 * checks for the R.E. on each line. If the R.E. matches on one of the
297 * lines the old stream location is restored, and the line number
298 * referenced by the R.E. and the offset is returned.
302 findline(char *expr
, offset_t oset
)
304 static int benhere
= 0;
305 offset_t lncnt
= 0, saveloc
;
307 saveloc
= ftello(infile
);
308 if (curline
!= (offset_t
)1 || benhere
) /* If first line, first time, */
309 (void) getaline(FALSE
); /* then don't skip */
313 while (getaline(FALSE
) != NULL
) {
315 if ((sptr
= strrchr(linbuf
, '\n')) != NULL
)
317 if (step(linbuf
, expr
)) {
318 (void) fseeko(infile
, (offset_t
)saveloc
, SEEK_SET
);
319 return (curline
+lncnt
+oset
);
322 (void) fseeko(infile
, (offset_t
)saveloc
, SEEK_SET
);
323 return (curline
+lncnt
+oset
+2);
327 * Flush uses fputs to put lines on the output file stream(outfile)
328 * Since fputs does its own buffering, flush doesn't need to.
329 * Flush does nothing if the create flag is not set.
336 (void) fputs(linbuf
, outfile
);
340 * Getfile does nothing if the create flag is not set. If the create
341 * flag is set, getfile positions the file pointer(fptr) at the end of
342 * the file name prefix on the first call(fptr=0). The file counter is
343 * stored in the file name and incremented. If the subsequent fopen
344 * fails, the file name is copied to tfile for the error message, the
345 * previous file name is restored for cleanup, and fatal is called. If
346 * the fopen succeeds, the stream(opfil) is returned.
361 for (fptr
= file
; *fptr
!= NULL
; fptr
++)
363 (void) sprintf(fptr
, "%.*d", fiwidth
, ctr
++);
365 /* check for suffix length overflow */
366 if (strlen(fptr
) > fiwidth
) {
367 fatal("Suffix longer than %ld chars; increase -n\n",
371 /* check for filename length overflow */
373 delim
= strrchr(file
, '/');
374 if (delim
== (char *)NULL
) {
375 if (strlen(file
) > pathconf(".", _PC_NAME_MAX
)) {
376 fatal("Name too long: %s\n", file
);
379 /* truncate file at pathname delim to do pathconf */
383 * file: pppppppp\0fffff\0
385 * ............. ^ delim
387 if (strlen(delim
+ 1) > pathconf(file
, _PC_NAME_MAX
)) {
388 fatal("Name too long: %s\n", delim
+ 1);
393 if ((opfil
= fopen(file
, "w")) == NULL
) {
394 (void) strcpy(tfile
, file
);
395 (void) sprintf(fptr
, "%.*d", fiwidth
, (ctr
-2));
396 fatal("Cannot create %s\n", tfile
);
404 * Getaline gets a line via fgets from the input stream "infile".
405 * The line is put into linbuf and may not be larger than LINSIZ.
406 * If getaline is called with a non-zero value, the current line
407 * is bumped, otherwise it is not(for R.E. searching).
411 getaline(int bumpcur
)
416 ret
= fgets(linbuf
, LINSIZ
, infile
);
421 * Line_arg handles line number arguments.
422 * line_arg takes as its argument a pointer to a character string
423 * (assumed to be a line number). If that character string can be
424 * converted to a number(long long), to_line is called with that number,
433 if (asc_to_ll(line
, &to
) == ERR
)
434 fatal("%s: bad line number\n", line
);
439 * Num_arg handles repeat arguments.
440 * Num_arg copies the numeric argument to "rep" (error if number is
441 * larger than 20 characters or } is left off). Num_arg then converts
442 * the number and checks for validity. Next num_arg checks the mode
443 * of the previous argument, and applies the argument the correct number
444 * of times. If the mode is not set properly, it is an error.
448 num_arg(char *arg
, int md
)
450 offset_t repeat
, toline
;
456 for (++arg
; *arg
!= '}'; arg
+= len
) {
458 fatal("%s: missing '}'\n", targ
);
459 if ((len
= mblen(arg
, MB_LEN_MAX
)) <= 0)
461 if ((ptr
+ len
) >= &rep
[20])
462 fatal("%s: Repeat count too large\n", targ
);
463 (void) memcpy(ptr
, arg
, len
);
467 if ((asc_to_ll(rep
, &repeat
) == ERR
) || repeat
< 0L)
468 fatal("Illegal repeat count: %s\n", targ
);
470 toline
= offset
= curline
;
471 for (; repeat
> 0LL; repeat
--) {
475 } else if (md
== EXPMODE
)
476 for (; repeat
> 0LL; repeat
--)
477 to_line(findline(expbuf
, offset
));
479 fatal("No operation for %s\n", targ
);
483 * Re_arg handles regular expression arguments.
484 * Re_arg takes a csplit regular expression argument. It checks for
485 * delimiter balance, computes any offset, and compiles the regular
486 * expression. Findline is called with the compiled expression and
487 * offset, and returns the corresponding line number, which is used
488 * as input to the to_line function.
506 fatal("%s: missing delimiter\n", targ
);
508 if ((len
= mblen(ptr
, MB_LEN_MAX
)) <= 0)
514 * The line below was added because compile no longer supports
515 * the fourth argument being passed. The fourth argument used
520 if (asc_to_ll(++ptr
, &offset
) == ERR
)
521 fatal("%s: illegal offset\n", string
);
524 * The line below was added because INIT which did this for us
525 * was removed from compile in regexp.h
529 expbuf
= compile(string
, (char *)0, (char *)0);
532 to_line(findline(expbuf
, offset
));
536 * Sig handles breaks. When a break occurs the signal is reset,
537 * and fatal is called to clean up and print the argument which
538 * was being processed at the time the interrupt occurred.
545 (void) signal(SIGINT
, sig
);
546 fatal("Interrupt - program aborted at arg '%s'\n", targ
);
550 * To_line creates split files.
551 * To_line gets as its argument the line which the current argument
552 * referenced. To_line calls getfile for a new output stream, which
553 * does nothing if create is False. If to_line's argument is not LAST
554 * it checks that the current line is not greater than its argument.
555 * While the current line is less than the desired line to_line gets
556 * lines and flushes(error if EOF is reached).
557 * If to_line's argument is LAST, it checks for more lines, and gets
558 * and flushes lines till the end of file.
559 * Finally, to_line calls closefile to close the output stream.
568 fatal("%s - out of range\n", targ
);
569 while (curline
< ln
) {
570 if (getaline(TRUE
) == NULL
)
571 fatal("%s - out of range\n", targ
);
574 } else /* last file */
575 if (getaline(TRUE
) != NULL
) {
578 if (getaline(TRUE
) == NULL
)
583 fatal("%s - out of range\n", targ
);
590 (void) fprintf(stderr
, gettext(
591 "usage: csplit [-ks] [-f prefix] [-n number] "
592 "file arg1 ...argn\n"));