Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / grep / src / dosbuf.c
blob6ba31396b62c985b0084a59e365c82904d403baa
1 /* $NetBSD$ */
/* Messy DOS-specific code for correctly treating binary, Unix text
   and DOS text files.

   This has several aspects:

     * Guessing the file type (unless the user tells us);
     * Stripping CR characters from DOS text files (otherwise regex
       functions won't work correctly);
     * Reporting correct byte count with -b for any kind of file.

*/
/* Classification of an input file, as used by the DOS buffer code.  */
typedef enum
{
  UNKNOWN = 0,          /* not determined yet; guess from the contents */
  DOS_BINARY = 1,       /* contains a NUL byte; leave the data alone */
  DOS_TEXT = 2,         /* has CR-LF line ends; CRs get stripped */
  UNIX_TEXT = 3         /* plain text without CR-LF pairs */
} File_type;
/* One entry of the table that maps positions in the CR-stripped buffer
   back to positions in the file as it exists on disk.  */
struct dos_map
{
  /* Position in the buffer passed to the matcher.  */
  off_t pos;

  /* How much to add to that position when reporting a character
     position to the user.  */
  off_t add;
};
24 static int dos_report_unix_offset = 0;
26 static File_type dos_file_type = UNKNOWN;
27 static File_type dos_use_file_type = UNKNOWN;
28 static off_t dos_stripped_crs = 0;
29 static struct dos_map *dos_pos_map;
30 static int dos_pos_map_size = 0;
31 static int dos_pos_map_used = 0;
32 static int inp_map_idx = 0, out_map_idx = 1;
34 /* Guess DOS file type by looking at its contents. */
35 static inline File_type
36 guess_type (char *buf, register size_t buflen)
38 int crlf_seen = 0;
39 register char *bp = buf;
41 while (buflen--)
43 /* Treat a file as binary if it has a NUL character. */
44 if (!*bp)
45 return DOS_BINARY;
47 /* CR before LF means DOS text file (unless we later see
48 binary characters). */
49 else if (*bp == '\r' && buflen && bp[1] == '\n')
50 crlf_seen = 1;
52 bp++;
55 return crlf_seen ? DOS_TEXT : UNIX_TEXT;
58 /* Convert external DOS file representation to internal.
59 Return the count of characters left in the buffer.
60 Build table to map character positions when reporting byte counts. */
61 static inline int
62 undossify_input (register char *buf, size_t buflen)
64 int chars_left = 0;
66 if (totalcc == 0)
68 /* New file: forget everything we knew about character
69 position mapping table and file type. */
70 inp_map_idx = 0;
71 out_map_idx = 1;
72 dos_pos_map_used = 0;
73 dos_stripped_crs = 0;
74 dos_file_type = dos_use_file_type;
77 /* Guess if this file is binary, unless we already know that. */
78 if (dos_file_type == UNKNOWN)
79 dos_file_type = guess_type(buf, buflen);
81 /* If this file is to be treated as DOS Text, strip the CR characters
82 and maybe build the table for character position mapping on output. */
83 if (dos_file_type == DOS_TEXT)
85 char *destp = buf;
87 while (buflen--)
89 if (*buf != '\r')
91 *destp++ = *buf++;
92 chars_left++;
94 else
96 buf++;
97 if (out_byte && !dos_report_unix_offset)
99 dos_stripped_crs++;
100 while (buflen && *buf == '\r')
102 dos_stripped_crs++;
103 buflen--;
104 buf++;
106 if (inp_map_idx >= dos_pos_map_size - 1)
108 dos_pos_map_size = inp_map_idx ? inp_map_idx * 2 : 1000;
109 dos_pos_map =
110 (struct dos_map *)xrealloc((char *)dos_pos_map,
111 dos_pos_map_size *
112 sizeof(struct dos_map));
115 if (!inp_map_idx)
117 /* Add sentinel entry. */
118 dos_pos_map[inp_map_idx].pos = 0;
119 dos_pos_map[inp_map_idx++].add = 0;
121 /* Initialize first real entry. */
122 dos_pos_map[inp_map_idx].add = 0;
125 /* Put the new entry. If the stripped CR characters
126 precede a Newline (the usual case), pretend that
127 they were found *after* the Newline. This makes
128 displayed byte offsets more reasonable in some
129 cases, and fits better the intuitive notion that
130 the line ends *before* the CR, not *after* it. */
131 inp_map_idx++;
132 dos_pos_map[inp_map_idx-1].pos =
133 (*buf == '\n' ? destp + 1 : destp ) - bufbeg + totalcc;
134 dos_pos_map[inp_map_idx].add = dos_stripped_crs;
135 dos_pos_map_used = inp_map_idx;
137 /* The following will be updated on the next pass. */
138 dos_pos_map[inp_map_idx].pos = destp - bufbeg + totalcc + 1;
143 return chars_left;
146 return buflen;
149 /* Convert internal byte count into external. */
150 static inline off_t
151 dossified_pos (off_t byteno)
153 off_t pos_lo;
154 off_t pos_hi;
156 if (dos_file_type != DOS_TEXT || dos_report_unix_offset)
157 return byteno;
159 /* Optimization: usually the file will be scanned sequentially.
160 So in most cases, this byte position will be found in the
161 table near the previous one, as recorded in `out_map_idx'. */
162 pos_lo = dos_pos_map[out_map_idx-1].pos;
163 pos_hi = dos_pos_map[out_map_idx].pos;
165 /* If the initial guess failed, search up or down, as
166 appropriate, beginning with the previous place. */
167 if (byteno >= pos_hi)
169 out_map_idx++;
170 while (out_map_idx < dos_pos_map_used &&
171 byteno >= dos_pos_map[out_map_idx].pos)
172 out_map_idx++;
175 else if (byteno < pos_lo)
177 out_map_idx--;
178 while (out_map_idx > 1 && byteno < dos_pos_map[out_map_idx-1].pos)
179 out_map_idx--;
182 return byteno + dos_pos_map[out_map_idx].add;