No empty .Rs/.Re
[netbsd-mini2440.git] / gnu / dist / groff / src / libs / libbib / linear.cpp
blob4762b4db86c960f0ad4a0e9fe76ba54ddc0338a2
1 /* $NetBSD$ */
3 // -*- C++ -*-
4 /* Copyright (C) 1989, 1990, 1991, 1992, 2000, 2001
5 Free Software Foundation, Inc.
6 Written by James Clark (jjc@jclark.com)
8 This file is part of groff.
10 groff is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2, or (at your option) any later
13 version.
15 groff is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License along
21 with groff; see the file COPYING. If not, write to the Free Software
22 Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
24 #include "lib.h"
26 #include <stdlib.h>
27 #include <assert.h>
28 #include <errno.h>
30 #include "posix.h"
31 #include "errarg.h"
32 #include "error.h"
33 #include "cset.h"
34 #include "cmap.h"
35 #include "nonposix.h"
37 #include "refid.h"
38 #include "search.h"
40 class file_buffer {
41 char *buffer;
42 char *bufend;
43 public:
44 file_buffer();
45 ~file_buffer();
46 int load(int fd, const char *filename);
47 const char *get_start() const;
48 const char *get_end() const;
51 typedef unsigned char uchar;
53 static uchar map[256];
54 static uchar inv_map[256][3];
56 struct map_init {
57 map_init();
60 static map_init the_map_init;
62 map_init::map_init()
64 int i;
65 for (i = 0; i < 256; i++)
66 map[i] = csalnum(i) ? cmlower(i) : '\0';
67 for (i = 0; i < 256; i++) {
68 if (cslower(i)) {
69 inv_map[i][0] = i;
70 inv_map[i][1] = cmupper(i);
71 inv_map[i][2] = '\0';
73 else if (csdigit(i)) {
74 inv_map[i][0] = i;
75 inv_map[i][1] = 0;
77 else
78 inv_map[i][0] = '\0';
83 class bmpattern {
84 char *pat;
85 int len;
86 int delta[256];
87 public:
88 bmpattern(const char *pattern, int pattern_length);
89 ~bmpattern();
90 const char *search(const char *p, const char *end) const;
91 int length() const;
94 bmpattern::bmpattern(const char *pattern, int pattern_length)
95 : len(pattern_length)
97 pat = new char[len];
98 int i;
99 for (i = 0; i < len; i++)
100 pat[i] = map[uchar(pattern[i])];
101 for (i = 0; i < 256; i++)
102 delta[i] = len;
103 for (i = 0; i < len; i++)
104 for (const unsigned char *inv = inv_map[uchar(pat[i])]; *inv; inv++)
105 delta[*inv] = len - i - 1;
108 const char *bmpattern::search(const char *buf, const char *end) const
110 int buflen = end - buf;
111 if (len > buflen)
112 return 0;
113 const char *strend;
114 if (buflen > len*4)
115 strend = end - len*4;
116 else
117 strend = buf;
118 const char *k = buf + len - 1;
119 const int *del = delta;
120 const char *pattern = pat;
121 for (;;) {
122 while (k < strend) {
123 int t = del[uchar(*k)];
124 if (!t)
125 break;
126 k += t;
127 k += del[uchar(*k)];
128 k += del[uchar(*k)];
130 while (k < end && del[uchar(*k)] != 0)
131 k++;
132 if (k == end)
133 break;
134 int j = len - 1;
135 const char *s = k;
136 for (;;) {
137 if (j == 0)
138 return s;
139 if (map[uchar(*--s)] != uchar(pattern[--j]))
140 break;
142 k++;
144 return 0;
147 bmpattern::~bmpattern()
149 a_delete pat;
152 inline int bmpattern::length() const
154 return len;
158 static const char *find_end(const char *bufend, const char *p);
160 const char *linear_searcher::search_and_check(const bmpattern *key,
161 const char *buf, const char *bufend, const char **start) const
163 assert(buf[-1] == '\n');
164 assert(bufend[-1] == '\n');
165 const char *ptr = buf;
166 for (;;) {
167 const char *found = key->search(ptr, bufend);
168 if (!found)
169 break;
170 if (check_match(buf, bufend, found, key->length(), &ptr, start))
171 return found;
173 return 0;
176 static const char *skip_field(const char *end, const char *p)
178 for (;;)
179 if (*p++ == '\n') {
180 if (p == end || *p == '%')
181 break;
182 const char *q;
183 for (q = p; *q == ' ' || *q == '\t'; q++)
185 if (*q == '\n')
186 break;
187 p = q + 1;
189 return p;
192 static const char *find_end(const char *bufend, const char *p)
194 for (;;)
195 if (*p++ == '\n') {
196 if (p == bufend)
197 break;
198 const char *q;
199 for (q = p; *q == ' ' || *q == '\t'; q++)
201 if (*q == '\n')
202 break;
203 p = q + 1;
205 return p;
209 int linear_searcher::check_match(const char *buf, const char *bufend,
210 const char *match, int matchlen,
211 const char **cont, const char **start) const
213 *cont = match + 1;
214 // The user is required to supply only the first truncate_len characters
215 // of the key. If truncate_len <= 0, he must supply all the key.
216 if ((truncate_len <= 0 || matchlen < truncate_len)
217 && map[uchar(match[matchlen])] != '\0')
218 return 0;
220 // The character before the match must not be an alphanumeric
221 // character (unless the alphanumeric character follows one or two
222 // percent characters at the beginning of the line), nor must it be
223 // a percent character at the beginning of a line, nor a percent
224 // character following a percent character at the beginning of a
225 // line.
227 switch (match - buf) {
228 case 0:
229 break;
230 case 1:
231 if (match[-1] == '%' || map[uchar(match[-1])] != '\0')
232 return 0;
233 break;
234 case 2:
235 if (map[uchar(match[-1])] != '\0' && match[-2] != '%')
236 return 0;
237 if (match[-1] == '%'
238 && (match[-2] == '\n' || match[-2] == '%'))
239 return 0;
240 break;
241 default:
242 if (map[uchar(match[-1])] != '\0'
243 && !(match[-2] == '%'
244 && (match[-3] == '\n'
245 || (match[-3] == '%' && match[-4] == '\n'))))
246 return 0;
247 if (match[-1] == '%'
248 && (match[-2] == '\n'
249 || (match[-2] == '%' && match[-3] == '\n')))
250 return 0;
253 const char *p = match;
254 int had_percent = 0;
255 for (;;) {
256 if (*p == '\n') {
257 if (!had_percent && p[1] == '%') {
258 if (p[2] != '\0' && strchr(ignore_fields, p[2]) != 0) {
259 *cont = skip_field(bufend, match + matchlen);
260 return 0;
262 if (!start)
263 break;
264 had_percent = 1;
266 if (p <= buf) {
267 if (start)
268 *start = p + 1;
269 return 1;
271 const char *q;
272 for (q = p - 1; *q == ' ' || *q == '\t'; q--)
274 if (*q == '\n') {
275 if (start)
276 *start = p + 1;
277 break;
279 p = q;
281 p--;
283 return 1;
286 file_buffer::file_buffer()
287 : buffer(0), bufend(0)
291 file_buffer::~file_buffer()
293 a_delete buffer;
296 const char *file_buffer::get_start() const
298 return buffer ? buffer + 4 : 0;
301 const char *file_buffer::get_end() const
303 return bufend;
306 int file_buffer::load(int fd, const char *filename)
308 struct stat sb;
309 if (fstat(fd, &sb) < 0)
310 error("can't fstat `%1': %2", filename, strerror(errno));
311 else if (!S_ISREG(sb.st_mode))
312 error("`%1' is not a regular file", filename);
313 else {
314 // We need one character extra at the beginning for an additional newline
315 // used as a sentinel. We get 4 instead so that the read buffer will be
316 // word-aligned. This seems to make the read slightly faster. We also
317 // need one character at the end also for an additional newline used as a
318 // sentinel.
319 int size = int(sb.st_size);
320 buffer = new char[size + 4 + 1];
321 int nread = read(fd, buffer + 4, size);
322 if (nread < 0)
323 error("error reading `%1': %2", filename, strerror(errno));
324 else if (nread != size)
325 error("size of `%1' decreased", filename);
326 else {
327 char c;
328 nread = read(fd, &c, 1);
329 if (nread != 0)
330 error("size of `%1' increased", filename);
331 else if (memchr(buffer + 4, '\0', size < 1024 ? size : 1024) != 0)
332 error("database `%1' is a binary file", filename);
333 else {
334 close(fd);
335 buffer[3] = '\n';
336 int sidx = 4, didx = 4;
337 for ( ; sidx < size + 4; sidx++, didx++)
339 if (buffer[sidx] == '\r')
341 if (buffer[++sidx] != '\n')
342 buffer[didx++] = '\r';
343 else
344 size--;
346 if (sidx != didx)
347 buffer[didx] = buffer[sidx];
349 bufend = buffer + 4 + size;
350 if (bufend[-1] != '\n')
351 *bufend++ = '\n';
352 return 1;
355 a_delete buffer;
356 buffer = 0;
358 close(fd);
359 return 0;
362 linear_searcher::linear_searcher(const char *query, int query_len,
363 const char *ign, int trunc)
364 : ignore_fields(ign), truncate_len(trunc), keys(0), nkeys(0)
366 const char *query_end = query + query_len;
367 int nk = 0;
368 const char *p;
369 for (p = query; p < query_end; p++)
370 if (map[uchar(*p)] != '\0'
371 && (p[1] == '\0' || map[uchar(p[1])] == '\0'))
372 nk++;
373 if (nk == 0)
374 return;
375 keys = new bmpattern*[nk];
376 p = query;
377 for (;;) {
378 while (p < query_end && map[uchar(*p)] == '\0')
379 p++;
380 if (p == query_end)
381 break;
382 const char *start = p;
383 while (p < query_end && map[uchar(*p)] != '\0')
384 p++;
385 keys[nkeys++] = new bmpattern(start, p - start);
387 assert(nkeys <= nk);
388 if (nkeys == 0) {
389 a_delete keys;
390 keys = 0;
394 linear_searcher::~linear_searcher()
396 for (int i = 0; i < nkeys; i++)
397 delete keys[i];
398 a_delete keys;
401 int linear_searcher::search(const char *buffer, const char *bufend,
402 const char **startp, int *lengthp) const
404 assert(bufend - buffer > 0);
405 assert(buffer[-1] == '\n');
406 assert(bufend[-1] == '\n');
407 if (nkeys == 0)
408 return 0;
409 for (;;) {
410 const char *refstart;
411 const char *found = search_and_check(keys[0], buffer, bufend, &refstart);
412 if (!found)
413 break;
414 const char *refend = find_end(bufend, found + keys[0]->length());
415 int i;
416 for (i = 1; i < nkeys; i++)
417 if (!search_and_check(keys[i], refstart, refend))
418 break;
419 if (i >= nkeys) {
420 *startp = refstart;
421 *lengthp = refend - refstart;
422 return 1;
424 buffer = refend;
426 return 0;
429 class linear_search_item : public search_item {
430 file_buffer fbuf;
431 public:
432 linear_search_item(const char *filename, int fid);
433 ~linear_search_item();
434 int load(int fd);
435 search_item_iterator *make_search_item_iterator(const char *);
436 friend class linear_search_item_iterator;
439 class linear_search_item_iterator : public search_item_iterator {
440 linear_search_item *lsi;
441 int pos;
442 public:
443 linear_search_item_iterator(linear_search_item *, const char *query);
444 ~linear_search_item_iterator();
445 int next(const linear_searcher &, const char **ptr, int *lenp,
446 reference_id *ridp);
449 search_item *make_linear_search_item(int fd, const char *filename, int fid)
451 linear_search_item *item = new linear_search_item(filename, fid);
452 if (!item->load(fd)) {
453 delete item;
454 return 0;
456 else
457 return item;
460 linear_search_item::linear_search_item(const char *filename, int fid)
461 : search_item(filename, fid)
465 linear_search_item::~linear_search_item()
469 int linear_search_item::load(int fd)
471 return fbuf.load(fd, name);
474 search_item_iterator *linear_search_item::make_search_item_iterator(
475 const char *query)
477 return new linear_search_item_iterator(this, query);
480 linear_search_item_iterator::linear_search_item_iterator(
481 linear_search_item *p, const char *)
482 : lsi(p), pos(0)
486 linear_search_item_iterator::~linear_search_item_iterator()
490 int linear_search_item_iterator::next(const linear_searcher &searcher,
491 const char **startp, int *lengthp,
492 reference_id *ridp)
494 const char *bufstart = lsi->fbuf.get_start();
495 const char *bufend = lsi->fbuf.get_end();
496 const char *ptr = bufstart + pos;
497 if (ptr < bufend && searcher.search(ptr, bufend, startp, lengthp)) {
498 pos = *startp + *lengthp - bufstart;
499 if (ridp)
500 *ridp = reference_id(lsi->filename_id, *startp - bufstart);
501 return 1;
503 else
504 return 0;