4 /* Copyright (C) 1989, 1990, 1991, 1992, 2000, 2001
5 Free Software Foundation, Inc.
6 Written by James Clark (jjc@jclark.com)
8 This file is part of groff.
10 groff is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2, or (at your option) any later
15 groff is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
20 You should have received a copy of the GNU General Public License along
21 with groff; see the file COPYING. If not, write to the Free Software
22 Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
46 int load(int fd
, const char *filename
);
47 const char *get_start() const;
48 const char *get_end() const;
51 typedef unsigned char uchar
;
53 static uchar map
[256];
54 static uchar inv_map
[256][3];
60 static map_init the_map_init
;
65 for (i
= 0; i
< 256; i
++)
66 map
[i
] = csalnum(i
) ? cmlower(i
) : '\0';
67 for (i
= 0; i
< 256; i
++) {
70 inv_map
[i
][1] = cmupper(i
);
73 else if (csdigit(i
)) {
88 bmpattern(const char *pattern
, int pattern_length
);
90 const char *search(const char *p
, const char *end
) const;
94 bmpattern::bmpattern(const char *pattern
, int pattern_length
)
99 for (i
= 0; i
< len
; i
++)
100 pat
[i
] = map
[uchar(pattern
[i
])];
101 for (i
= 0; i
< 256; i
++)
103 for (i
= 0; i
< len
; i
++)
104 for (const unsigned char *inv
= inv_map
[uchar(pat
[i
])]; *inv
; inv
++)
105 delta
[*inv
] = len
- i
- 1;
108 const char *bmpattern::search(const char *buf
, const char *end
) const
110 int buflen
= end
- buf
;
115 strend
= end
- len
*4;
118 const char *k
= buf
+ len
- 1;
119 const int *del
= delta
;
120 const char *pattern
= pat
;
123 int t
= del
[uchar(*k
)];
130 while (k
< end
&& del
[uchar(*k
)] != 0)
139 if (map
[uchar(*--s
)] != uchar(pattern
[--j
]))
147 bmpattern::~bmpattern()
152 inline int bmpattern::length() const
158 static const char *find_end(const char *bufend
, const char *p
);
160 const char *linear_searcher::search_and_check(const bmpattern
*key
,
161 const char *buf
, const char *bufend
, const char **start
) const
163 assert(buf
[-1] == '\n');
164 assert(bufend
[-1] == '\n');
165 const char *ptr
= buf
;
167 const char *found
= key
->search(ptr
, bufend
);
170 if (check_match(buf
, bufend
, found
, key
->length(), &ptr
, start
))
176 static const char *skip_field(const char *end
, const char *p
)
180 if (p
== end
|| *p
== '%')
183 for (q
= p
; *q
== ' ' || *q
== '\t'; q
++)
192 static const char *find_end(const char *bufend
, const char *p
)
199 for (q
= p
; *q
== ' ' || *q
== '\t'; q
++)
209 int linear_searcher::check_match(const char *buf
, const char *bufend
,
210 const char *match
, int matchlen
,
211 const char **cont
, const char **start
) const
214 // The user is required to supply only the first truncate_len characters
215 // of the key. If truncate_len <= 0, he must supply all the key.
216 if ((truncate_len
<= 0 || matchlen
< truncate_len
)
217 && map
[uchar(match
[matchlen
])] != '\0')
220 // The character before the match must not be an alphanumeric
221 // character (unless the alphanumeric character follows one or two
222 // percent characters at the beginning of the line), nor must it be
223 // a percent character at the beginning of a line, nor a percent
224 // character following a percent character at the beginning of a
227 switch (match
- buf
) {
231 if (match
[-1] == '%' || map
[uchar(match
[-1])] != '\0')
235 if (map
[uchar(match
[-1])] != '\0' && match
[-2] != '%')
238 && (match
[-2] == '\n' || match
[-2] == '%'))
242 if (map
[uchar(match
[-1])] != '\0'
243 && !(match
[-2] == '%'
244 && (match
[-3] == '\n'
245 || (match
[-3] == '%' && match
[-4] == '\n'))))
248 && (match
[-2] == '\n'
249 || (match
[-2] == '%' && match
[-3] == '\n')))
253 const char *p
= match
;
257 if (!had_percent
&& p
[1] == '%') {
258 if (p
[2] != '\0' && strchr(ignore_fields
, p
[2]) != 0) {
259 *cont
= skip_field(bufend
, match
+ matchlen
);
272 for (q
= p
- 1; *q
== ' ' || *q
== '\t'; q
--)
286 file_buffer::file_buffer()
287 : buffer(0), bufend(0)
291 file_buffer::~file_buffer()
296 const char *file_buffer::get_start() const
298 return buffer
? buffer
+ 4 : 0;
301 const char *file_buffer::get_end() const
306 int file_buffer::load(int fd
, const char *filename
)
309 if (fstat(fd
, &sb
) < 0)
310 error("can't fstat `%1': %2", filename
, strerror(errno
));
311 else if (!S_ISREG(sb
.st_mode
))
312 error("`%1' is not a regular file", filename
);
314 // We need one character extra at the beginning for an additional newline
315 // used as a sentinel. We get 4 instead so that the read buffer will be
316 // word-aligned. This seems to make the read slightly faster. We also
317 // need one character at the end also for an additional newline used as a
319 int size
= int(sb
.st_size
);
320 buffer
= new char[size
+ 4 + 1];
321 int nread
= read(fd
, buffer
+ 4, size
);
323 error("error reading `%1': %2", filename
, strerror(errno
));
324 else if (nread
!= size
)
325 error("size of `%1' decreased", filename
);
328 nread
= read(fd
, &c
, 1);
330 error("size of `%1' increased", filename
);
331 else if (memchr(buffer
+ 4, '\0', size
< 1024 ? size
: 1024) != 0)
332 error("database `%1' is a binary file", filename
);
336 int sidx
= 4, didx
= 4;
337 for ( ; sidx
< size
+ 4; sidx
++, didx
++)
339 if (buffer
[sidx
] == '\r')
341 if (buffer
[++sidx
] != '\n')
342 buffer
[didx
++] = '\r';
347 buffer
[didx
] = buffer
[sidx
];
349 bufend
= buffer
+ 4 + size
;
350 if (bufend
[-1] != '\n')
362 linear_searcher::linear_searcher(const char *query
, int query_len
,
363 const char *ign
, int trunc
)
364 : ignore_fields(ign
), truncate_len(trunc
), keys(0), nkeys(0)
366 const char *query_end
= query
+ query_len
;
369 for (p
= query
; p
< query_end
; p
++)
370 if (map
[uchar(*p
)] != '\0'
371 && (p
[1] == '\0' || map
[uchar(p
[1])] == '\0'))
375 keys
= new bmpattern
*[nk
];
378 while (p
< query_end
&& map
[uchar(*p
)] == '\0')
382 const char *start
= p
;
383 while (p
< query_end
&& map
[uchar(*p
)] != '\0')
385 keys
[nkeys
++] = new bmpattern(start
, p
- start
);
394 linear_searcher::~linear_searcher()
396 for (int i
= 0; i
< nkeys
; i
++)
401 int linear_searcher::search(const char *buffer
, const char *bufend
,
402 const char **startp
, int *lengthp
) const
404 assert(bufend
- buffer
> 0);
405 assert(buffer
[-1] == '\n');
406 assert(bufend
[-1] == '\n');
410 const char *refstart
;
411 const char *found
= search_and_check(keys
[0], buffer
, bufend
, &refstart
);
414 const char *refend
= find_end(bufend
, found
+ keys
[0]->length());
416 for (i
= 1; i
< nkeys
; i
++)
417 if (!search_and_check(keys
[i
], refstart
, refend
))
421 *lengthp
= refend
- refstart
;
429 class linear_search_item
: public search_item
{
432 linear_search_item(const char *filename
, int fid
);
433 ~linear_search_item();
435 search_item_iterator
*make_search_item_iterator(const char *);
436 friend class linear_search_item_iterator
;
439 class linear_search_item_iterator
: public search_item_iterator
{
440 linear_search_item
*lsi
;
443 linear_search_item_iterator(linear_search_item
*, const char *query
);
444 ~linear_search_item_iterator();
445 int next(const linear_searcher
&, const char **ptr
, int *lenp
,
449 search_item
*make_linear_search_item(int fd
, const char *filename
, int fid
)
451 linear_search_item
*item
= new linear_search_item(filename
, fid
);
452 if (!item
->load(fd
)) {
460 linear_search_item::linear_search_item(const char *filename
, int fid
)
461 : search_item(filename
, fid
)
465 linear_search_item::~linear_search_item()
469 int linear_search_item::load(int fd
)
471 return fbuf
.load(fd
, name
);
474 search_item_iterator
*linear_search_item::make_search_item_iterator(
477 return new linear_search_item_iterator(this, query
);
480 linear_search_item_iterator::linear_search_item_iterator(
481 linear_search_item
*p
, const char *)
486 linear_search_item_iterator::~linear_search_item_iterator()
490 int linear_search_item_iterator::next(const linear_searcher
&searcher
,
491 const char **startp
, int *lengthp
,
494 const char *bufstart
= lsi
->fbuf
.get_start();
495 const char *bufend
= lsi
->fbuf
.get_end();
496 const char *ptr
= bufstart
+ pos
;
497 if (ptr
< bufend
&& searcher
.search(ptr
, bufend
, startp
, lengthp
)) {
498 pos
= *startp
+ *lengthp
- bufstart
;
500 *ridp
= reference_id(lsi
->filename_id
, *startp
- bufstart
);