4 /* Copyright (C) 1989, 1990, 1991, 1992, 2001, 2004
5 Free Software Foundation, Inc.
6 Written by James Clark (jjc@jclark.com)
8 This file is part of groff.
10 groff is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2, or (at your option) any later
15 groff is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
20 You should have received a copy of the GNU General Public License along
21 with groff; see the file COPYING. If not, write to the Free Software
22 Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
44 void *mapread(int fd
, int len
);
45 int unmap(void *, int len
);
57 class index_search_item
: public search_item
{
58 search_item
*out_of_date_files
;
68 char *filename_buffer
;
70 char **common_words_table
;
71 int common_words_table_size
;
72 const char *ignore_fields
;
75 const char *do_verify();
76 const int *search1(const char **pp
, const char *end
);
77 const int *search(const char *ptr
, int length
, int **temp_listp
);
78 const char *munge_filename(const char *);
79 void read_common_words_file();
80 void add_out_of_date_file(int fd
, const char *filename
, int fid
);
82 index_search_item(const char *, int);
85 search_item_iterator
*make_search_item_iterator(const char *);
88 int next_filename_id() const;
89 friend class index_search_item_iterator
;
92 class index_search_item_iterator
: public search_item_iterator
{
93 index_search_item
*indx
;
94 search_item_iterator
*out_of_date_files_iter
;
95 search_item
*next_out_of_date_file
;
96 const int *found_list
;
100 linear_searcher searcher
;
102 int get_tag(int tagno
, const linear_searcher
&, const char **, int *,
105 index_search_item_iterator(index_search_item
*, const char *);
106 ~index_search_item_iterator();
107 int next(const linear_searcher
&, const char **, int *, reference_id
*);
111 index_search_item::index_search_item(const char *filename
, int fid
)
112 : search_item(filename
, fid
), out_of_date_files(0), buffer(0), map_addr(0),
113 map_len(0), key_buffer(0), filename_buffer(0), filename_buflen(0),
114 common_words_table(0)
118 index_search_item::~index_search_item()
123 if (unmap(map_addr
, map_len
) < 0)
124 error("unmap: %1", strerror(errno
));
126 while (out_of_date_files
) {
127 search_item
*tem
= out_of_date_files
;
128 out_of_date_files
= out_of_date_files
->next
;
131 a_delete filename_buffer
;
133 if (common_words_table
) {
134 for (int i
= 0; i
< common_words_table_size
; i
++)
135 a_delete common_words_table
[i
];
136 a_delete common_words_table
;
143 file_closer(int &fd
) : fdp(&fd
) { }
144 ~file_closer() { close(*fdp
); }
147 // Tell the compiler that a variable is intentionally unused.
148 inline void unused(void *) { }
150 int index_search_item::load(int fd
)
152 file_closer
fd_closer(fd
); // close fd on return
155 if (fstat(fd
, &sb
) < 0) {
156 error("can't fstat `%1': %2", name
, strerror(errno
));
159 if (!S_ISREG(sb
.st_mode
)) {
160 error("`%1' is not a regular file", name
);
164 int size
= int(sb
.st_size
);
166 map_addr
= mapread(fd
, size
);
168 addr
= (char *)map_addr
;
172 addr
= buffer
= (char *)malloc(size
);
174 error("can't allocate buffer for `%1'", name
);
178 int bytes_to_read
= size
;
179 while (bytes_to_read
> 0) {
180 int nread
= read(fd
, ptr
, bytes_to_read
);
182 error("unexpected EOF on `%1'", name
);
186 error("read error on `%1': %2", name
, strerror(errno
));
189 bytes_to_read
-= nread
;
193 header
= *(index_header
*)addr
;
194 if (header
.magic
!= INDEX_MAGIC
) {
195 error("`%1' is not an index file: wrong magic number", name
);
198 if (header
.version
!= INDEX_VERSION
) {
199 error("version number in `%1' is wrong: was %2, should be %3",
200 name
, header
.version
, INDEX_VERSION
);
203 int sz
= (header
.tags_size
* sizeof(tag
)
204 + header
.lists_size
* sizeof(int)
205 + header
.table_size
* sizeof(int)
206 + header
.strings_size
209 error("size of `%1' is wrong: was %2, should be %3",
213 tags
= (tag
*)(addr
+ sizeof(header
));
214 lists
= (int *)(tags
+ header
.tags_size
);
215 table
= (int *)(lists
+ header
.lists_size
);
216 pool
= (char *)(table
+ header
.table_size
);
217 ignore_fields
= strchr(strchr(pool
, '\0') + 1, '\0') + 1;
218 key_buffer
= new char[header
.truncate
];
219 read_common_words_file();
223 const char *index_search_item::do_verify()
227 if (lists
[header
.lists_size
- 1] >= 0)
228 return "last list element not negative";
230 for (i
= 0; i
< header
.table_size
; i
++) {
232 if (li
>= header
.lists_size
)
233 return "bad list index";
235 for (int *ptr
= lists
+ li
; *ptr
>= 0; ptr
++) {
236 if (*ptr
>= header
.tags_size
)
237 return "bad tag index";
238 if (*ptr
>= ptr
[1] && ptr
[1] >= 0)
239 return "list not ordered";
243 for (i
= 0; i
< header
.tags_size
; i
++) {
244 if (tags
[i
].filename_index
>= header
.strings_size
)
245 return "bad index in tags";
246 if (tags
[i
].length
< 0)
247 return "bad length in tags";
248 if (tags
[i
].start
< 0)
249 return "bad start in tags";
251 if (pool
[header
.strings_size
- 1] != '\0')
252 return "last character in pool not nul";
256 int index_search_item::verify()
258 const char *reason
= do_verify();
261 error("`%1' is bad: %2", name
, reason
);
265 int index_search_item::next_filename_id() const
267 return filename_id
+ header
.strings_size
+ 1;
270 search_item_iterator
*index_search_item::make_search_item_iterator(
273 return new index_search_item_iterator(this, query
);
276 search_item
*make_index_search_item(const char *filename
, int fid
)
278 char *index_filename
= new char[strlen(filename
) + sizeof(INDEX_SUFFIX
)];
279 strcpy(index_filename
, filename
);
280 strcat(index_filename
, INDEX_SUFFIX
);
281 int fd
= open(index_filename
, O_RDONLY
| O_BINARY
);
284 index_search_item
*item
= new index_search_item(index_filename
, fid
);
285 a_delete index_filename
;
286 if (!item
->load(fd
)) {
291 else if (verify_flag
&& !item
->verify()) {
302 index_search_item_iterator::index_search_item_iterator(index_search_item
*ind
,
304 : indx(ind
), out_of_date_files_iter(0), next_out_of_date_file(0), temp_list(0),
306 searcher(q
, strlen(q
), ind
->ignore_fields
, ind
->header
.truncate
),
309 found_list
= indx
->search(q
, strlen(q
), &temp_list
);
311 found_list
= &minus_one
;
312 warning("all keys would have been discarded in constructing index `%1'",
317 index_search_item_iterator::~index_search_item_iterator()
322 delete out_of_date_files_iter
;
325 int index_search_item_iterator::next(const linear_searcher
&,
326 const char **pp
, int *lenp
,
331 int tagno
= *found_list
;
335 if (get_tag(tagno
, searcher
, pp
, lenp
, ridp
))
339 next_out_of_date_file
= indx
->out_of_date_files
;
341 while (next_out_of_date_file
) {
342 if (out_of_date_files_iter
== 0)
343 out_of_date_files_iter
344 = next_out_of_date_file
->make_search_item_iterator(query
);
345 if (out_of_date_files_iter
->next(searcher
, pp
, lenp
, ridp
))
347 delete out_of_date_files_iter
;
348 out_of_date_files_iter
= 0;
349 next_out_of_date_file
= next_out_of_date_file
->next
;
354 int index_search_item_iterator::get_tag(int tagno
,
355 const linear_searcher
&searchr
,
356 const char **pp
, int *lenp
,
359 if (tagno
< 0 || tagno
>= indx
->header
.tags_size
) {
360 error("bad tag number");
363 tag
*tp
= indx
->tags
+ tagno
;
364 const char *filename
= indx
->munge_filename(indx
->pool
+ tp
->filename_index
);
365 int fd
= open(filename
, O_RDONLY
| O_BINARY
);
367 error("can't open `%1': %2", filename
, strerror(errno
));
371 if (fstat(fd
, &sb
) < 0) {
372 error("can't fstat: %1", strerror(errno
));
376 time_t mtime
= sb
.st_mtime
;
377 if (mtime
> indx
->mtime
) {
378 indx
->add_out_of_date_file(fd
, filename
,
379 indx
->filename_id
+ tp
->filename_index
);
383 FILE *fp
= fdopen(fd
, FOPEN_RB
);
385 error("fdopen failed");
389 if (tp
->start
!= 0 && fseek(fp
, long(tp
->start
), 0) < 0)
390 error("can't seek on `%1': %2", filename
, strerror(errno
));
392 int length
= tp
->length
;
395 if (fstat(fileno(fp
), &sb
) < 0) {
396 error("can't stat `%1': %2", filename
, strerror(errno
));
399 else if (!S_ISREG(sb
.st_mode
)) {
400 error("`%1' is not a regular file", filename
);
404 length
= int(sb
.st_size
);
407 if (length
+ 2 > buflen
) {
410 buf
= new char[buflen
];
412 if (fread(buf
+ 1, 1, length
, fp
) != (size_t)length
)
413 error("fread on `%1' failed: %2", filename
, strerror(errno
));
416 // Remove the CR characters from CRLF pairs.
417 int sidx
= 1, didx
= 1;
418 for ( ; sidx
< length
+ 1; sidx
++, didx
++)
420 if (buf
[sidx
] == '\r')
422 if (buf
[++sidx
] != '\n')
428 buf
[didx
] = buf
[sidx
];
430 buf
[length
+ 1] = '\n';
431 res
= searchr
.search(buf
+ 1, buf
+ 2 + length
, pp
, lenp
);
433 *ridp
= reference_id(indx
->filename_id
+ tp
->filename_index
,
442 const char *index_search_item::munge_filename(const char *filename
)
444 if (IS_ABSOLUTE(filename
))
446 const char *cwd
= pool
;
447 int need_slash
= (cwd
[0] != 0
448 && strchr(DIR_SEPS
, strchr(cwd
, '\0')[-1]) == 0);
449 int len
= strlen(cwd
) + strlen(filename
) + need_slash
+ 1;
450 if (len
> filename_buflen
) {
451 a_delete filename_buffer
;
452 filename_buflen
= len
;
453 filename_buffer
= new char[len
];
455 strcpy(filename_buffer
, cwd
);
457 strcat(filename_buffer
, "/");
458 strcat(filename_buffer
, filename
);
459 return filename_buffer
;
462 const int *index_search_item::search1(const char **pp
, const char *end
)
464 while (*pp
< end
&& !csalnum(**pp
))
468 const char *start
= *pp
;
469 while (*pp
< end
&& csalnum(**pp
))
471 int len
= *pp
- start
;
472 if (len
< header
.shortest
)
474 if (len
> header
.truncate
)
475 len
= header
.truncate
;
477 for (int i
= 0; i
< len
; i
++)
478 if (csdigit(start
[i
]))
479 key_buffer
[i
] = start
[i
];
481 key_buffer
[i
] = cmlower(start
[i
]);
484 if (is_number
&& !(len
== 4 && start
[0] == '1' && start
[1] == '9'))
486 unsigned hc
= hash(key_buffer
, len
);
487 if (common_words_table
) {
488 for (int h
= hc
% common_words_table_size
;
489 common_words_table
[h
];
491 if (strlen(common_words_table
[h
]) == (size_t)len
492 && memcmp(common_words_table
[h
], key_buffer
, len
) == 0)
495 h
= common_words_table_size
;
498 int li
= table
[int(hc
% header
.table_size
)];
499 return li
< 0 ? &minus_one
: lists
+ li
;
502 static void merge(int *result
, const int *s1
, const int *s2
)
504 for (; *s1
>= 0; s1
++) {
505 while (*s2
>= 0 && *s2
< *s1
)
513 const int *index_search_item::search(const char *ptr
, int length
,
516 const char *end
= ptr
+ length
;
518 a_delete
*temp_listp
;
521 const int *first_list
= 0;
522 while (ptr
< end
&& (first_list
= search1(&ptr
, end
)) == 0)
528 const int *second_list
= 0;
529 while (ptr
< end
&& (second_list
= search1(&ptr
, end
)) == 0)
533 if (*second_list
< 0)
536 for (p
= first_list
; *p
>= 0; p
++)
538 int len
= p
- first_list
;
539 for (p
= second_list
; *p
>= 0; p
++)
541 if (p
- second_list
< len
)
542 len
= p
- second_list
;
543 int *matches
= new int[len
+ 1];
544 merge(matches
, first_list
, second_list
);
546 const int *list
= search1(&ptr
, end
);
552 merge(matches
, matches
, list
);
559 *temp_listp
= matches
;
563 void index_search_item::read_common_words_file()
565 if (header
.common
<= 0)
567 const char *common_words_file
= munge_filename(strchr(pool
, '\0') + 1);
569 FILE *fp
= fopen(common_words_file
, "r");
571 error("can't open `%1': %2", common_words_file
, strerror(errno
));
574 common_words_table_size
= 2*header
.common
+ 1;
575 while (!is_prime(common_words_table_size
))
576 common_words_table_size
++;
577 common_words_table
= new char *[common_words_table_size
];
578 for (int i
= 0; i
< common_words_table_size
; i
++)
579 common_words_table
[i
] = 0;
584 while (c
!= EOF
&& !csalnum(c
))
589 if (key_len
< header
.truncate
)
590 key_buffer
[key_len
++] = cmlower(c
);
592 } while (c
!= EOF
&& csalnum(c
));
593 if (key_len
>= header
.shortest
) {
594 int h
= hash(key_buffer
, key_len
) % common_words_table_size
;
595 while (common_words_table
[h
]) {
597 h
= common_words_table_size
;
600 common_words_table
[h
] = new char[key_len
+ 1];
601 memcpy(common_words_table
[h
], key_buffer
, key_len
);
602 common_words_table
[h
][key_len
] = '\0';
604 if (++count
>= header
.common
)
613 void index_search_item::add_out_of_date_file(int fd
, const char *filename
,
617 for (pp
= &out_of_date_files
; *pp
; pp
= &(*pp
)->next
)
618 if ((*pp
)->is_named(filename
))
620 *pp
= make_linear_search_item(fd
, filename
, fid
);
621 warning("`%1' modified since `%2' created", filename
, name
);
624 void index_search_item::check_files()
626 const char *pool_end
= pool
+ header
.strings_size
;
627 for (const char *ptr
= strchr(ignore_fields
, '\0') + 1;
629 ptr
= strchr(ptr
, '\0') + 1) {
630 const char *path
= munge_filename(ptr
);
632 if (stat(path
, &sb
) < 0)
633 error("can't stat `%1': %2", path
, strerror(errno
));
634 else if (sb
.st_mtime
> mtime
) {
635 int fd
= open(path
, O_RDONLY
| O_BINARY
);
637 error("can't open `%1': %2", path
, strerror(errno
));
639 add_out_of_date_file(fd
, path
, filename_id
+ (ptr
- pool
));