Improve the process for GNU tools
[minix3.git] / external / bsd / mdocml / dist / mansearch.c
blob7a525ccc3c2217715125f2638347d2185b506c4e
1 /* Id: mansearch.c,v 1.17 2014/01/05 04:13:52 schwarze Exp */
2 /*
3 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
22 #include <assert.h>
23 #include <fcntl.h>
24 #include <getopt.h>
25 #include <limits.h>
26 #include <regex.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <stddef.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
34 #ifdef HAVE_OHASH
35 #include <ohash.h>
36 #else
37 #include "compat_ohash.h"
38 #endif
39 #include <sqlite3.h>
41 #include "mandoc.h"
42 #include "manpath.h"
43 #include "mansearch.h"
/*
 * Bind the NUL-terminated string _v to parameter number _i of the
 * prepared statement _s and post-increment _i.
 * A bind failure is reported on stderr using the error message of
 * the database handle _db; execution continues.
 */
#define	SQL_BIND_TEXT(_db, _s, _i, _v) \
	do { if (SQLITE_OK != sqlite3_bind_text \
		((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \
		fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
/*
 * Bind the 64-bit integer _v to parameter number _i of the prepared
 * statement _s and post-increment _i.
 * A bind failure is reported on stderr using the error message of
 * the database handle _db; execution continues.
 */
#define	SQL_BIND_INT64(_db, _s, _i, _v) \
	do { if (SQLITE_OK != sqlite3_bind_int64 \
		((_s), (_i)++, (_v))) \
		fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
/*
 * Bind the object _v (an lvalue, passed by address with its full
 * size) as a blob to parameter number _i of the prepared statement
 * _s and post-increment _i.
 * The blob is bound SQLITE_STATIC, so _v must stay valid for as long
 * as the statement uses it.
 * A bind failure is reported on stderr using the error message of
 * the database handle _db; execution continues.
 */
#define	SQL_BIND_BLOB(_db, _s, _i, _v) \
	do { if (SQLITE_OK != sqlite3_bind_blob \
		((_s), (_i)++, &(_v), sizeof(_v), SQLITE_STATIC)) \
		fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \
	} while (0)
/*
 * One term of a compiled search expression.
 * Terms are chained through "next" in the order they were parsed.
 * A term matches by substring when "substr" is non-NULL and by the
 * compiled "regexp" otherwise.
 */
struct	expr {
	uint64_t	 bits;    /* type-mask */
	const char	*substr;  /* to search for, if applicable */
	regex_t		 regexp;  /* compiled regexp, if applicable */
	int		 open;    /* opening parentheses before */
	int		 and;     /* logical AND before */
	int		 close;   /* closing parentheses after */
	struct expr	*next;    /* next in sequence */
};
/*
 * One page found by the search query, kept in a hash table keyed
 * on "id" until the result array is built.
 */
struct	match {
	uint64_t	 id; /* identifier in database */
	char		*desc; /* description of manpage */
	int		 form; /* 0 == catpage */
};
/*
 * Association of a search key name with its bit in the type mask.
 */
struct	type {
	uint64_t	 bits;
	const char	*name;
};
82 static const struct type types[] = {
83 { TYPE_An, "An" },
84 { TYPE_Ar, "Ar" },
85 { TYPE_At, "At" },
86 { TYPE_Bsx, "Bsx" },
87 { TYPE_Bx, "Bx" },
88 { TYPE_Cd, "Cd" },
89 { TYPE_Cm, "Cm" },
90 { TYPE_Dv, "Dv" },
91 { TYPE_Dx, "Dx" },
92 { TYPE_Em, "Em" },
93 { TYPE_Er, "Er" },
94 { TYPE_Ev, "Ev" },
95 { TYPE_Fa, "Fa" },
96 { TYPE_Fl, "Fl" },
97 { TYPE_Fn, "Fn" },
98 { TYPE_Fn, "Fo" },
99 { TYPE_Ft, "Ft" },
100 { TYPE_Fx, "Fx" },
101 { TYPE_Ic, "Ic" },
102 { TYPE_In, "In" },
103 { TYPE_Lb, "Lb" },
104 { TYPE_Li, "Li" },
105 { TYPE_Lk, "Lk" },
106 { TYPE_Ms, "Ms" },
107 { TYPE_Mt, "Mt" },
108 { TYPE_Nd, "Nd" },
109 { TYPE_Nm, "Nm" },
110 { TYPE_Nx, "Nx" },
111 { TYPE_Ox, "Ox" },
112 { TYPE_Pa, "Pa" },
113 { TYPE_Rs, "Rs" },
114 { TYPE_Sh, "Sh" },
115 { TYPE_Ss, "Ss" },
116 { TYPE_St, "St" },
117 { TYPE_Sy, "Sy" },
118 { TYPE_Tn, "Tn" },
119 { TYPE_Va, "Va" },
120 { TYPE_Va, "Vt" },
121 { TYPE_Xr, "Xr" },
122 { TYPE_sec, "sec" },
123 { TYPE_arch,"arch" },
124 { ~0ULL, "any" },
125 { 0ULL, NULL }
128 static void buildnames(struct manpage *, sqlite3 *,
129 sqlite3_stmt *, uint64_t, const char *);
130 static char *buildoutput(sqlite3 *, sqlite3_stmt *,
131 uint64_t, uint64_t);
132 static void *hash_alloc(size_t, void *);
133 static void hash_free(void *, size_t, void *);
134 static void *hash_halloc(size_t, void *);
135 static struct expr *exprcomp(const struct mansearch *,
136 int, char *[]);
137 static void exprfree(struct expr *);
138 static struct expr *exprspec(struct expr *, uint64_t,
139 const char *, const char *);
140 static struct expr *exprterm(const struct mansearch *, char *, int);
141 static void sql_append(char **sql, size_t *sz,
142 const char *newstr, int count);
143 static void sql_match(sqlite3_context *context,
144 int argc, sqlite3_value **argv);
145 static void sql_regexp(sqlite3_context *context,
146 int argc, sqlite3_value **argv);
147 static char *sql_statement(const struct expr *);
150 mansearch(const struct mansearch *search,
151 const struct manpaths *paths,
152 int argc, char *argv[],
153 const char *outkey,
154 struct manpage **res, size_t *sz)
156 int fd, rc, c, ibit;
157 int64_t id;
158 uint64_t outbit;
159 char buf[PATH_MAX];
160 char *sql;
161 struct manpage *mpage;
162 struct expr *e, *ep;
163 sqlite3 *db;
164 sqlite3_stmt *s, *s2;
165 struct match *mp;
166 struct ohash_info info;
167 struct ohash htab;
168 unsigned int idx;
169 size_t i, j, cur, maxres;
171 memset(&info, 0, sizeof(struct ohash_info));
173 info.halloc = hash_halloc;
174 info.alloc = hash_alloc;
175 info.hfree = hash_free;
176 info.key_offset = offsetof(struct match, id);
178 *sz = cur = maxres = 0;
179 sql = NULL;
180 *res = NULL;
181 fd = -1;
182 e = NULL;
183 rc = 0;
185 if (0 == argc)
186 goto out;
187 if (NULL == (e = exprcomp(search, argc, argv)))
188 goto out;
190 outbit = 0;
191 if (NULL != outkey) {
192 for (ibit = 0; types[ibit].bits; ibit++) {
193 if (0 == strcasecmp(types[ibit].name, outkey)) {
194 outbit = types[ibit].bits;
195 break;
201 * Save a descriptor to the current working directory.
202 * Since pathnames in the "paths" variable might be relative,
203 * and we'll be chdir()ing into them, we need to keep a handle
204 * on our current directory from which to start the chdir().
207 if (NULL == getcwd(buf, PATH_MAX)) {
208 perror(NULL);
209 goto out;
210 } else if (-1 == (fd = open(buf, O_RDONLY, 0))) {
211 perror(buf);
212 goto out;
215 sql = sql_statement(e);
218 * Loop over the directories (containing databases) for us to
219 * search.
220 * Don't let missing/bad databases/directories phase us.
221 * In each, try to open the resident database and, if it opens,
222 * scan it for our match expression.
225 for (i = 0; i < paths->sz; i++) {
226 if (-1 == fchdir(fd)) {
227 perror(buf);
228 free(*res);
229 break;
230 } else if (-1 == chdir(paths->paths[i])) {
231 perror(paths->paths[i]);
232 continue;
235 c = sqlite3_open_v2
236 (MANDOC_DB, &db,
237 SQLITE_OPEN_READONLY, NULL);
239 if (SQLITE_OK != c) {
240 perror(MANDOC_DB);
241 sqlite3_close(db);
242 continue;
246 * Define the SQL functions for substring
247 * and regular expression matching.
250 c = sqlite3_create_function(db, "match", 2,
251 SQLITE_ANY, NULL, sql_match, NULL, NULL);
252 assert(SQLITE_OK == c);
253 c = sqlite3_create_function(db, "regexp", 2,
254 SQLITE_ANY, NULL, sql_regexp, NULL, NULL);
255 assert(SQLITE_OK == c);
257 j = 1;
258 c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
259 if (SQLITE_OK != c)
260 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
262 for (ep = e; NULL != ep; ep = ep->next) {
263 if (NULL == ep->substr) {
264 SQL_BIND_BLOB(db, s, j, ep->regexp);
265 } else
266 SQL_BIND_TEXT(db, s, j, ep->substr);
267 SQL_BIND_INT64(db, s, j, ep->bits);
270 memset(&htab, 0, sizeof(struct ohash));
271 ohash_init(&htab, 4, &info);
274 * Hash each entry on its [unique] document identifier.
275 * This is a uint64_t.
276 * Instead of using a hash function, simply convert the
277 * uint64_t to a uint32_t, the hash value's type.
278 * This gives good performance and preserves the
279 * distribution of buckets in the table.
281 while (SQLITE_ROW == (c = sqlite3_step(s))) {
282 id = sqlite3_column_int64(s, 2);
283 idx = ohash_lookup_memory
284 (&htab, (char *)&id,
285 sizeof(uint64_t), (uint32_t)id);
287 if (NULL != ohash_find(&htab, idx))
288 continue;
290 mp = mandoc_calloc(1, sizeof(struct match));
291 mp->id = id;
292 mp->desc = mandoc_strdup
293 ((char *)sqlite3_column_text(s, 0));
294 mp->form = sqlite3_column_int(s, 1);
295 ohash_insert(&htab, idx, mp);
298 if (SQLITE_DONE != c)
299 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
301 sqlite3_finalize(s);
303 c = sqlite3_prepare_v2(db,
304 "SELECT * FROM mlinks WHERE pageid=?",
305 -1, &s, NULL);
306 if (SQLITE_OK != c)
307 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
309 c = sqlite3_prepare_v2(db,
310 "SELECT * FROM keys WHERE pageid=? AND bits & ?",
311 -1, &s2, NULL);
312 if (SQLITE_OK != c)
313 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
315 for (mp = ohash_first(&htab, &idx);
316 NULL != mp;
317 mp = ohash_next(&htab, &idx)) {
318 if (cur + 1 > maxres) {
319 maxres += 1024;
320 *res = mandoc_realloc
321 (*res, maxres * sizeof(struct manpage));
323 mpage = *res + cur;
324 mpage->desc = mp->desc;
325 mpage->form = mp->form;
326 buildnames(mpage, db, s, mp->id, paths->paths[i]);
327 mpage->output = outbit ?
328 buildoutput(db, s2, mp->id, outbit) : NULL;
330 free(mp);
331 cur++;
334 sqlite3_finalize(s);
335 sqlite3_finalize(s2);
336 sqlite3_close(db);
337 ohash_delete(&htab);
339 rc = 1;
340 out:
341 exprfree(e);
342 if (-1 != fd)
343 close(fd);
344 free(sql);
345 *sz = cur;
346 return(rc);
349 static void
350 buildnames(struct manpage *mpage, sqlite3 *db, sqlite3_stmt *s,
351 uint64_t id, const char *path)
353 char *newnames;
354 const char *oldnames, *sep1, *name, *sec, *sep2, *arch;
355 size_t i;
356 int c;
358 mpage->names = NULL;
359 i = 1;
360 SQL_BIND_INT64(db, s, i, id);
361 while (SQLITE_ROW == (c = sqlite3_step(s))) {
363 /* Assemble the list of names. */
365 if (NULL == mpage->names) {
366 oldnames = "";
367 sep1 = "";
368 } else {
369 oldnames = mpage->names;
370 sep1 = ", ";
372 sec = sqlite3_column_text(s, 1);
373 arch = sqlite3_column_text(s, 2);
374 name = sqlite3_column_text(s, 3);
375 sep2 = '\0' == *arch ? "" : "/";
376 if (-1 == asprintf(&newnames, "%s%s%s(%s%s%s)",
377 oldnames, sep1, name, sec, sep2, arch)) {
378 perror(0);
379 exit((int)MANDOCLEVEL_SYSERR);
381 free(mpage->names);
382 mpage->names = newnames;
384 /* Also save the first file name encountered. */
386 if (NULL != mpage->file)
387 continue;
389 name = sqlite3_column_text(s, 0);
390 if (-1 == asprintf(&mpage->file, "%s/%s", path, name)) {
391 perror(0);
392 exit((int)MANDOCLEVEL_SYSERR);
395 if (SQLITE_DONE != c)
396 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
397 sqlite3_reset(s);
400 static char *
401 buildoutput(sqlite3 *db, sqlite3_stmt *s, uint64_t id, uint64_t outbit)
403 char *output, *newoutput;
404 const char *oldoutput, *sep1, *data;
405 size_t i;
406 int c;
408 output = NULL;
409 i = 1;
410 SQL_BIND_INT64(db, s, i, id);
411 SQL_BIND_INT64(db, s, i, outbit);
412 while (SQLITE_ROW == (c = sqlite3_step(s))) {
413 if (NULL == output) {
414 oldoutput = "";
415 sep1 = "";
416 } else {
417 oldoutput = output;
418 sep1 = " # ";
420 data = sqlite3_column_text(s, 1);
421 if (-1 == asprintf(&newoutput, "%s%s%s",
422 oldoutput, sep1, data)) {
423 perror(0);
424 exit((int)MANDOCLEVEL_SYSERR);
426 free(output);
427 output = newoutput;
429 if (SQLITE_DONE != c)
430 fprintf(stderr, "%s\n", sqlite3_errmsg(db));
431 sqlite3_reset(s);
432 return(output);
436 * Implement substring match as an application-defined SQL function.
437 * Using the SQL LIKE or GLOB operators instead would be a bad idea
438 * because that would require escaping metacharacters in the string
439 * being searched for.
441 static void
442 sql_match(sqlite3_context *context, int argc, sqlite3_value **argv)
445 assert(2 == argc);
446 sqlite3_result_int(context, NULL != strcasestr(
447 (const char *)sqlite3_value_text(argv[1]),
448 (const char *)sqlite3_value_text(argv[0])));
452 * Implement regular expression match
453 * as an application-defined SQL function.
455 static void
456 sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv)
459 assert(2 == argc);
460 sqlite3_result_int(context, !regexec(
461 (regex_t *)sqlite3_value_blob(argv[0]),
462 (const char *)sqlite3_value_text(argv[1]),
463 0, NULL, 0));
/*
 * Append to the NUL-terminated string *sql, which currently has the
 * length *sz, and update both.
 * If count is greater than one, the first character of newstr is
 * appended count times; otherwise, newstr is appended once as a
 * whole.
 */
static void
sql_append(char **sql, size_t *sz, const char *newstr, int count)
{
	size_t		 newsz;

	newsz = 1 < count ? (size_t)count : strlen(newstr);
	*sql = mandoc_realloc(*sql, *sz + newsz + 1);
	if (1 < count)
		memset(*sql + *sz, *newstr, (size_t)count);
	else
		memcpy(*sql + *sz, newstr, newsz);
	*sz += newsz;
	(*sql)[*sz] = '\0';
}
482 * Prepare the search SQL statement.
484 static char *
485 sql_statement(const struct expr *e)
487 char *sql;
488 size_t sz;
489 int needop;
491 sql = mandoc_strdup("SELECT * FROM mpages WHERE ");
492 sz = strlen(sql);
494 for (needop = 0; NULL != e; e = e->next) {
495 if (e->and)
496 sql_append(&sql, &sz, " AND ", 1);
497 else if (needop)
498 sql_append(&sql, &sz, " OR ", 1);
499 if (e->open)
500 sql_append(&sql, &sz, "(", e->open);
501 sql_append(&sql, &sz, NULL == e->substr ?
502 "id IN (SELECT pageid FROM keys "
503 "WHERE key REGEXP ? AND bits & ?)" :
504 "id IN (SELECT pageid FROM keys "
505 "WHERE key MATCH ? AND bits & ?)", 1);
506 if (e->close)
507 sql_append(&sql, &sz, ")", e->close);
508 needop = 1;
511 return(sql);
515 * Compile a set of string tokens into an expression.
516 * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
517 * "(", "foo=bar", etc.).
519 static struct expr *
520 exprcomp(const struct mansearch *search, int argc, char *argv[])
522 int i, toopen, logic, igncase, toclose;
523 struct expr *first, *next, *cur;
525 first = cur = NULL;
526 logic = igncase = toclose = 0;
527 toopen = 1;
529 for (i = 0; i < argc; i++) {
530 if (0 == strcmp("(", argv[i])) {
531 if (igncase)
532 goto fail;
533 toopen++;
534 toclose++;
535 continue;
536 } else if (0 == strcmp(")", argv[i])) {
537 if (toopen || logic || igncase || NULL == cur)
538 goto fail;
539 cur->close++;
540 if (0 > --toclose)
541 goto fail;
542 continue;
543 } else if (0 == strcmp("-a", argv[i])) {
544 if (toopen || logic || igncase || NULL == cur)
545 goto fail;
546 logic = 1;
547 continue;
548 } else if (0 == strcmp("-o", argv[i])) {
549 if (toopen || logic || igncase || NULL == cur)
550 goto fail;
551 logic = 2;
552 continue;
553 } else if (0 == strcmp("-i", argv[i])) {
554 if (igncase)
555 goto fail;
556 igncase = 1;
557 continue;
559 next = exprterm(search, argv[i], !igncase);
560 if (NULL == next)
561 goto fail;
562 next->open = toopen;
563 next->and = (1 == logic);
564 if (NULL != first) {
565 cur->next = next;
566 cur = next;
567 } else
568 cur = first = next;
569 toopen = logic = igncase = 0;
571 if (toopen || logic || igncase || toclose)
572 goto fail;
574 cur->close++;
575 cur = exprspec(cur, TYPE_arch, search->arch, "^(%s|any)$");
576 exprspec(cur, TYPE_sec, search->sec, "^%s$");
578 return(first);
580 fail:
581 if (NULL != first)
582 exprfree(first);
583 return(NULL);
586 static struct expr *
587 exprspec(struct expr *cur, uint64_t key, const char *value,
588 const char *format)
590 char errbuf[BUFSIZ];
591 char *cp;
592 int irc;
594 if (NULL == value)
595 return(cur);
597 if (-1 == asprintf(&cp, format, value)) {
598 perror(0);
599 exit((int)MANDOCLEVEL_SYSERR);
601 cur->next = mandoc_calloc(1, sizeof(struct expr));
602 cur = cur->next;
603 cur->and = 1;
604 cur->bits = key;
605 if (0 != (irc = regcomp(&cur->regexp, cp,
606 REG_EXTENDED | REG_NOSUB | REG_ICASE))) {
607 regerror(irc, &cur->regexp, errbuf, sizeof(errbuf));
608 fprintf(stderr, "regcomp: %s\n", errbuf);
609 cur->substr = value;
611 free(cp);
612 return(cur);
615 static struct expr *
616 exprterm(const struct mansearch *search, char *buf, int cs)
618 char errbuf[BUFSIZ];
619 struct expr *e;
620 char *key, *v;
621 size_t i;
622 int irc;
624 if ('\0' == *buf)
625 return(NULL);
627 e = mandoc_calloc(1, sizeof(struct expr));
629 /*"whatis" mode uses an opaque string and default fields. */
631 if (MANSEARCH_WHATIS & search->flags) {
632 e->substr = buf;
633 e->bits = search->deftype;
634 return(e);
638 * If no =~ is specified, search with equality over names and
639 * descriptions.
640 * If =~ begins the phrase, use name and description fields.
643 if (NULL == (v = strpbrk(buf, "=~"))) {
644 e->substr = buf;
645 e->bits = search->deftype;
646 return(e);
647 } else if (v == buf)
648 e->bits = search->deftype;
650 if ('~' == *v++) {
651 if (0 != (irc = regcomp(&e->regexp, v,
652 REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE)))) {
653 regerror(irc, &e->regexp, errbuf, sizeof(errbuf));
654 fprintf(stderr, "regcomp: %s\n", errbuf);
655 free(e);
656 return(NULL);
658 } else
659 e->substr = v;
660 v[-1] = '\0';
663 * Parse out all possible fields.
664 * If the field doesn't resolve, bail.
667 while (NULL != (key = strsep(&buf, ","))) {
668 if ('\0' == *key)
669 continue;
670 i = 0;
671 while (types[i].bits &&
672 strcasecmp(types[i].name, key))
673 i++;
674 if (0 == types[i].bits) {
675 free(e);
676 return(NULL);
678 e->bits |= types[i].bits;
681 return(e);
684 static void
685 exprfree(struct expr *p)
687 struct expr *pp;
689 while (NULL != p) {
690 pp = p->next;
691 free(p);
692 p = pp;
/*
 * Allocation callback installed as "halloc" in the hash table
 * setup: returns sz bytes of zeroed memory.
 * The second argument is not used.
 */
static void *
hash_halloc(size_t sz, void *arg)
{

	(void)arg;
	return(mandoc_calloc(sz, 1));
}
/*
 * Allocation callback installed as "alloc" in the hash table
 * setup: returns sz bytes of memory.
 * The second argument is not used.
 */
static void *
hash_alloc(size_t sz, void *arg)
{

	(void)arg;
	return(mandoc_malloc(sz));
}
/*
 * Deallocation callback installed as "hfree" in the hash table
 * setup: releases p.
 * The size and the extra argument are not used.
 */
static void
hash_free(void *p, size_t sz, void *arg)
{

	(void)sz;
	(void)arg;
	free(p);
}