Don't leave junk files lying around the place
[findutils.git] / locate / locate.c
blobc94a826f1dcbb6084f6e347c2f97aa5396df667a
1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003,
3 2004, 2005 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
18 USA.
21 /* Usage: locate [options] pattern...
23 Scan a pathname list for the full pathname of a file, given only
24 a piece of the name (possibly containing shell globbing metacharacters).
25 The list has been processed with front-compression, which reduces
26 the list size by a factor of 4-5.
27 Recognizes two database formats, old and new. The old format is
28 bigram coded, which reduces space by a further 20-25% and uses the
29 following encoding of the database bytes:
31 0-28 likeliest differential counts + offset (14) to make nonnegative
32 30 escape code for out-of-range count to follow in next halfword
33 128-255 bigram codes (the 128 most common, as determined by `updatedb')
34 32-127 single character (printable) ASCII remainder
36 Earlier versions of GNU locate used to use a novel two-tiered
37 string search technique, which was described in Usenix ;login:, Vol
38 8, No 1, February/March, 1983, p. 8.
40 However, latterly code changes to provide additional functionality
41 became difficult to make with the existing reading scheme, and so
42 we no longer perform the matching as efficiently as we used to (that is,
43 we no longer use the same algorithm).
45 The old algorithm was:
47 First, match a metacharacter-free subpattern and a partial
48 pathname BACKWARDS to avoid full expansion of the pathname list.
49 The time savings is 40-50% over forward matching, which cannot
50 efficiently handle overlapped search patterns and compressed
51 path remainders.
53 Then, match the actual shell glob pattern (if in this form)
54 against the candidate pathnames using the slower shell filename
55 matching routines.
58 Written by James A. Woods <jwoods@adobe.com>.
59 Modified by David MacKenzie <djm@gnu.org>.
60 Additional work by James Youngman and Bas van Gompel.
63 #include <config.h>
64 #include <stdio.h>
65 #include <ctype.h>
66 #include <sys/types.h>
67 #include <sys/stat.h>
68 #include <time.h>
69 #include <fnmatch.h>
70 #include <getopt.h>
71 #include <xstrtol.h>
73 /* The presence of unistd.h is assumed by gnulib these days, so we
74 * might as well assume it too.
76 /* We need <unistd.h> for isatty(). */
77 #include <unistd.h>
80 #define NDEBUG
81 #include <assert.h>
83 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
84 #include <string.h>
85 #else
86 #include <strings.h>
87 #define strchr index
88 #endif
90 #ifdef STDC_HEADERS
91 #include <stdlib.h>
92 #endif
94 #ifdef HAVE_ERRNO_H
95 #include <errno.h>
96 #else
97 extern int errno;
98 #endif
100 #ifdef HAVE_LOCALE_H
101 #include <locale.h>
102 #endif
104 #if ENABLE_NLS
105 # include <libintl.h>
106 # define _(Text) gettext (Text)
107 #else
108 # define _(Text) Text
109 #define textdomain(Domain)
110 #define bindtextdomain(Package, Directory)
111 #endif
112 #ifdef gettext_noop
113 # define N_(String) gettext_noop (String)
114 #else
115 /* We used to use (String) instead of just String, but apparently ISO C
116 * doesn't allow this (at least, that's what HP said when someone reported
117 * this as a compiler bug). This is HP case number 1205608192. See
118 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
119 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
120 * like: static const char buf[] = ("string");
122 # define N_(String) String
123 #endif
125 #include "locatedb.h"
126 #include <getline.h>
127 #include "../gnulib/lib/xalloc.h"
128 #include "../gnulib/lib/error.h"
129 #include "../gnulib/lib/human.h"
130 #include "dirname.h"
131 #include "closeout.h"
132 #include "nextelem.h"
133 #include "regex.h"
134 #include "quote.h"
135 #include "quotearg.h"
136 #include "printquoted.h"
137 #include "regextype.h"
140 /* Note that this evaluates C many times. */
141 #ifdef _LIBC
142 # define TOUPPER(Ch) toupper (Ch)
143 # define TOLOWER(Ch) tolower (Ch)
144 #else
145 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
146 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
147 #endif
149 /* typedef enum {false, true} boolean; */
151 /* Warn if a database is older than this. 8 days allows for a weekly
152 update that takes up to a day to perform. */
153 #define WARN_NUMBER_UNITS (8)
154 /* Printable name of units used in WARN_SECONDS */
155 static const char warn_name_units[] = N_("days");
156 #define SECONDS_PER_UNIT (60 * 60 * 24)
158 #define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
/* Result codes returned by visitor functions.  The values are distinct
   bits so that the process_*() drivers can test for several of them at
   once with a single mask.  */
enum visit_result
  {
    VISIT_CONTINUE = 1,  /* please call the next visitor */
    VISIT_ACCEPTED = 2,  /* accepted, call no further callbacks for this file */
    VISIT_REJECTED = 4,  /* rejected, process next file. */
    VISIT_ABORT    = 8   /* rejected, process no more files. */
  };
/* Which database entries to report, based on whether the named file
   currently exists.  */
enum ExistenceCheckType
  {
    ACCEPT_EITHER,        /* Corresponds to lack of -E/-e option */
    ACCEPT_EXISTING,      /* Corresponds to option -e */
    ACCEPT_NON_EXISTING   /* Corresponds to option -E */
  };
175 /* Check for existence of files before printing them out? */
176 enum ExistenceCheckType check_existence = ACCEPT_EITHER;
178 static int follow_symlinks = 1;
180 /* What to separate the results with. */
181 static int separator = '\n';
183 static struct quoting_options * quote_opts = NULL;
184 static bool stdout_is_a_tty;
185 static bool print_quoted_filename;
/* Read in a 16-bit int, high byte first (network byte order), from FP
   and return it as a signed value.

   Each byte is masked to 0..255 and combined as an unsigned quantity
   before being converted to the signed result, so no negative value is
   ever left-shifted (which would be undefined behaviour).  No EOF check
   is made: a failed fgetc() contributes the byte 0xff, so reading at
   end-of-file yields -1.  */
static short
get_short (FILE *fp)
{
  unsigned int high = (unsigned int) fgetc (fp) & 0xffu;
  unsigned int low = (unsigned int) fgetc (fp) & 0xffu;
  unsigned int value = (high << 8) | low;

  /* Map 0x8000..0xffff onto -32768..-1 explicitly rather than relying on
     an implementation-defined narrowing conversion.  */
  if (value & 0x8000u)
    return (short) ((int) value - 0x10000);
  return (short) value;
}
/* The characters whose presence makes a pattern a shell glob.  */
const char * const metacharacters = "*?[]\\";

/* Return nonzero if S contains any shell glob characters
   (any of the characters listed in `metacharacters'), zero otherwise.  */
static int
contains_metacharacter(const char *s)
{
  return (strpbrk(s, metacharacters) != NULL) ? 1 : 0;
}
213 /* locate_read_str()
215 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
216 * until we reach DELIMITER or end-of-file. We reallocate the buffer
217 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
218 * is made regarding the content of the data (i.e. the implementation is
219 * 8-bit clean, the only delimiter is DELIMITER).
221 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
222 * has been removed from gnulib.
224 * We call the function locate_read_str() to avoid a name clash with the curses
225 * function getstr().
227 static int
228 locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
230 char * p = NULL;
231 size_t sz = 0;
232 int needed, nread;
234 nread = getdelim(&p, &sz, delimiter, fp);
235 if (nread >= 0)
237 assert(p != NULL);
239 needed = offs + nread + 1;
240 if (needed > (*siz))
242 char *pnew = realloc(*buf, needed);
243 if (NULL == pnew)
245 return -1; /* FAIL */
247 else
249 *siz = needed;
250 *buf = pnew;
253 memcpy((*buf)+offs, p, nread);
254 free(p);
256 return nread;
/* Copy the NUL-terminated string SRC to DEST, converting upper-case
   characters to lower case on the way.  DEST must have room for
   strlen(SRC)+1 bytes.  DEST may be the same pointer as SRC, in which
   case the string is lower-cased in place.

   Each character is converted to unsigned char before being handed to
   tolower(); passing a negative char value to the <ctype.h> functions
   is undefined behaviour.  */
static void
lc_strcpy(char *dest, const char *src)
{
  for (; *src != '\0'; ++src)
    *dest++ = (char) tolower ((unsigned char) *src);
  *dest = '\0';
}
/* Counters shared by visit_limit() and visit_count().  */
struct locate_limits
{
  uintmax_t limit;            /* visit_limit() aborts once items_accepted reaches this. */
  uintmax_t items_accepted;   /* Incremented by visit_limit() and visit_count(). */
};

static struct locate_limits limits;
/* Totals gathered by visit_stats() and reported by print_stats().  */
struct locate_stats
{
  uintmax_t compressed_bytes;
  uintmax_t total_filename_count;     /* Number of entries seen. */
  uintmax_t total_filename_length;    /* Sum of strlen() over all entries. */
  uintmax_t whitespace_count;         /* Entries containing whitespace. */
  uintmax_t newline_count;            /* Entries containing a newline. */
  uintmax_t highbit_filename_count;   /* Entries with a byte that has bit 7 set. */
};

static struct locate_stats statistics;
/* A growable scratch buffer.  visit_casefold() stores the lower-cased
   copy of the current filename here.  */
struct stringbuf
{
  char *buffer;        /* The storage itself. */
  size_t buffersize;   /* Number of bytes allocated for BUFFER. */
  size_t *preqlen;     /* Points at the length the buffer must be able to
                          hold; one extra byte is added by the user. */
};

static struct stringbuf casebuf;
/* Context for visit_substring_match_casefold(): the pattern to look
   for, and the buffer holding the case-folded filename to search.  */
struct casefolder
{
  const char *pattern;
  struct stringbuf *pbuf;
};
306 struct regular_expression
308 struct re_pattern_buffer regex; /* for --regex */
/* State shared between locate() and the visitor functions while one
   database is being read.  */
struct process_data
{
  int c;                     /* An input byte.  */
  int count;                 /* The length of the prefix shared with the
                                previous database entry.  */
  int len;
  char *original_filename;   /* The current input database entry.  */
  size_t pathsize;           /* Amount allocated for it.  */
  char *munged_filename;     /* path or base_name(path) */
  FILE *fp;                  /* The pathname database.  */
  char *dbfile;              /* Its name, or "<stdin>" */

  /* for the old database format,
     the first and second characters of the most common bigrams.  */
  char bigram1[128];
  char bigram2[128];
};
/* Signature of a visitor: it inspects (and may update) PROCDATA, using
   the CONTEXT it was registered with, and returns an int result code.  */
typedef int (*visitfunc)(struct process_data *procdata,
                         void *context);

/* One node in the singly linked list of visitors.  */
struct visitor
{
  visitfunc inspector;     /* The function to call. */
  void *context;           /* Passed to INSPECTOR as its second argument. */
  struct visitor *next;    /* Next visitor, or NULL at the end of the list. */
};

static struct visitor *inspectors = NULL;          /* Head of the list. */
static struct visitor *lastinspector = NULL;       /* Tail of the list. */
static struct visitor *past_pat_inspector = NULL;  /* Where process_or() and
                                                      process_and() switch
                                                      to their second loop. */
344 /* 0 or 1 pattern(s) */
345 static int
346 process_simple(struct process_data *procdata)
348 int result = VISIT_CONTINUE;
349 const struct visitor *p = inspectors;
351 while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (NULL != p) )
353 result = (p->inspector)(procdata, p->context);
354 p = p->next;
357 return result;
360 /* Accept if any pattern matches. */
361 static int
362 process_or (struct process_data *procdata)
364 int result = VISIT_CONTINUE;
365 const struct visitor *p = inspectors;
367 while ( ((VISIT_CONTINUE | VISIT_REJECTED) & result) && (past_pat_inspector != p) )
369 result = (p->inspector)(procdata, p->context);
370 p = p->next;
373 if (result == VISIT_CONTINUE)
374 result = VISIT_REJECTED;
375 if (result & (VISIT_ABORT | VISIT_REJECTED))
376 return result;
378 p = past_pat_inspector;
379 result = VISIT_CONTINUE;
381 while ( (VISIT_CONTINUE == result) && (NULL != p) )
383 result = (p->inspector)(procdata, p->context);
384 p = p->next;
387 if (VISIT_CONTINUE == result)
388 return VISIT_ACCEPTED;
389 else
390 return result;
393 /* Accept if all pattern match. */
394 static int
395 process_and (struct process_data *procdata)
397 int result = VISIT_CONTINUE;
398 const struct visitor *p = inspectors;
400 while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (past_pat_inspector != p) )
402 result = (p->inspector)(procdata, p->context);
403 p = p->next;
406 if (result == VISIT_CONTINUE)
407 result = VISIT_REJECTED;
408 if (result & (VISIT_ABORT | VISIT_REJECTED))
409 return result;
411 p = past_pat_inspector;
412 result = VISIT_CONTINUE;
414 while ( (VISIT_CONTINUE == result) && (NULL != p) )
416 result = (p->inspector)(procdata, p->context);
417 p = p->next;
420 if (VISIT_CONTINUE == result)
421 return VISIT_ACCEPTED;
422 else
423 return result;
426 typedef int (*processfunc)(struct process_data *procdata);
428 static processfunc mainprocessor = NULL;
430 static void
431 add_visitor(visitfunc fn, void *context)
433 struct visitor *p = xmalloc(sizeof(struct visitor));
434 p->inspector = fn;
435 p->context = context;
436 p->next = NULL;
438 if (NULL == lastinspector)
440 lastinspector = inspectors = p;
442 else
444 lastinspector->next = p;
445 lastinspector = p;
451 static int
452 visit_justprint_quoted(struct process_data *procdata, void *context)
454 (void) context;
455 print_quoted (stdout, quote_opts, stdout_is_a_tty,
456 "%s",
457 procdata->original_filename);
458 putchar(separator);
459 return VISIT_CONTINUE;
462 static int
463 visit_justprint_unquoted(struct process_data *procdata, void *context)
465 (void) context;
466 fputs(procdata->original_filename, stdout);
467 putchar(separator);
468 return VISIT_CONTINUE;
471 static int
472 visit_old_format(struct process_data *procdata, void *context)
474 register char *s;
475 (void) context;
477 /* Get the offset in the path where this path info starts. */
478 if (procdata->c == LOCATEDB_OLD_ESCAPE)
479 procdata->count += getw (procdata->fp) - LOCATEDB_OLD_OFFSET;
480 else
481 procdata->count += procdata->c - LOCATEDB_OLD_OFFSET;
483 /* Overlay the old path with the remainder of the new. */
484 for (s = procdata->original_filename + procdata->count;
485 (procdata->c = getc (procdata->fp)) > LOCATEDB_OLD_ESCAPE;)
486 if (procdata->c < 0200)
487 *s++ = procdata->c; /* An ordinary character. */
488 else
490 /* Bigram markers have the high bit set. */
491 procdata->c &= 0177;
492 *s++ = procdata->bigram1[procdata->c];
493 *s++ = procdata->bigram2[procdata->c];
495 *s-- = '\0';
497 procdata->munged_filename = procdata->original_filename;
499 return VISIT_CONTINUE;
503 static int
504 visit_locate02_format(struct process_data *procdata, void *context)
506 register char *s;
507 int nread;
508 (void) context;
510 if (procdata->c == LOCATEDB_ESCAPE)
511 procdata->count += (short)get_short (procdata->fp);
512 else if (procdata->c > 127)
513 procdata->count += procdata->c - 256;
514 else
515 procdata->count += procdata->c;
517 if (procdata->count > procdata->len || procdata->count < 0)
519 /* This should not happen generally , but since we're
520 * reading in data which is outside our control, we
521 * cannot prevent it.
523 error(1, 0, _("locate database `%s' is corrupt or invalid"), procdata->dbfile);
526 /* Overlay the old path with the remainder of the new. */
527 nread = locate_read_str (&procdata->original_filename, &procdata->pathsize,
528 procdata->fp, 0, procdata->count);
529 if (nread < 0)
530 return VISIT_ABORT;
531 procdata->c = getc (procdata->fp);
532 procdata->len = procdata->count + nread;
533 s = procdata->original_filename + procdata->len - 1; /* Move to the last char in path. */
534 assert (s[0] != '\0');
535 assert (s[1] == '\0'); /* Our terminator. */
536 assert (s[2] == '\0'); /* Added by locate_read_str. */
538 procdata->munged_filename = procdata->original_filename;
540 return VISIT_CONTINUE;
543 static int
544 visit_basename(struct process_data *procdata, void *context)
546 (void) context;
547 procdata->munged_filename = base_name(procdata->original_filename);
549 return VISIT_CONTINUE;
553 static int
554 visit_casefold(struct process_data *procdata, void *context)
556 struct stringbuf *b = context;
558 if (*b->preqlen+1 > b->buffersize)
560 b->buffer = xrealloc(b->buffer, *b->preqlen+1); /* XXX: consider using extendbuf(). */
561 b->buffersize = *b->preqlen+1;
563 lc_strcpy(b->buffer, procdata->munged_filename);
565 return VISIT_CONTINUE;
568 /* visit_existing_follow implements -L -e */
569 static int
570 visit_existing_follow(struct process_data *procdata, void *context)
572 struct stat st;
573 (void) context;
575 /* munged_filename has been converted in some way (to lower case,
576 * or is just the base name of the file), and original_filename has not.
577 * Hence only original_filename is still actually the name of the file
578 * whose existence we would need to check.
580 if (stat(procdata->original_filename, &st) != 0)
582 return VISIT_REJECTED;
584 else
586 return VISIT_CONTINUE;
590 /* visit_non_existing_follow implements -L -E */
591 static int
592 visit_non_existing_follow(struct process_data *procdata, void *context)
594 struct stat st;
595 (void) context;
597 /* munged_filename has been converted in some way (to lower case,
598 * or is just the base name of the file), and original_filename has not.
599 * Hence only original_filename is still actually the name of the file
600 * whose existence we would need to check.
602 if (stat(procdata->original_filename, &st) == 0)
604 return VISIT_REJECTED;
606 else
608 return VISIT_CONTINUE;
612 /* visit_existing_nofollow implements -P -e */
613 static int
614 visit_existing_nofollow(struct process_data *procdata, void *context)
616 struct stat st;
617 (void) context;
619 /* munged_filename has been converted in some way (to lower case,
620 * or is just the base name of the file), and original_filename has not.
621 * Hence only original_filename is still actually the name of the file
622 * whose existence we would need to check.
624 if (lstat(procdata->original_filename, &st) != 0)
626 return VISIT_REJECTED;
628 else
630 return VISIT_CONTINUE;
634 /* visit_non_existing_nofollow implements -P -E */
635 static int
636 visit_non_existing_nofollow(struct process_data *procdata, void *context)
638 struct stat st;
639 (void) context;
641 /* munged_filename has been converted in some way (to lower case,
642 * or is just the base name of the file), and original_filename has not.
643 * Hence only original_filename is still actually the name of the file
644 * whose existence we would need to check.
646 if (lstat(procdata->original_filename, &st) == 0)
648 return VISIT_REJECTED;
650 else
652 return VISIT_CONTINUE;
656 static int
657 visit_substring_match_nocasefold(struct process_data *procdata, void *context)
659 const char *pattern = context;
661 if (NULL != strstr(procdata->munged_filename, pattern))
662 return VISIT_ACCEPTED;
663 else
664 return VISIT_REJECTED;
667 static int
668 visit_substring_match_casefold(struct process_data *procdata, void *context)
670 const struct casefolder * p = context;
671 const struct stringbuf * b = p->pbuf;
672 (void) procdata;
674 if (NULL != strstr(b->buffer, p->pattern))
675 return VISIT_ACCEPTED;
676 else
677 return VISIT_REJECTED;
681 static int
682 visit_globmatch_nofold(struct process_data *procdata, void *context)
684 const char *glob = context;
685 if (fnmatch(glob, procdata->munged_filename, 0) != 0)
686 return VISIT_REJECTED;
687 else
688 return VISIT_ACCEPTED;
692 static int
693 visit_globmatch_casefold(struct process_data *procdata, void *context)
695 const char *glob = context;
696 if (fnmatch(glob, procdata->munged_filename, FNM_CASEFOLD) != 0)
697 return VISIT_REJECTED;
698 else
699 return VISIT_ACCEPTED;
703 static int
704 visit_regex(struct process_data *procdata, void *context)
706 struct regular_expression *p = context;
707 const size_t len = strlen(procdata->munged_filename);
709 int rv = re_search (&p->regex, procdata->munged_filename,
710 len, 0, len,
711 (struct re_registers *) NULL);
712 if (rv < 0)
714 return VISIT_REJECTED; /* no match (-1), or internal error (-2) */
716 else
718 return VISIT_ACCEPTED; /* match */
723 static int
724 visit_stats(struct process_data *procdata, void *context)
726 struct locate_stats *p = context;
727 size_t len = strlen(procdata->original_filename);
728 const char *s;
729 int highbit, whitespace, newline;
731 ++(p->total_filename_count);
732 p->total_filename_length += len;
734 highbit = whitespace = newline = 0;
735 for (s=procdata->original_filename; *s; ++s)
737 if ( (int)(*s) & 128 )
738 highbit = 1;
739 if ('\n' == *s)
741 newline = whitespace = 1;
743 else if (isspace((unsigned char)*s))
745 whitespace = 1;
749 if (highbit)
750 ++(p->highbit_filename_count);
751 if (whitespace)
752 ++(p->whitespace_count);
753 if (newline)
754 ++(p->newline_count);
756 return VISIT_CONTINUE;
760 static int
761 visit_limit(struct process_data *procdata, void *context)
763 struct locate_limits *p = context;
765 (void) procdata;
767 if (++p->items_accepted >= p->limit)
768 return VISIT_ABORT;
769 else
770 return VISIT_CONTINUE;
773 static int
774 visit_count(struct process_data *procdata, void *context)
776 struct locate_limits *p = context;
778 (void) procdata;
780 ++p->items_accepted;
781 return VISIT_CONTINUE;
784 /* Emit the statistics.
786 static void
787 print_stats(int argc, size_t database_file_size)
789 char hbuf[LONGEST_HUMAN_READABLE + 1];
791 printf(_("Locate database size: %s bytes\n"),
792 human_readable ((uintmax_t) database_file_size,
793 hbuf, human_ceiling, 1, 1));
795 printf(_("Filenames: %s "),
796 human_readable (statistics.total_filename_count,
797 hbuf, human_ceiling, 1, 1));
798 printf(_("with a cumulative length of %s bytes"),
799 human_readable (statistics.total_filename_length,
800 hbuf, human_ceiling, 1, 1));
802 printf(_("\n\tof which %s contain whitespace, "),
803 human_readable (statistics.whitespace_count,
804 hbuf, human_ceiling, 1, 1));
805 printf(_("\n\t%s contain newline characters, "),
806 human_readable (statistics.newline_count,
807 hbuf, human_ceiling, 1, 1));
808 printf(_("\n\tand %s contain characters with the high bit set.\n"),
809 human_readable (statistics.highbit_filename_count,
810 hbuf, human_ceiling, 1, 1));
812 if (!argc)
813 printf(_("Compression ratio %4.2f%%\n"),
814 100.0 * ((double)statistics.total_filename_length
815 - (double) database_file_size)
816 / (double) statistics.total_filename_length);
817 printf("\n");
821 /* Print or count the entries in DBFILE that match shell globbing patterns in
822 ARGV. Return the number of entries matched. */
824 static unsigned long
825 locate (int argc,
826 char **argv,
827 char *dbfile,
828 int ignore_case,
829 int enable_print,
830 int basename_only,
831 int use_limit,
832 struct locate_limits *plimit,
833 int stats,
834 int op_and,
835 int regex,
836 int regex_options)
838 char *pathpart; /* A pattern to consider. */
839 int argn; /* Index to current pattern in argv. */
840 int need_fold; /* Set when folding and any pattern is non-glob. */
841 int nread; /* number of bytes read from an entry. */
842 struct process_data procdata; /* Storage for data shared with visitors. */
844 int old_format = 0; /* true if reading a bigram-encoded database. */
845 static bool did_stdin = false; /* Set to prevent rereading stdin. */
846 struct visitor* pvis; /* temp for determining past_pat_inspector. */
848 /* To check the age of the database. */
849 struct stat st;
850 time_t now;
853 if (ignore_case)
854 regex_options |= RE_ICASE;
856 procdata.len = procdata.count = 0;
857 if (!strcmp (dbfile, "-"))
859 if (did_stdin)
861 error (0, 0, _("warning: the locate database can only be read from stdin once."));
862 return 0;
866 procdata.dbfile = "<stdin>";
867 procdata.fp = stdin;
868 did_stdin = true;
870 else
872 if (stat (dbfile, &st) || (procdata.fp = fopen (dbfile, "r")) == NULL)
874 error (0, errno, "%s", dbfile);
875 return 0;
877 time(&now);
878 if (now - st.st_mtime > WARN_SECONDS)
880 /* For example:
881 warning: database `fred' is more than 8 days old */
882 error (0, 0, _("warning: database `%s' is more than %d %s old"),
883 dbfile, WARN_NUMBER_UNITS, _(warn_name_units));
885 procdata.dbfile = dbfile;
888 procdata.pathsize = 1026; /* Increased as necessary by locate_read_str. */
889 procdata.original_filename = xmalloc (procdata.pathsize);
891 nread = fread (procdata.original_filename, 1, sizeof (LOCATEDB_MAGIC),
892 procdata.fp);
893 if (nread != sizeof (LOCATEDB_MAGIC)
894 || memcmp (procdata.original_filename, LOCATEDB_MAGIC,
895 sizeof (LOCATEDB_MAGIC)))
897 int i;
898 /* Read the list of the most common bigrams in the database. */
899 nread = fread (procdata.original_filename + sizeof (LOCATEDB_MAGIC), 1,
900 256 - sizeof (LOCATEDB_MAGIC), procdata.fp);
901 for (i = 0; i < 128; i++)
903 procdata.bigram1[i] = procdata.original_filename[i << 1];
904 procdata.bigram2[i] = procdata.original_filename[(i << 1) + 1];
906 old_format = 1;
909 /* Set up the inspection regime */
910 inspectors = NULL;
911 lastinspector = NULL;
912 past_pat_inspector = NULL;
914 if (old_format)
915 add_visitor(visit_old_format, NULL);
916 else
917 add_visitor(visit_locate02_format, NULL);
919 if (basename_only)
920 add_visitor(visit_basename, NULL);
922 /* See if we need fold. */
923 if (ignore_case && !regex)
924 for ( argn = 0; argn < argc; argn++ )
926 pathpart = argv[argn];
927 if (!contains_metacharacter(pathpart))
929 need_fold = 1;
930 break;
934 if (need_fold)
936 add_visitor(visit_casefold, &casebuf);
937 casebuf.preqlen = &procdata.pathsize;
940 /* Add an inspector for each pattern we're looking for. */
941 for ( argn = 0; argn < argc; argn++ )
943 pathpart = argv[argn];
944 if (regex)
946 struct regular_expression *p = xmalloc(sizeof(*p));
947 const char *error_message = NULL;
949 memset (&p->regex, 0, sizeof (p->regex));
951 re_set_syntax(regex_options);
952 p->regex.allocated = 100;
953 p->regex.buffer = (unsigned char *) xmalloc (p->regex.allocated);
954 p->regex.fastmap = NULL;
955 p->regex.syntax = regex_options;
956 p->regex.translate = NULL;
958 error_message = re_compile_pattern (pathpart, strlen (pathpart),
959 &p->regex);
960 if (error_message)
962 error (1, 0, "%s", error_message);
964 else
966 add_visitor(visit_regex, p);
969 else if (contains_metacharacter(pathpart))
971 if (ignore_case)
972 add_visitor(visit_globmatch_casefold, pathpart);
973 else
974 add_visitor(visit_globmatch_nofold, pathpart);
976 else
978 /* No glob characters used. Hence we match on
979 * _any part_ of the filename, not just the
980 * basename. This seems odd to me, but it is the
981 * traditional behaviour.
982 * James Youngman <jay@gnu.org>
984 if (ignore_case)
986 struct casefolder * cf = xmalloc(sizeof(*cf));
987 cf->pattern = pathpart;
988 cf->pbuf = &casebuf;
989 add_visitor(visit_substring_match_casefold, cf);
990 /* If we ignore case, convert it to lower now so we don't have to
991 * do it every time
993 lc_strcpy(pathpart, pathpart);
995 else
997 add_visitor(visit_substring_match_nocasefold, pathpart);
1002 pvis = lastinspector;
1004 /* We add visit_existing_*() as late as possible to reduce the
1005 * number of stat() calls.
1007 switch (check_existence)
1009 case ACCEPT_EXISTING:
1010 if (follow_symlinks) /* -L, default */
1011 add_visitor(visit_existing_follow, NULL);
1012 else /* -P */
1013 add_visitor(visit_existing_nofollow, NULL);
1014 break;
1016 case ACCEPT_NON_EXISTING:
1017 if (follow_symlinks) /* -L, default */
1018 add_visitor(visit_non_existing_follow, NULL);
1019 else /* -P */
1020 add_visitor(visit_non_existing_nofollow, NULL);
1021 break;
1023 case ACCEPT_EITHER: /* Default, neither -E nor -e */
1024 /* do nothing; no extra processing. */
1025 break;
1028 if (stats)
1029 add_visitor(visit_stats, &statistics);
1031 if (enable_print)
1033 if (print_quoted_filename)
1034 add_visitor(visit_justprint_quoted, NULL);
1035 else
1036 add_visitor(visit_justprint_unquoted, NULL);
1040 if (use_limit)
1041 add_visitor(visit_limit, plimit);
1042 else
1043 add_visitor(visit_count, plimit);
1046 if (argc > 1)
1048 past_pat_inspector = pvis->next;
1049 if (op_and)
1050 mainprocessor = process_and;
1051 else
1052 mainprocessor = process_or;
1054 else
1055 mainprocessor = process_simple;
1057 if (stats)
1059 printf(_("Database %s is in the %s format.\n"),
1060 procdata.dbfile,
1061 old_format ? _("old") : "LOCATE02");
1065 procdata.c = getc (procdata.fp);
1066 /* If we are searching for filename patterns, the inspector list
1067 * will contain an entry for each pattern for which we are searching.
1069 while ( (procdata.c != EOF) &&
1070 (VISIT_ABORT != (mainprocessor)(&procdata)) )
1072 /* Do nothing; all the work is done in the visitor functions. */
1075 if (stats)
1077 print_stats(argc, st.st_size);
1080 if (ferror (procdata.fp))
1082 error (0, errno, "%s", procdata.dbfile);
1083 return 0;
1085 if (procdata.fp != stdin && fclose (procdata.fp) == EOF)
1087 error (0, errno, "%s", dbfile);
1088 return 0;
1091 return plimit->items_accepted;
1097 extern char *version_string;
1099 /* The name this program was run with. */
1100 char *program_name;
1102 static void
1103 usage (FILE *stream)
1105 fprintf (stream, _("\
1106 Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1107 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1108 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1109 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
1110 [-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\
1111 [-version] [--help]\n\
1112 pattern...\n"),
1113 program_name);
1114 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
1116 enum
1118 REGEXTYPE_OPTION = CHAR_MAX + 1
1122 static struct option const longopts[] =
1124 {"database", required_argument, NULL, 'd'},
1125 {"existing", no_argument, NULL, 'e'},
1126 {"non-existing", no_argument, NULL, 'E'},
1127 {"ignore-case", no_argument, NULL, 'i'},
1128 {"all", no_argument, NULL, 'A'},
1129 {"help", no_argument, NULL, 'h'},
1130 {"version", no_argument, NULL, 'v'},
1131 {"null", no_argument, NULL, '0'},
1132 {"count", no_argument, NULL, 'c'},
1133 {"wholename", no_argument, NULL, 'w'},
1134 {"wholepath", no_argument, NULL, 'w'}, /* Synonym. */
1135 {"basename", no_argument, NULL, 'b'},
1136 {"print", no_argument, NULL, 'p'},
1137 {"stdio", no_argument, NULL, 's'},
1138 {"mmap", no_argument, NULL, 'm'},
1139 {"limit", required_argument, NULL, 'l'},
1140 {"regex", no_argument, NULL, 'r'},
1141 {"regextype", required_argument, NULL, REGEXTYPE_OPTION},
1142 {"statistics", no_argument, NULL, 'S'},
1143 {"follow", no_argument, NULL, 'L'},
1144 {"nofollow", no_argument, NULL, 'P'},
1145 {NULL, no_argument, NULL, 0}
1149 main (int argc, char **argv)
1151 char *dbpath;
1152 unsigned long int found = 0uL;
1153 int optc;
1154 int ignore_case = 0;
1155 int print = 0;
1156 int just_count = 0;
1157 int basename_only = 0;
1158 int use_limit = 0;
1159 int regex = 0;
1160 int regex_options = RE_SYNTAX_EMACS;
1161 int stats = 0;
1162 int op_and = 0;
1163 char *e;
1165 program_name = argv[0];
1167 #ifdef HAVE_SETLOCALE
1168 setlocale (LC_ALL, "");
1169 #endif
1170 bindtextdomain (PACKAGE, LOCALEDIR);
1171 textdomain (PACKAGE);
1172 atexit (close_stdout);
1174 limits.limit = 0;
1175 limits.items_accepted = 0;
1177 quote_opts = clone_quoting_options (NULL);
1178 print_quoted_filename = true;
1180 dbpath = getenv ("LOCATE_PATH");
1181 if (dbpath == NULL)
1182 dbpath = LOCATE_DB;
1184 check_existence = ACCEPT_EITHER;
1186 while ((optc = getopt_long (argc, argv, "Abcd:eEil:prsm0SwHPL", longopts, (int *) 0)) != -1)
1187 switch (optc)
1189 case '0':
1190 separator = 0;
1191 print_quoted_filename = false; /* print filename 'raw'. */
1192 break;
1194 case 'A':
1195 op_and = 1;
1196 break;
1198 case 'b':
1199 basename_only = 1;
1200 break;
1202 case 'c':
1203 just_count = 1;
1204 break;
1206 case 'd':
1207 dbpath = optarg;
1208 break;
1210 case 'e':
1211 check_existence = ACCEPT_EXISTING;
1212 break;
1214 case 'E':
1215 check_existence = ACCEPT_NON_EXISTING;
1216 break;
1218 case 'i':
1219 ignore_case = 1;
1220 break;
1222 case 'h':
1223 usage (stdout);
1224 return 0;
1226 case 'p':
1227 print = 1;
1228 break;
1230 case 'v':
1231 printf (_("GNU locate version %s\n"), version_string);
1232 return 0;
1234 case 'w':
1235 basename_only = 0;
1236 break;
1238 case 'r':
1239 regex = 1;
1240 break;
1242 case REGEXTYPE_OPTION:
1243 regex_options = get_regex_type(optarg);
1244 break;
1246 case 'S':
1247 stats = 1;
1248 break;
1250 case 'L':
1251 follow_symlinks = 1;
1252 break;
1254 /* In find, -P and -H differ in the way they handle paths
1255 * given on the command line. This is not relevant for
1256 * locate, but the -H option is supported because it is
1257 * probably more intuitive to do so.
1259 case 'P':
1260 case 'H':
1261 follow_symlinks = 0;
1262 break;
1264 case 'l':
1266 char *end = optarg;
1267 strtol_error err = xstrtoumax(optarg, &end, 10, &limits.limit, NULL);
1268 if (LONGINT_OK != err)
1270 STRTOL_FATAL_ERROR(optarg, _("argument to --limit"), err);
1272 use_limit = 1;
1274 break;
1276 case 's': /* use stdio */
1277 case 'm': /* use mmap */
1278 /* These options are implemented simply for
1279 * compatibility with FreeBSD
1281 break;
1283 default:
1284 usage (stderr);
1285 return 1;
1288 if (!just_count && !stats)
1289 print = 1;
1291 if (stats)
1293 if (optind == argc)
1294 use_limit = 0;
1296 else
1298 if (!just_count && optind == argc)
1300 usage (stderr);
1301 return 1;
1306 if (1 == isatty(STDOUT_FILENO))
1307 stdout_is_a_tty = true;
1308 else
1309 stdout_is_a_tty = false;
1311 next_element (dbpath, 0); /* Initialize. */
1313 /* Bail out early if limit already reached. */
1314 while ((e = next_element ((char *) NULL, 0)) != NULL &&
1315 (!use_limit || limits.limit > limits.items_accepted))
1317 statistics.compressed_bytes =
1318 statistics.total_filename_count =
1319 statistics.total_filename_length =
1320 statistics.whitespace_count =
1321 statistics.newline_count =
1322 statistics.highbit_filename_count = 0u;
1324 if (0 == strlen(e) || 0 == strcmp(e, "."))
1326 /* Use the default database name instead (note: we
1327 * don't use 'dbpath' since that might itself contain a
1328 * colon-separated list.
1330 e = LOCATE_DB;
1333 found = locate (argc - optind, &argv[optind], e, ignore_case, print, basename_only, use_limit, &limits, stats, op_and, regex, regex_options);
1336 if (just_count)
1338 printf("%ld\n", found);
1341 if (found || (use_limit && (limits.limit==0)) || stats )
1342 return 0;
1343 else
1344 return 1;