1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003,
3 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 /* Usage: locate [options] pattern...
21 Scan a pathname list for the full pathname of a file, given only
22 a piece of the name (possibly containing shell globbing metacharacters).
23 The list has been processed with front-compression, which reduces
24 the list size by a factor of 4-5.
25 Recognizes two database formats, old and new. The old format is
26 bigram coded, which reduces space by a further 20-25% and uses the
27 following encoding of the database bytes:
29 0-28 likeliest differential counts + offset (14) to make nonnegative
30 30 escape code for out-of-range count to follow in next halfword
31 128-255 bigram codes (the 128 most common, as determined by `updatedb')
32 32-127 single character (printable) ASCII remainder
34 Earlier versions of GNU locate used to use a novel two-tiered
35 string search technique, which was described in Usenix ;login:, Vol
36 8, No 1, February/March, 1983, p. 8.
38 However, latterly code changes to provide additional functionality
39 became difficult to make with the existing reading scheme, and so
40 we no longer perform the matching as efficiently as we used to (that is,
41 we no longer use the same algorithm).
43 The old algorithm was:
45 First, match a metacharacter-free subpattern and a partial
46 pathname BACKWARDS to avoid full expansion of the pathname list.
47 The time savings is 40-50% over forward matching, which cannot
48 efficiently handle overlapped search patterns and compressed
51 Then, match the actual shell glob pattern (if in this form)
52 against the candidate pathnames using the slower shell filename
56 Written by James A. Woods <jwoods@adobe.com>.
57 Modified by David MacKenzie <djm@gnu.org>.
58 Additional work by James Youngman and Bas van Gompel.
66 #include <sys/types.h>
67 #include <grp.h> /* for setgroups() */
74 #include <stdbool.h> /* for bool/boolean */
76 /* The presence of unistd.h is assumed by gnulib these days, so we
77 * might as well assume it too.
79 /* We need <unistd.h> for isatty(). */
100 # include <libintl.h>
101 # define _(Text) gettext (Text)
103 # define _(Text) Text
104 #define textdomain(Domain)
105 #define bindtextdomain(Package, Directory)
106 #define ngettext(singular,plural,n) ((1==n) ? singular : plural)
109 # define N_(String) gettext_noop (String)
111 /* We used to use (String) instead of just String, but apparently ISO C
112 * doesn't allow this (at least, that's what HP said when someone reported
113 * this as a compiler bug). This is HP case number 1205608192. See
114 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
115 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
116 * like: static const char buf[] = ("string");
118 # define N_(String) String
121 #include "locatedb.h"
126 #include "closeout.h"
127 #include "nextelem.h"
130 #include "quotearg.h"
131 #include "printquoted.h"
132 #include "regextype.h"
133 #include "findutils-version.h"
135 /* Note that this evaluates Ch many times. */
137 # define TOUPPER(Ch) toupper (Ch)
138 # define TOLOWER(Ch) tolower (Ch)
140 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
141 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
144 /* typedef enum {false, true} boolean; */
146 /* Warn if a database is older than this. 8 days allows for a weekly
147 update that takes up to a day to perform. */
148 static unsigned int warn_number_units
= 8;
150 /* Printable name of units used in WARN_SECONDS */
151 static const char warn_name_units
[] = N_("days");
152 #define SECONDS_PER_UNIT (60 * 60 * 24)
156 VISIT_CONTINUE
= 1, /* please call the next visitor */
157 VISIT_ACCEPTED
= 2, /* accepted, call no further callbacks for this file */
158 VISIT_REJECTED
= 4, /* rejected, process next file. */
159 VISIT_ABORT
= 8 /* rejected, process no more files. */
162 enum ExistenceCheckType
164 ACCEPT_EITHER
, /* Corresponds to lack of -E/-e option */
165 ACCEPT_EXISTING
, /* Corresponds to option -e */
166 ACCEPT_NON_EXISTING
/* Corresponds to option -E */
169 /* Check for existence of files before printing them out? */
170 enum ExistenceCheckType check_existence
= ACCEPT_EITHER
;
172 static int follow_symlinks
= 1;
174 /* What to separate the results with. */
175 static int separator
= '\n';
177 static struct quoting_options
* quote_opts
= NULL
;
178 static bool stdout_is_a_tty
;
179 static bool print_quoted_filename
;
180 static bool results_were_filtered
;
182 static const char *selected_secure_db
= NULL
;
185 /* Change the number of days old the database can be
186 * before we complain about it.
189 set_max_db_age(const char *s
)
192 unsigned long int val
;
193 /* XXX: we ignore the case where the input is negative, which is allowed(!). */
198 _("The argument for option --max-database-age must not be empty"));
202 /* We have to set errno here, otherwise when the function returns ULONG_MAX,
203 * we would not be able to tell if that is the correct answer, or whether it
204 * signifies an error.
207 val
= strtoul(s
, &end
, 10);
209 /* Diagnose number too large, non-numbes and trailing junk. */
210 if ((ULONG_MAX
== val
&& ERANGE
== errno
) ||
211 (0 == val
&& EINVAL
== errno
))
214 _("Invalid argument %s for option --max-database-age"),
215 quotearg_n_style(0, locale_quoting_style
, s
));
219 /* errno wasn't set, don't print its message */
221 _("Invalid argument %s for option --max-database-age"),
222 quotearg_n_style(0, locale_quoting_style
, s
));
226 warn_number_units
= val
;
232 /* Read in a 16-bit int, high byte first (network byte order). */
240 x
= (signed char) fgetc (fp
) << 8;
241 x
|= (fgetc (fp
) & 0xff);
/* The characters whose presence makes a pattern a shell glob. */
const char * const metacharacters = "*?[]\\";

/* Return nonzero if S contains any shell glob characters.
 *
 * The result is 1 when at least one byte of S appears in
 * `metacharacters', and 0 otherwise (including for the empty string).
 */
static int
contains_metacharacter(const char *s)
{
  return NULL != strpbrk(s, metacharacters);
}
260 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
261 * until we reach DELIMITER or end-of-file. We reallocate the buffer
262 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
263 * is made regarding the content of the data (i.e. the implementation is
264 * 8-bit clean, the only delimiter is DELIMITER).
266 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
267 * has been removed from gnulib.
269 * We call the function locate_read_str() to avoid a name clash with the curses
273 locate_read_str(char **buf
, size_t *siz
, FILE *fp
, int delimiter
, int offs
)
280 nread
= getdelim(&p
, &sz
, delimiter
, fp
);
285 needed
= offs
+ nread
+ 1u;
288 char *pnew
= realloc(*buf
, needed
);
291 return -1; /* FAIL */
299 memcpy((*buf
)+offs
, p
, nread
);
309 uintmax_t items_accepted
;
311 static struct locate_limits limits
;
316 uintmax_t compressed_bytes
;
317 uintmax_t total_filename_count
;
318 uintmax_t total_filename_length
;
319 uintmax_t whitespace_count
;
320 uintmax_t newline_count
;
321 uintmax_t highbit_filename_count
;
323 static struct locate_stats statistics
;
326 struct regular_expression
328 struct re_pattern_buffer regex
; /* for --regex */
334 int c
; /* An input byte. */
335 char itemcount
; /* Indicates we're at the beginning of an slocate db. */
336 int count
; /* The length of the prefix shared with the previous database entry. */
338 char *original_filename
; /* The current input database entry. */
339 size_t pathsize
; /* Amount allocated for it. */
340 char *munged_filename
; /* path or basename(path) */
341 FILE *fp
; /* The pathname database. */
342 const char *dbfile
; /* Its name, or "<stdin>" */
343 int slocatedb_format
; /* Allows us to cope with slocate's format variant */
344 GetwordEndianState endian_state
;
345 /* for the old database format,
346 the first and second characters of the most common bigrams. */
352 typedef int (*visitfunc
)(struct process_data
*procdata
,
359 struct visitor
*next
;
363 static struct visitor
*inspectors
= NULL
;
364 static struct visitor
*lastinspector
= NULL
;
365 static struct visitor
*past_pat_inspector
= NULL
;
367 static inline int visit(const struct visitor
*p
,
369 struct process_data
*procdata
,
370 const struct visitor
* const stop
)
372 register int result
= accept_flags
;
373 while ( (accept_flags
& result
) && (stop
!= p
) )
375 result
= (p
->inspector
)(procdata
, p
->context
);
381 /* 0 or 1 pattern(s) */
383 process_simple(struct process_data
*procdata
)
385 return visit(inspectors
, (VISIT_CONTINUE
|VISIT_ACCEPTED
), procdata
, NULL
);
388 /* Accept if any pattern matches. */
390 process_or (struct process_data
*procdata
)
394 result
= visit(inspectors
, (VISIT_CONTINUE
|VISIT_REJECTED
), procdata
, past_pat_inspector
);
395 if (result
== VISIT_CONTINUE
)
396 result
= VISIT_REJECTED
;
397 if (result
& (VISIT_ABORT
| VISIT_REJECTED
))
400 result
= visit(past_pat_inspector
, VISIT_CONTINUE
, procdata
, NULL
);
401 if (VISIT_CONTINUE
== result
)
402 return VISIT_ACCEPTED
;
407 /* Accept if all pattern match. */
409 process_and (struct process_data
*procdata
)
413 result
= visit(inspectors
, (VISIT_CONTINUE
|VISIT_ACCEPTED
), procdata
, past_pat_inspector
);
414 if (result
== VISIT_CONTINUE
)
415 result
= VISIT_REJECTED
;
416 if (result
& (VISIT_ABORT
| VISIT_REJECTED
))
419 result
= visit(past_pat_inspector
, VISIT_CONTINUE
, procdata
, NULL
);
420 if (VISIT_CONTINUE
== result
)
421 return VISIT_ACCEPTED
;
426 typedef int (*processfunc
)(struct process_data
*procdata
);
428 static processfunc mainprocessor
= NULL
;
431 add_visitor(visitfunc fn
, void *context
)
433 struct visitor
*p
= xmalloc(sizeof(struct visitor
));
435 p
->context
= context
;
438 if (NULL
== lastinspector
)
440 lastinspector
= inspectors
= p
;
444 lastinspector
->next
= p
;
450 visit_justprint_quoted(struct process_data
*procdata
, void *context
)
453 print_quoted (stdout
, quote_opts
, stdout_is_a_tty
,
455 procdata
->original_filename
);
457 return VISIT_CONTINUE
;
461 visit_justprint_unquoted(struct process_data
*procdata
, void *context
)
464 fputs(procdata
->original_filename
, stdout
);
466 return VISIT_CONTINUE
;
470 toolong (struct process_data
*procdata
)
473 _("locate database %s contains a "
474 "filename longer than locate can handle"),
479 extend (struct process_data
*procdata
, size_t siz1
, size_t siz2
)
481 /* Figure out if the addition operation is safe before performing it. */
482 if (SIZE_MAX
- siz1
< siz2
)
486 else if (procdata
->pathsize
< (siz1
+siz2
))
488 procdata
->pathsize
= siz1
+siz2
;
489 procdata
->original_filename
= x2nrealloc (procdata
->original_filename
,
496 visit_old_format(struct process_data
*procdata
, void *context
)
501 if (EOF
== procdata
->c
)
504 /* Get the offset in the path where this path info starts. */
505 if (procdata
->c
== LOCATEDB_OLD_ESCAPE
)
510 procdata
->count
-= LOCATEDB_OLD_OFFSET
;
511 minval
= (0 - procdata
->count
);
512 if (procdata
->count
>= 0)
513 maxval
= (procdata
->len
- procdata
->count
);
515 maxval
= (procdata
->len
- 0);
516 word
= getword(procdata
->fp
, procdata
->dbfile
,
517 minval
, maxval
, &procdata
->endian_state
);
518 procdata
->count
+= word
;
519 assert(procdata
->count
>= 0);
523 procdata
->count
+= (procdata
->c
- LOCATEDB_OLD_OFFSET
);
524 assert(procdata
->count
>= 0);
527 /* Overlay the old path with the remainder of the new. Read
528 * more data until we get to the next filename.
530 for (i
=procdata
->count
;
531 (procdata
->c
= getc (procdata
->fp
)) > LOCATEDB_OLD_ESCAPE
;)
533 if (EOF
== procdata
->c
)
536 if (procdata
->c
< 0200)
538 /* An ordinary character. */
539 extend (procdata
, i
, 1u);
540 procdata
->original_filename
[i
++] = procdata
->c
;
544 /* Bigram markers have the high bit set. */
545 extend (procdata
, i
, 2u);
547 procdata
->original_filename
[i
++] = procdata
->bigram1
[procdata
->c
];
548 procdata
->original_filename
[i
++] = procdata
->bigram2
[procdata
->c
];
552 /* Consider the case where we executed the loop body zero times; we
553 * still need space for the terminating null byte.
555 extend (procdata
, i
, 1u);
556 procdata
->original_filename
[i
] = 0;
558 procdata
->munged_filename
= procdata
->original_filename
;
560 return VISIT_CONTINUE
;
564 visit_locate02_format(struct process_data
*procdata
, void *context
)
570 if (procdata
->slocatedb_format
)
572 if (procdata
->itemcount
== 0)
574 ungetc(procdata
->c
, procdata
->fp
);
578 else if (procdata
->itemcount
== 1)
580 procdata
->count
= procdata
->len
-1;
584 if (procdata
->c
== LOCATEDB_ESCAPE
)
585 procdata
->count
+= (short)get_short (procdata
->fp
);
586 else if (procdata
->c
> 127)
587 procdata
->count
+= procdata
->c
- 256;
589 procdata
->count
+= procdata
->c
;
594 if (procdata
->c
== LOCATEDB_ESCAPE
)
595 procdata
->count
+= (short)get_short (procdata
->fp
);
596 else if (procdata
->c
> 127)
597 procdata
->count
+= procdata
->c
- 256;
599 procdata
->count
+= procdata
->c
;
602 if (procdata
->count
> procdata
->len
|| procdata
->count
< 0)
604 /* This should not happen generally, but since we're
605 * reading in data which is outside our control, we
608 error(1, 0, _("locate database %s is corrupt or invalid"),
609 quotearg_n_style(0, locale_quoting_style
, procdata
->dbfile
));
612 /* Overlay the old path with the remainder of the new. */
613 nread
= locate_read_str (&procdata
->original_filename
,
615 procdata
->fp
, 0, procdata
->count
);
618 procdata
->c
= getc (procdata
->fp
);
619 procdata
->len
= procdata
->count
+ nread
;
620 s
= procdata
->original_filename
+ procdata
->len
- 1; /* Move to the last char in path. */
621 assert (s
[0] != '\0');
622 assert (s
[1] == '\0'); /* Our terminator. */
623 assert (s
[2] == '\0'); /* Added by locate_read_str. */
625 procdata
->munged_filename
= procdata
->original_filename
;
627 if (procdata
->slocatedb_format
)
629 /* Don't increment indefinitely, it might overflow. */
630 if (procdata
->itemcount
< 6)
632 ++(procdata
->itemcount
);
637 return VISIT_CONTINUE
;
641 visit_basename(struct process_data
*procdata
, void *context
)
644 procdata
->munged_filename
= last_component (procdata
->original_filename
);
646 return VISIT_CONTINUE
;
650 /* visit_existing_follow implements -L -e */
652 visit_existing_follow(struct process_data
*procdata
, void *context
)
657 /* munged_filename has been converted in some way (to lower case,
658 * or is just the base name of the file), and original_filename has not.
659 * Hence only original_filename is still actually the name of the file
660 * whose existence we would need to check.
662 if (stat(procdata
->original_filename
, &st
) != 0)
664 return VISIT_REJECTED
;
668 return VISIT_CONTINUE
;
672 /* visit_non_existing_follow implements -L -E */
674 visit_non_existing_follow(struct process_data
*procdata
, void *context
)
679 /* munged_filename has been converted in some way (to lower case,
680 * or is just the base name of the file), and original_filename has not.
681 * Hence only original_filename is still actually the name of the file
682 * whose existence we would need to check.
684 if (stat(procdata
->original_filename
, &st
) == 0)
686 return VISIT_REJECTED
;
690 return VISIT_CONTINUE
;
694 /* visit_existing_nofollow implements -P -e */
696 visit_existing_nofollow(struct process_data
*procdata
, void *context
)
701 /* munged_filename has been converted in some way (to lower case,
702 * or is just the base name of the file), and original_filename has not.
703 * Hence only original_filename is still actually the name of the file
704 * whose existence we would need to check.
706 if (lstat(procdata
->original_filename
, &st
) != 0)
708 return VISIT_REJECTED
;
712 return VISIT_CONTINUE
;
716 /* visit_non_existing_nofollow implements -P -E */
718 visit_non_existing_nofollow(struct process_data
*procdata
, void *context
)
723 /* munged_filename has been converted in some way (to lower case,
724 * or is just the base name of the file), and original_filename has not.
725 * Hence only original_filename is still actually the name of the file
726 * whose existence we would need to check.
728 if (lstat(procdata
->original_filename
, &st
) == 0)
730 return VISIT_REJECTED
;
734 return VISIT_CONTINUE
;
739 visit_substring_match_nocasefold_wide(struct process_data
*procdata
, void *context
)
741 const char *pattern
= context
;
743 if (NULL
!= mbsstr(procdata
->munged_filename
, pattern
))
744 return VISIT_ACCEPTED
;
746 return VISIT_REJECTED
;
750 visit_substring_match_nocasefold_narrow(struct process_data
*procdata
, void *context
)
752 const char *pattern
= context
;
753 assert(MB_CUR_MAX
== 1);
754 if (NULL
!= strstr(procdata
->munged_filename
, pattern
))
755 return VISIT_ACCEPTED
;
757 return VISIT_REJECTED
;
761 visit_substring_match_casefold_wide(struct process_data
*procdata
, void *context
)
763 const char *pattern
= context
;
765 if (NULL
!= mbscasestr(procdata
->munged_filename
, pattern
))
766 return VISIT_ACCEPTED
;
768 return VISIT_REJECTED
;
773 visit_substring_match_casefold_narrow(struct process_data
*procdata
, void *context
)
775 const char *pattern
= context
;
777 assert(MB_CUR_MAX
== 1);
778 if (NULL
!= strcasestr(procdata
->munged_filename
, pattern
))
779 return VISIT_ACCEPTED
;
781 return VISIT_REJECTED
;
786 visit_globmatch_nofold(struct process_data
*procdata
, void *context
)
788 const char *glob
= context
;
789 if (fnmatch(glob
, procdata
->munged_filename
, 0) != 0)
790 return VISIT_REJECTED
;
792 return VISIT_ACCEPTED
;
797 visit_globmatch_casefold(struct process_data
*procdata
, void *context
)
799 const char *glob
= context
;
800 if (fnmatch(glob
, procdata
->munged_filename
, FNM_CASEFOLD
) != 0)
801 return VISIT_REJECTED
;
803 return VISIT_ACCEPTED
;
808 visit_regex(struct process_data
*procdata
, void *context
)
810 struct regular_expression
*p
= context
;
811 const size_t len
= strlen(procdata
->munged_filename
);
813 int rv
= re_search (&p
->regex
, procdata
->munged_filename
,
815 (struct re_registers
*) NULL
);
818 return VISIT_REJECTED
; /* no match (-1), or internal error (-2) */
822 return VISIT_ACCEPTED
; /* match */
828 visit_stats(struct process_data
*procdata
, void *context
)
830 struct locate_stats
*p
= context
;
831 size_t len
= strlen(procdata
->original_filename
);
833 int highbit
, whitespace
, newline
;
835 ++(p
->total_filename_count
);
836 p
->total_filename_length
+= len
;
838 highbit
= whitespace
= newline
= 0;
839 for (s
=procdata
->original_filename
; *s
; ++s
)
841 if ( (int)(*s
) & 128 )
845 newline
= whitespace
= 1;
847 else if (isspace((unsigned char)*s
))
854 ++(p
->highbit_filename_count
);
856 ++(p
->whitespace_count
);
858 ++(p
->newline_count
);
860 return VISIT_CONTINUE
;
865 visit_limit(struct process_data
*procdata
, void *context
)
867 struct locate_limits
*p
= context
;
871 if (++p
->items_accepted
>= p
->limit
)
874 return VISIT_CONTINUE
;
878 visit_count(struct process_data
*procdata
, void *context
)
880 struct locate_limits
*p
= context
;
885 return VISIT_CONTINUE
;
888 /* Emit the statistics.
891 print_stats(int argc
, size_t database_file_size
)
893 char hbuf1
[LONGEST_HUMAN_READABLE
+ 1];
894 char hbuf2
[LONGEST_HUMAN_READABLE
+ 1];
895 char hbuf3
[LONGEST_HUMAN_READABLE
+ 1];
896 char hbuf4
[LONGEST_HUMAN_READABLE
+ 1];
898 printf(ngettext("Locate database size: %s byte\n",
899 "Locate database size: %s bytes\n",
901 human_readable ((uintmax_t) database_file_size
,
902 hbuf1
, human_ceiling
, 1, 1));
904 printf( (results_were_filtered
?
905 _("Matching Filenames: %s\n") :
906 _("All Filenames: %s\n")),
907 human_readable (statistics
.total_filename_count
,
908 hbuf1
, human_ceiling
, 1, 1));
909 /* XXX: We would ideally use ngettext() here, but I don't know
910 * how to use it to handle more than one possibly-plural thing/
912 printf(_("File names have a cumulative length of %s bytes.\n"
913 "Of those file names,\n"
914 "\n\t%s contain whitespace, "
915 "\n\t%s contain newline characters, "
916 "\n\tand %s contain characters with the high bit set.\n"),
917 human_readable (statistics
.total_filename_length
, hbuf1
, human_ceiling
, 1, 1),
918 human_readable (statistics
.whitespace_count
, hbuf2
, human_ceiling
, 1, 1),
919 human_readable (statistics
.newline_count
, hbuf3
, human_ceiling
, 1, 1),
920 human_readable (statistics
.highbit_filename_count
, hbuf4
, human_ceiling
, 1, 1));
924 if (results_were_filtered
)
926 printf(_("Some filenames may have been filtered out, "
927 "so we cannot compute the compression ratio.\n"));
931 if (statistics
.total_filename_length
)
933 /* A negative compression ratio just means that the
934 * compressed database is larger than the list of
935 * filenames. This can happen for example for
936 * old-format databases containing a small list of short
937 * filenames, because the bigram list is 256 bytes.
939 printf(_("Compression ratio %4.2f%% (higher is better)\n"),
940 100.0 * ((double)statistics
.total_filename_length
941 - (double) database_file_size
)
942 / (double) statistics
.total_filename_length
);
946 printf(_("Compression ratio is undefined\n"));
954 * Return nonzero if the data we read in indicates that we are
955 * looking at a LOCATE02 locate database.
958 looking_at_gnu_locatedb (const char *data
, size_t len
)
960 if (len
< sizeof (LOCATEDB_MAGIC
))
962 else if (0 == memcmp (data
, LOCATEDB_MAGIC
, sizeof (LOCATEDB_MAGIC
)))
963 return 1; /* We saw the magic byte sequence */
969 * Return nonzero if the data we read in indicates that we are
970 * looking at an slocate database.
973 looking_at_slocate_locatedb (const char *filename
,
986 /* Check that the magic number is a one-byte string */
989 if (isdigit((unsigned char)data
[0]))
991 /* looks promising. */
992 *seclevel
= (data
[0] - '0');
996 /* Hmm, well it's probably an slocate database
997 * of some awesomely huge security level, like 2.
998 * We don't know how to handle those.
1001 _("locate database %s looks like an slocate "
1002 "database but it seems to have security level %c, "
1003 "which GNU findutils does not currently support"),
1004 quotearg_n_style(0, locale_quoting_style
, filename
),
1021 /* Definitely not slocate. */
/* Return 1 if this host stores the least significant byte of an
 * unsigned int at the lowest address, and 0 otherwise.
 *
 * The byte array is sized from the integer type itself, so the check
 * does not depend on unsigned int being exactly four bytes wide.
 */
static int
i_am_little_endian(void)
{
  union
  {
    unsigned int ui;
    unsigned char uch[sizeof (unsigned int)];
  } u;

  u.ui = 1u;
  return 1 == u.uch[0];
}
1045 /* Print or count the entries in DBFILE that match shell globbing patterns in
1046 ARGV. Return the number of entries matched. */
1048 static unsigned long
1049 search_one_database (int argc
,
1058 struct locate_limits
*plimit
,
1064 char *pathpart
; /* A pattern to consider. */
1065 int argn
; /* Index to current pattern in argv. */
1066 int nread
; /* number of bytes read from an entry. */
1067 struct process_data procdata
; /* Storage for data shared with visitors. */
1068 int slocate_seclevel
;
1070 struct visitor
* pvis
; /* temp for determining past_pat_inspector. */
1071 const char *format_name
;
1072 enum ExistenceCheckType do_check_existence
;
1075 /* We may turn on existence checking for a given database.
1076 * We ensure that we can return to the previous behaviour
1077 * by using two variables, do_check_existence (which we act on)
1078 * and check_existence (which indicates the default before we
1079 * adjust it on the basis of what kind of database we're using
1081 do_check_existence
= check_existence
;
1085 regex_options
|= RE_ICASE
;
1088 procdata
.endian_state
= GetwordEndianStateInitial
;
1089 procdata
.len
= procdata
.count
= 0;
1090 procdata
.slocatedb_format
= 0;
1091 procdata
.itemcount
= 0;
1093 procdata
.dbfile
= dbfile
;
1096 /* Set up the inspection regime */
1098 lastinspector
= NULL
;
1099 past_pat_inspector
= NULL
;
1100 results_were_filtered
= false;
1102 procdata
.pathsize
= 1026; /* Increased as necessary by locate_read_str. */
1104 procdata
.pathsize
= 128; /* Increased as necessary by locate_read_str. */
1106 procdata
.original_filename
= xmalloc (procdata
.pathsize
);
1109 nread
= fread (procdata
.original_filename
, 1, SLOCATE_DB_MAGIC_LEN
,
1111 slocate_seclevel
= 0;
1112 if (looking_at_slocate_locatedb(procdata
.dbfile
,
1113 procdata
.original_filename
,
1118 _("%s is an slocate database. "
1119 "Support for these is new, expect problems for now."),
1120 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
));
1122 /* slocate also uses frcode, but with a different header.
1123 * We handle the header here and then work with the data
1124 * in the normal way.
1126 if (slocate_seclevel
> 1)
1128 /* We don't know what those security levels mean,
1129 * so do nothing further
1132 _("%s is an slocate database of unsupported security level %d; skipping it."),
1133 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
),
1137 else if (slocate_seclevel
> 0)
1139 /* Don't show the filenames to the user if they don't exist.
1140 * Showing stats is safe since filenames are only counted
1141 * after the existence check
1143 if (ACCEPT_NON_EXISTING
== check_existence
)
1145 /* Do not allow the user to see a list of filenames that they
1149 _("You specified the -E option, but that option "
1150 "cannot be used with slocate-format databases "
1151 "with a non-zero security level. No results will be "
1152 "generated for this database.\n"));
1155 if (ACCEPT_EXISTING
!= do_check_existence
)
1157 if (enable_print
|| stats
)
1160 _("%s is an slocate database. "
1161 "Turning on the '-e' option."),
1162 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
));
1164 do_check_existence
= ACCEPT_EXISTING
;
1167 add_visitor(visit_locate02_format
, NULL
);
1168 format_name
= "slocate";
1169 procdata
.slocatedb_format
= 1;
1175 procdata
.slocatedb_format
= 0;
1176 extend (&procdata
, sizeof(LOCATEDB_MAGIC
), 0u);
1177 nread2
= fread (procdata
.original_filename
+nread
, 1, sizeof (LOCATEDB_MAGIC
)-nread
,
1179 if (looking_at_gnu_locatedb(procdata
.original_filename
, nread
+nread2
))
1181 add_visitor(visit_locate02_format
, NULL
);
1182 format_name
= "GNU LOCATE02";
1184 else /* Use the old format */
1189 extend (&procdata
, 256u, 0u);
1190 /* Read the list of the most common bigrams in the database. */
1193 int more_read
= fread (procdata
.original_filename
+ nread
, 1,
1194 256 - nread
, procdata
.fp
);
1195 if ( (more_read
+ nread
) != 256 )
1198 _("Old-format locate database %s is "
1199 "too short to be valid"),
1200 quotearg_n_style(0, locale_quoting_style
, dbfile
));
1205 for (i
= 0; i
< 128; i
++)
1207 procdata
.bigram1
[i
] = procdata
.original_filename
[i
<< 1];
1208 procdata
.bigram2
[i
] = procdata
.original_filename
[(i
<< 1) + 1];
1210 format_name
= "old";
1212 add_visitor(visit_old_format
, NULL
);
1217 add_visitor(visit_basename
, NULL
);
1219 /* Add an inspector for each pattern we're looking for. */
1220 for ( argn
= 0; argn
< argc
; argn
++ )
1222 results_were_filtered
= true;
1223 pathpart
= argv
[argn
];
1226 struct regular_expression
*p
= xmalloc(sizeof(*p
));
1227 const char *error_message
= NULL
;
1229 memset (&p
->regex
, 0, sizeof (p
->regex
));
1231 re_set_syntax(regex_options
);
1232 p
->regex
.allocated
= 100;
1233 p
->regex
.buffer
= xmalloc (p
->regex
.allocated
);
1234 p
->regex
.fastmap
= NULL
;
1235 p
->regex
.syntax
= regex_options
;
1236 p
->regex
.translate
= NULL
;
1238 error_message
= re_compile_pattern (pathpart
, strlen (pathpart
),
1242 error (1, 0, "%s", error_message
);
1246 add_visitor(visit_regex
, p
);
1249 else if (contains_metacharacter(pathpart
))
1252 add_visitor(visit_globmatch_casefold
, pathpart
);
1254 add_visitor(visit_globmatch_nofold
, pathpart
);
1258 /* No glob characters used. Hence we match on
1259 * _any part_ of the filename, not just the
1260 * basename. This seems odd to me, but it is the
1261 * traditional behaviour.
1262 * James Youngman <jay@gnu.org>
1265 if (1 == MB_CUR_MAX
)
1267 /* As an optimisation, use a strstr() matcher if we are
1268 * in a unibyte locale. This can give a x2 speedup in
1269 * the C locale. Some light testing reveals that
1270 * glibc's strstr() is somewhere around 40% faster than
1271 * gnulib's, so we just use strstr().
1273 matcher
= ignore_case
?
1274 visit_substring_match_casefold_narrow
:
1275 visit_substring_match_nocasefold_narrow
;
1279 matcher
= ignore_case
?
1280 visit_substring_match_casefold_wide
:
1281 visit_substring_match_nocasefold_wide
;
1283 add_visitor(matcher
, pathpart
);
1287 pvis
= lastinspector
;
1289 /* We add visit_existing_*() as late as possible to reduce the
1290 * number of stat() calls.
1292 switch (do_check_existence
)
1294 case ACCEPT_EXISTING
:
1295 results_were_filtered
= true;
1296 if (follow_symlinks
) /* -L, default */
1297 add_visitor(visit_existing_follow
, NULL
);
1299 add_visitor(visit_existing_nofollow
, NULL
);
1302 case ACCEPT_NON_EXISTING
:
1303 results_were_filtered
= true;
1304 if (follow_symlinks
) /* -L, default */
1305 add_visitor(visit_non_existing_follow
, NULL
);
1307 add_visitor(visit_non_existing_nofollow
, NULL
);
1310 case ACCEPT_EITHER
: /* Default, neither -E nor -e */
1311 /* do nothing; no extra processing. */
1315 /* Security issue: The stats visitor must be added immediately
1316 * before the print visitor, because otherwise the -S option would
1317 * leak information about files that the caller cannot see.
1320 add_visitor(visit_stats
, &statistics
);
1324 if (print_quoted_filename
)
1325 add_visitor(visit_justprint_quoted
, NULL
);
1327 add_visitor(visit_justprint_unquoted
, NULL
);
1332 add_visitor(visit_limit
, plimit
);
1334 add_visitor(visit_count
, plimit
);
1339 past_pat_inspector
= pvis
->next
;
1341 mainprocessor
= process_and
;
1343 mainprocessor
= process_or
;
1346 mainprocessor
= process_simple
;
1350 printf(_("Database %s is in the %s format.\n"),
1356 procdata
.c
= getc (procdata
.fp
);
1357 /* If we are searching for filename patterns, the inspector list
1358 * will contain an entry for each pattern for which we are searching.
1360 while ( (procdata
.c
!= EOF
) &&
1361 (VISIT_ABORT
!= (mainprocessor
)(&procdata
)) )
1363 /* Do nothing; all the work is done in the visitor functions. */
1370 int host_little_endian
= i_am_little_endian();
1371 const char *little
= _("The database has little-endian "
1372 "machine-word encoding.\n");
1373 const char *big
= _("The database has big-endian "
1374 "machine-word encoding.\n");
1376 if (GetwordEndianStateNative
== procdata
.endian_state
)
1378 printf("%s", (host_little_endian
? little
: big
));
1380 else if (GetwordEndianStateSwab
== procdata
.endian_state
)
1382 printf("%s", (host_little_endian
? big
: little
));
1386 printf(_("The database machine-word encoding order "
1387 "is not obvious.\n"));
1391 print_stats(argc
, filesize
);
1394 if (ferror (procdata
.fp
))
1396 error (0, errno
, "%s",
1397 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
));
1400 return plimit
->items_accepted
;
1406 extern char *version_string
;
1408 /* The name this program was run with. */
1412 usage (FILE *stream
)
1414 fprintf (stream
, _("\
1415 Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1416 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1417 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1418 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
1419 [-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\
1420 [--max-database-age D] [--version] [--help]\n\
1423 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream
);
1427 REGEXTYPE_OPTION
= CHAR_MAX
+ 1,
1432 static struct option
const longopts
[] =
1434 {"database", required_argument
, NULL
, 'd'},
1435 {"existing", no_argument
, NULL
, 'e'},
1436 {"non-existing", no_argument
, NULL
, 'E'},
1437 {"ignore-case", no_argument
, NULL
, 'i'},
1438 {"all", no_argument
, NULL
, 'A'},
1439 {"help", no_argument
, NULL
, 'h'},
1440 {"version", no_argument
, NULL
, 'v'},
1441 {"null", no_argument
, NULL
, '0'},
1442 {"count", no_argument
, NULL
, 'c'},
1443 {"wholename", no_argument
, NULL
, 'w'},
1444 {"wholepath", no_argument
, NULL
, 'w'}, /* Synonym. */
1445 {"basename", no_argument
, NULL
, 'b'},
1446 {"print", no_argument
, NULL
, 'p'},
1447 {"stdio", no_argument
, NULL
, 's'},
1448 {"mmap", no_argument
, NULL
, 'm'},
1449 {"limit", required_argument
, NULL
, 'l'},
1450 {"regex", no_argument
, NULL
, 'r'},
1451 {"regextype", required_argument
, NULL
, REGEXTYPE_OPTION
},
1452 {"statistics", no_argument
, NULL
, 'S'},
1453 {"follow", no_argument
, NULL
, 'L'},
1454 {"nofollow", no_argument
, NULL
, 'P'},
1455 {"max-database-age", required_argument
, NULL
, MAX_DB_AGE
},
1456 {NULL
, no_argument
, NULL
, 0}
1463 const char * what
= "failed";
1464 const uid_t orig_euid
= geteuid();
1465 const uid_t uid
= getuid();
1466 const gid_t gid
= getgid();
1469 /* Use of setgroups() is restricted to root only. */
1472 /* We're either root or running setuid-root. */
1475 if (0 != setgroups(1u, groups
))
1477 what
= _("failed to drop group privileges");
1483 /* Drop any setuid privileges */
1484 if (uid
!= orig_euid
)
1488 /* We're really root anyway, but are setuid to something else. Leave it. */
1493 if (0 != setuid(getuid()))
1495 what
= _("failed to drop setuid privileges");
1499 /* Defend against the case where the attacker runs us with the
1500 * capability to call setuid() turned off, which on some systems
1501 * will cause the above attempt to drop privileges to fail (leaving us
1506 /* Check that we can no longer switch back to root */
1509 what
= _("Failed to fully drop privileges");
1510 /* The errno value here is not interesting (since
1511 * the system call we are complaining about
1512 * succeeded when we wanted it to fail). Arrange
1513 * for the call to error() not to print the errno
1514 * value by setting errno=0.
1523 /* Drop any setgid privileges */
1525 if (0 != setgid(gid
))
1527 what
= _("failed to drop setgid privileges");
1535 error(1, errno
, "%s",
1536 quotearg_n_style(0, locale_quoting_style
, what
));
1544 /* deliberate infinite loop */
1549 opendb(const char *name
)
1551 int fd
= open(name
, O_RDONLY
1552 #if defined O_LARGEFILE
1558 /* Make sure it won't survive an exec */
1559 if (0 != fcntl(fd
, F_SETFD
, FD_CLOEXEC
))
1569 dolocate (int argc
, char **argv
, int secure_db_fd
)
1572 unsigned long int found
= 0uL;
1573 int ignore_case
= 0;
1576 int basename_only
= 0;
1579 int regex_options
= RE_SYNTAX_EMACS
;
1584 int they_chose_db
= 0;
1585 bool did_stdin
= false; /* Set to prevent rereading stdin. */
1587 program_name
= argv
[0];
1589 #ifdef HAVE_SETLOCALE
1590 setlocale (LC_ALL
, "");
1592 bindtextdomain (PACKAGE
, LOCALEDIR
);
1593 textdomain (PACKAGE
);
1594 atexit (close_stdout
);
1597 limits
.items_accepted
= 0;
1599 quote_opts
= clone_quoting_options (NULL
);
1600 print_quoted_filename
= true;
1602 /* We cannot simultaneously trust $LOCATE_PATH and use the
1603 * setuid-access-controlled database, since that could cause a leak
1606 dbpath
= getenv ("LOCATE_PATH");
1612 check_existence
= ACCEPT_EITHER
;
1617 int optc
= getopt_long (argc
, argv
, "Abcd:eEil:prsm0SwHPL", longopts
,
1626 print_quoted_filename
= false; /* print filename 'raw'. */
1647 check_existence
= ACCEPT_EXISTING
;
1651 check_existence
= ACCEPT_NON_EXISTING
;
1663 /* XXX: nothing in the test suite for this option. */
1664 set_max_db_age (optarg
);
1672 display_findutils_version ("locate");
1683 case REGEXTYPE_OPTION
:
1684 regex_options
= get_regex_type (optarg
);
1692 follow_symlinks
= 1;
1695 /* In find, -P and -H differ in the way they handle paths
1696 * given on the command line. This is not relevant for
1697 * locate, but the -H option is supported because it is
1698 * probably more intuitive to do so.
1702 follow_symlinks
= 0;
1708 strtol_error err
= xstrtoumax (optarg
, &end
, 10, &limits
.limit
,
1710 if (LONGINT_OK
!= err
)
1711 xstrtol_fatal (err
, opti
, optc
, longopts
, optarg
);
1716 case 's': /* use stdio */
1717 case 'm': /* use mmap */
1718 /* These options are implemented simply for
1719 * compatibility with FreeBSD
1730 /* If the user gave the -d option or set LOCATE_PATH,
1731 * relinquish access to the secure database.
1735 if (secure_db_fd
>= 0)
1737 close(secure_db_fd
);
1742 if (!just_count
&& !stats
)
1752 if (!just_count
&& optind
== argc
)
1760 if (1 == isatty(STDOUT_FILENO
))
1761 stdout_is_a_tty
= true;
1763 stdout_is_a_tty
= false;
1766 next_element (dbpath
, 0); /* Initialize. */
1768 /* Bail out early if limit already reached. */
1769 while (!use_limit
|| limits
.limit
> limits
.items_accepted
)
1775 statistics
.compressed_bytes
=
1776 statistics
.total_filename_count
=
1777 statistics
.total_filename_length
=
1778 statistics
.whitespace_count
=
1779 statistics
.newline_count
=
1780 statistics
.highbit_filename_count
= 0u;
1784 /* Take the next element from the list of databases */
1785 e
= next_element ((char *) NULL
, 0);
1789 if (0 == strcmp (e
, "-"))
1794 _("warning: the locate database can only be read from stdin once."));
1806 if (0 == strlen(e
) || 0 == strcmp(e
, "."))
1811 /* open the database */
1815 error (0, errno
, "%s",
1816 quotearg_n_style(0, locale_quoting_style
, e
));
1823 if (-1 == secure_db_fd
)
1825 /* Already searched the database, it's time to exit the loop */
1830 e
= selected_secure_db
;
1836 /* Check the database to see if it is old. */
1839 error (0, errno
, "%s",
1840 quotearg_n_style(0, locale_quoting_style
, e
));
1841 /* continue anyway */
1842 filesize
= (off_t
)0;
1848 filesize
= st
.st_size
;
1850 if ((time_t)-1 == time(&now
))
1852 /* If we can't tell the time, we don't know how old the
1853 * database is. But since the message is just advisory,
1854 * we continue anyway.
1856 error (0, errno
, _("time system call failed"));
1860 double age
= difftime(now
, st
.st_mtime
);
1861 double warn_seconds
= SECONDS_PER_UNIT
* warn_number_units
;
1862 if (age
> warn_seconds
)
1865 warning: database `fred' is more than 8 days old (actual age is 10 days)*/
1867 _("warning: database %s is more than %d %s old (actual age is %.1f %s)"),
1868 quotearg_n_style(0, locale_quoting_style
, e
),
1869 warn_number_units
, _(warn_name_units
),
1870 (age
/(double)SECONDS_PER_UNIT
), _(warn_name_units
));
1875 fp
= fdopen(fd
, "r");
1878 error (0, errno
, "%s",
1879 quotearg_n_style(0, locale_quoting_style
, e
));
1883 /* Search this database for all patterns simultaneously */
1884 found
= search_one_database (argc
- optind
, &argv
[optind
],
1886 ignore_case
, print
, basename_only
,
1887 use_limit
, &limits
, stats
,
1888 op_and
, regex
, regex_options
);
1890 /* Close the database (even if it is stdin) */
1891 if (fclose (fp
) == EOF
)
1893 error (0, errno
, "%s",
1894 quotearg_n_style(0, locale_quoting_style
, e
));
1901 printf("%ld\n", found
);
1904 if (found
|| (use_limit
&& (limits
.limit
==0)) || stats
)
1910 #define ARRAYSIZE(a) (sizeof(a)/sizeof(a[0]))
1912 open_secure_db(void)
1916 const char * secure_db_list
[] =
1919 "/var/lib/slocate/slocate.db",
1922 for (i
=0; secure_db_list
[i
]; ++i
)
1924 fd
= opendb(secure_db_list
[i
]);
1927 selected_secure_db
= secure_db_list
[i
];
1935 main (int argc
, char **argv
)
1937 int dbfd
= open_secure_db();
1940 return dolocate(argc
, argv
, dbfd
);