1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2006, 2007, 2008, 2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
18 /* Usage: locate [options] pattern...
20 Scan a pathname list for the full pathname of a file, given only
21 a piece of the name (possibly containing shell globbing metacharacters).
22 The list has been processed with front-compression, which reduces
23 the list size by a factor of 4-5.
24 Recognizes two database formats, old and new. The old format is
25 bigram coded, which reduces space by a further 20-25% and uses the
26 following encoding of the database bytes:
28 0-28 likeliest differential counts + offset (14) to make nonnegative
29 30 escape code for out-of-range count to follow in next halfword
30 128-255 bigram codes (the 128 most common, as determined by `updatedb')
31 32-127 single character (printable) ASCII remainder
33 Earlier versions of GNU locate used to use a novel two-tiered
34 string search technique, which was described in Usenix ;login:, Vol
35 8, No 1, February/March, 1983, p. 8.
37 However, latterly code changes to provide additional functionality
38 became difficult to make with the existing reading scheme, and so
39 we no longer perform the matching as efficiently as we used to (that is,
40 we no longer use the same algorithm).
42 The old algorithm was:
44 First, match a metacharacter-free subpattern and a partial
45 pathname BACKWARDS to avoid full expansion of the pathname list.
46 The time savings is 40-50% over forward matching, which cannot
47 efficiently handle overlapped search patterns and compressed
50 Then, match the actual shell glob pattern (if in this form)
51 against the candidate pathnames using the slower shell filename
55 Written by James A. Woods <jwoods@adobe.com>.
56 Modified by David MacKenzie <djm@gnu.org>.
57 Additional work by James Youngman and Bas van Gompel.
65 #include <sys/types.h>
66 #include <grp.h> /* for setgroups() */
75 /* The presence of unistd.h is assumed by gnulib these days, so we
76 * might as well assume it too.
78 /* We need <unistd.h> for isatty(). */
98 # define _(Text) gettext (Text)
100 # define _(Text) Text
101 #define textdomain(Domain)
102 #define bindtextdomain(Package, Directory)
103 #define ngettext(singular,plural,n) ((1==n) ? singular : plural)
106 # define N_(String) gettext_noop (String)
108 /* We used to use (String) instead of just String, but apparently ISO C
109 * doesn't allow this (at least, that's what HP said when someone reported
110 * this as a compiler bug). This is HP case number 1205608192. See
111 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
112 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
113 * like: static const char buf[] = ("string");
115 # define N_(String) String
118 #include "locatedb.h"
119 #include "progname.h"
124 #include "closeout.h"
125 #include "nextelem.h"
127 #include "quotearg.h"
128 #include "printquoted.h"
129 #include "regextype.h"
130 #include "findutils-version.h"
132 /* Note that this evaluates Ch many times. */
134 # define TOUPPER(Ch) toupper (Ch)
135 # define TOLOWER(Ch) tolower (Ch)
137 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
138 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
141 /* Warn if a database is older than this. 8 days allows for a weekly
142 update that takes up to a day to perform. */
143 static unsigned int warn_number_units
= 8;
145 /* Printable name of units used in WARN_SECONDS */
146 static const char warn_name_units
[] = N_("days");
147 #define SECONDS_PER_UNIT (60 * 60 * 24)
151 VISIT_CONTINUE
= 1, /* please call the next visitor */
152 VISIT_ACCEPTED
= 2, /* accepted, call no futher callbacks for this file */
153 VISIT_REJECTED
= 4, /* rejected, process next file. */
154 VISIT_ABORT
= 8 /* rejected, process no more files. */
157 enum ExistenceCheckType
159 ACCEPT_EITHER
, /* Corresponds to lack of -E/-e option */
160 ACCEPT_EXISTING
, /* Corresponds to option -e */
161 ACCEPT_NON_EXISTING
/* Corresponds to option -E */
164 /* Check for existence of files before printing them out? */
165 enum ExistenceCheckType check_existence
= ACCEPT_EITHER
;
167 static int follow_symlinks
= 1;
169 /* What to separate the results with. */
170 static int separator
= '\n';
172 static struct quoting_options
* quote_opts
= NULL
;
173 static bool stdout_is_a_tty
;
174 static bool print_quoted_filename
;
175 static bool results_were_filtered
;
177 static const char *selected_secure_db
= NULL
;
180 /* Change the number of days old the database can be
181 * before we complain about it.
184 set_max_db_age (const char *s
)
187 unsigned long int val
;
188 /* XXX: we ignore the case where the input is negative, which is allowed(!). */
192 error (EXIT_FAILURE
, 0,
193 _("The argument for option --max-database-age must not be empty"));
197 /* We have to set errno here, otherwise when the function returns ULONG_MAX,
198 * we would not be able to tell if that is the correct answer, or whether it
199 * signifies an error.
202 val
= strtoul (s
, &end
, 10);
204 /* Diagnose number too large, non-numbes and trailing junk. */
205 if ((ULONG_MAX
== val
&& ERANGE
== errno
) ||
206 (0 == val
&& EINVAL
== errno
))
208 error (EXIT_FAILURE
, errno
,
209 _("Invalid argument %s for option --max-database-age"),
210 quotearg_n_style (0, locale_quoting_style
, s
));
214 /* errno wasn't set, don't print its message */
215 error (EXIT_FAILURE
, 0,
216 _("Invalid argument %s for option --max-database-age"),
217 quotearg_n_style (0, locale_quoting_style
, s
));
221 warn_number_units
= val
;
/* Read in a 16-bit int, high byte first (network byte order).
 *
 * The two bytes are combined as an unsigned quantity and then
 * interpreted as a two's-complement 16-bit value, so no negative
 * operand is ever left-shifted.
 *
 * A short read is not reported: each fgetc () result is masked to its
 * low eight bits, so with the usual EOF value of -1 a failed read
 * contributes 0xff to the result.
 */
static short
get_short (FILE *fp)
{
  const unsigned int hi = (unsigned int) fgetc (fp) & 0xffu;
  const unsigned int lo = (unsigned int) fgetc (fp) & 0xffu;
  const unsigned int word = (hi << 8) | lo;

  /* Sign-extend from bit 15 by hand; the result is within -32768..32767. */
  if (word & 0x8000u)
    return (short) ((int) word - 0x10000);
  else
    return (short) word;
}
const char * const metacharacters = "*?[]\\";

/* Report whether S holds at least one of the characters listed in
 * `metacharacters'.  The result is 1 when one is present and 0 when
 * none is.
 */
static int
contains_metacharacter (const char *s)
{
  const char *first_special = strpbrk (s, metacharacters);

  return (first_special != NULL) ? 1 : 0;
}
/*
 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
 * until we reach DELIMITER or end-of-file.   We reallocate the buffer
 * as necessary, altering (*BUF) and (*SIZ) as appropriate.  No assumption
 * is made regarding the content of the data (i.e. the implementation is
 * 8-bit clean, the only delimiter is DELIMITER).
 *
 * The bytes are obtained with getdelim (), and all of the bytes it
 * returned (including the delimiter, when one was read) are copied to
 * (*BUF)+OFFS.  The buffer is grown to at least OFFS + count + 1 bytes,
 * but the extra byte is not written here.
 *
 * Returns the number of bytes read, or -1 if getdelim () failed or the
 * buffer could not be enlarged; (*BUF) and (*SIZ) are left untouched in
 * both failure cases.  The temporary buffer allocated by getdelim () is
 * released on every path.
 *
 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
 * has been removed from gnulib.
 *
 * We call the function locate_read_str() to avoid a name clash with the curses
 * function getstr().
 */
static int
locate_read_str (char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
{
  char *p = NULL;
  size_t sz = 0;
  int nread;
  size_t needed;

  nread = getdelim (&p, &sz, delimiter, fp);
  if (nread >= 0)
    {
      assert (p != NULL);

      needed = offs + nread + 1u;
      if (needed > (*siz))
        {
          char *pnew = realloc (*buf, needed);
          if (NULL == pnew)
            {
              free (p);         /* don't leak getdelim's buffer */
              return -1;        /* FAIL */
            }
          else
            {
              *siz = needed;
              *buf = pnew;
            }
        }
      memcpy ((*buf)+offs, p, nread);
    }

  /* getdelim () may have allocated P even when it reports failure,
   * and free (NULL) is harmless.
   */
  free (p);
  return nread;
}
304 uintmax_t items_accepted
;
306 static struct locate_limits limits
;
311 uintmax_t compressed_bytes
;
312 uintmax_t total_filename_count
;
313 uintmax_t total_filename_length
;
314 uintmax_t whitespace_count
;
315 uintmax_t newline_count
;
316 uintmax_t highbit_filename_count
;
318 static struct locate_stats statistics
;
321 struct regular_expression
323 struct re_pattern_buffer regex
; /* for --regex */
329 int c
; /* An input byte. */
330 char itemcount
; /* Indicates we're at the beginning of an slocate db. */
331 int count
; /* The length of the prefix shared with the previous database entry. */
333 char *original_filename
; /* The current input database entry. */
334 size_t pathsize
; /* Amount allocated for it. */
335 char *munged_filename
; /* path or basename(path) */
336 FILE *fp
; /* The pathname database. */
337 const char *dbfile
; /* Its name, or "<stdin>" */
338 int slocatedb_format
; /* Allows us to cope with slocate's format variant */
339 GetwordEndianState endian_state
;
340 /* for the old database format,
341 the first and second characters of the most common bigrams. */
347 typedef int (*visitfunc
)(struct process_data
*procdata
,
354 struct visitor
*next
;
358 static struct visitor
*inspectors
= NULL
;
359 static struct visitor
*lastinspector
= NULL
;
360 static struct visitor
*past_pat_inspector
= NULL
;
362 static inline int visit (const struct visitor
*p
,
364 struct process_data
*procdata
,
365 const struct visitor
* const stop
)
367 register int result
= accept_flags
;
368 while ( (accept_flags
& result
) && (stop
!= p
) )
370 result
= (p
->inspector
)(procdata
, p
->context
);
376 /* 0 or 1 pattern(s) */
378 process_simple (struct process_data
*procdata
)
380 return visit (inspectors
, (VISIT_CONTINUE
|VISIT_ACCEPTED
), procdata
, NULL
);
383 /* Accept if any pattern matches. */
385 process_or (struct process_data
*procdata
)
389 result
= visit (inspectors
, (VISIT_CONTINUE
|VISIT_REJECTED
), procdata
, past_pat_inspector
);
390 if (result
== VISIT_CONTINUE
)
391 result
= VISIT_REJECTED
;
392 if (result
& (VISIT_ABORT
| VISIT_REJECTED
))
395 result
= visit (past_pat_inspector
, VISIT_CONTINUE
, procdata
, NULL
);
396 if (VISIT_CONTINUE
== result
)
397 return VISIT_ACCEPTED
;
402 /* Accept if all pattern match. */
404 process_and (struct process_data
*procdata
)
408 result
= visit (inspectors
, (VISIT_CONTINUE
|VISIT_ACCEPTED
), procdata
, past_pat_inspector
);
409 if (result
== VISIT_CONTINUE
)
410 result
= VISIT_REJECTED
;
411 if (result
& (VISIT_ABORT
| VISIT_REJECTED
))
414 result
= visit (past_pat_inspector
, VISIT_CONTINUE
, procdata
, NULL
);
415 if (VISIT_CONTINUE
== result
)
416 return VISIT_ACCEPTED
;
421 typedef int (*processfunc
)(struct process_data
*procdata
);
423 static processfunc mainprocessor
= NULL
;
426 add_visitor (visitfunc fn
, void *context
)
428 struct visitor
*p
= xmalloc (sizeof (struct visitor
));
430 p
->context
= context
;
433 if (NULL
== lastinspector
)
435 lastinspector
= inspectors
= p
;
439 lastinspector
->next
= p
;
445 visit_justprint_quoted (struct process_data
*procdata
, void *context
)
448 print_quoted (stdout
, quote_opts
, stdout_is_a_tty
,
450 procdata
->original_filename
);
452 return VISIT_CONTINUE
;
456 visit_justprint_unquoted (struct process_data
*procdata
, void *context
)
459 fputs (procdata
->original_filename
, stdout
);
461 return VISIT_CONTINUE
;
465 toolong (struct process_data
*procdata
)
467 error (EXIT_FAILURE
, 0,
468 _("locate database %s contains a "
469 "filename longer than locate can handle"),
474 extend (struct process_data
*procdata
, size_t siz1
, size_t siz2
)
476 /* Figure out if the addition operation is safe before performing it. */
477 if (SIZE_MAX
- siz1
< siz2
)
481 else if (procdata
->pathsize
< (siz1
+siz2
))
483 procdata
->pathsize
= siz1
+siz2
;
484 procdata
->original_filename
= x2nrealloc (procdata
->original_filename
,
491 visit_old_format (struct process_data
*procdata
, void *context
)
496 if (EOF
== procdata
->c
)
499 /* Get the offset in the path where this path info starts. */
500 if (procdata
->c
== LOCATEDB_OLD_ESCAPE
)
505 procdata
->count
-= LOCATEDB_OLD_OFFSET
;
506 minval
= (0 - procdata
->count
);
507 if (procdata
->count
>= 0)
508 maxval
= (procdata
->len
- procdata
->count
);
510 maxval
= (procdata
->len
- 0);
511 word
= getword (procdata
->fp
, procdata
->dbfile
,
512 minval
, maxval
, &procdata
->endian_state
);
513 procdata
->count
+= word
;
514 assert (procdata
->count
>= 0);
518 procdata
->count
+= (procdata
->c
- LOCATEDB_OLD_OFFSET
);
519 assert (procdata
->count
>= 0);
522 /* Overlay the old path with the remainder of the new. Read
523 * more data until we get to the next filename.
525 for (i
=procdata
->count
;
526 (procdata
->c
= getc (procdata
->fp
)) > LOCATEDB_OLD_ESCAPE
;)
528 if (EOF
== procdata
->c
)
531 if (procdata
->c
< 0200)
533 /* An ordinary character. */
534 extend (procdata
, i
, 1u);
535 procdata
->original_filename
[i
++] = procdata
->c
;
539 /* Bigram markers have the high bit set. */
540 extend (procdata
, i
, 2u);
542 procdata
->original_filename
[i
++] = procdata
->bigram1
[procdata
->c
];
543 procdata
->original_filename
[i
++] = procdata
->bigram2
[procdata
->c
];
547 /* Consider the case where we executed the loop body zero times; we
548 * still need space for the terminating null byte.
550 extend (procdata
, i
, 1u);
551 procdata
->original_filename
[i
] = 0;
553 procdata
->munged_filename
= procdata
->original_filename
;
555 return VISIT_CONTINUE
;
559 visit_locate02_format (struct process_data
*procdata
, void *context
)
565 if (procdata
->slocatedb_format
)
567 if (procdata
->itemcount
== 0)
569 ungetc (procdata
->c
, procdata
->fp
);
573 else if (procdata
->itemcount
== 1)
575 procdata
->count
= procdata
->len
-1;
579 if (procdata
->c
== LOCATEDB_ESCAPE
)
580 procdata
->count
+= (short)get_short (procdata
->fp
);
581 else if (procdata
->c
> 127)
582 procdata
->count
+= procdata
->c
- 256;
584 procdata
->count
+= procdata
->c
;
589 if (procdata
->c
== LOCATEDB_ESCAPE
)
590 procdata
->count
+= (short)get_short (procdata
->fp
);
591 else if (procdata
->c
> 127)
592 procdata
->count
+= procdata
->c
- 256;
594 procdata
->count
+= procdata
->c
;
597 if (procdata
->count
> procdata
->len
|| procdata
->count
< 0)
599 /* This should not happen generally , but since we're
600 * reading in data which is outside our control, we
603 error (EXIT_FAILURE
, 0, _("locate database %s is corrupt or invalid"),
604 quotearg_n_style (0, locale_quoting_style
, procdata
->dbfile
));
607 /* Overlay the old path with the remainder of the new. */
608 nread
= locate_read_str (&procdata
->original_filename
,
610 procdata
->fp
, 0, procdata
->count
);
613 procdata
->c
= getc (procdata
->fp
);
614 procdata
->len
= procdata
->count
+ nread
;
615 s
= procdata
->original_filename
+ procdata
->len
- 1; /* Move to the last char in path. */
616 assert (s
[0] != '\0');
617 assert (s
[1] == '\0'); /* Our terminator. */
618 assert (s
[2] == '\0'); /* Added by locate_read_str. */
620 procdata
->munged_filename
= procdata
->original_filename
;
622 if (procdata
->slocatedb_format
)
624 /* Don't increment indefinitely, it might overflow. */
625 if (procdata
->itemcount
< 6)
627 ++(procdata
->itemcount
);
632 return VISIT_CONTINUE
;
636 visit_basename (struct process_data
*procdata
, void *context
)
639 procdata
->munged_filename
= last_component (procdata
->original_filename
);
641 return VISIT_CONTINUE
;
645 /* visit_existing_follow implements -L -e */
647 visit_existing_follow (struct process_data
*procdata
, void *context
)
652 /* munged_filename has been converted in some way (to lower case,
653 * or is just the base name of the file), and original_filename has not.
654 * Hence only original_filename is still actually the name of the file
655 * whose existence we would need to check.
657 if (stat (procdata
->original_filename
, &st
) != 0)
659 return VISIT_REJECTED
;
663 return VISIT_CONTINUE
;
667 /* visit_non_existing_follow implements -L -E */
669 visit_non_existing_follow (struct process_data
*procdata
, void *context
)
674 /* munged_filename has been converted in some way (to lower case,
675 * or is just the base name of the file), and original_filename has not.
676 * Hence only original_filename is still actually the name of the file
677 * whose existence we would need to check.
679 if (stat (procdata
->original_filename
, &st
) == 0)
681 return VISIT_REJECTED
;
685 return VISIT_CONTINUE
;
689 /* visit_existing_nofollow implements -P -e */
691 visit_existing_nofollow (struct process_data
*procdata
, void *context
)
696 /* munged_filename has been converted in some way (to lower case,
697 * or is just the base name of the file), and original_filename has not.
698 * Hence only original_filename is still actually the name of the file
699 * whose existence we would need to check.
701 if (lstat (procdata
->original_filename
, &st
) != 0)
703 return VISIT_REJECTED
;
707 return VISIT_CONTINUE
;
711 /* visit_non_existing_nofollow implements -P -E */
713 visit_non_existing_nofollow (struct process_data
*procdata
, void *context
)
718 /* munged_filename has been converted in some way (to lower case,
719 * or is just the base name of the file), and original_filename has not.
720 * Hence only original_filename is still actually the name of the file
721 * whose existence we would need to check.
723 if (lstat (procdata
->original_filename
, &st
) == 0)
725 return VISIT_REJECTED
;
729 return VISIT_CONTINUE
;
734 visit_substring_match_nocasefold_wide (struct process_data
*procdata
, void *context
)
736 const char *pattern
= context
;
738 if (NULL
!= mbsstr (procdata
->munged_filename
, pattern
))
739 return VISIT_ACCEPTED
;
741 return VISIT_REJECTED
;
745 visit_substring_match_nocasefold_narrow (struct process_data
*procdata
, void *context
)
747 const char *pattern
= context
;
748 assert (MB_CUR_MAX
== 1);
749 if (NULL
!= strstr (procdata
->munged_filename
, pattern
))
750 return VISIT_ACCEPTED
;
752 return VISIT_REJECTED
;
756 visit_substring_match_casefold_wide (struct process_data
*procdata
, void *context
)
758 const char *pattern
= context
;
760 if (NULL
!= mbscasestr (procdata
->munged_filename
, pattern
))
761 return VISIT_ACCEPTED
;
763 return VISIT_REJECTED
;
768 visit_substring_match_casefold_narrow (struct process_data
*procdata
, void *context
)
770 const char *pattern
= context
;
772 assert (MB_CUR_MAX
== 1);
773 if (NULL
!= strcasestr (procdata
->munged_filename
, pattern
))
774 return VISIT_ACCEPTED
;
776 return VISIT_REJECTED
;
781 visit_globmatch_nofold (struct process_data
*procdata
, void *context
)
783 const char *glob
= context
;
784 if (fnmatch (glob
, procdata
->munged_filename
, 0) != 0)
785 return VISIT_REJECTED
;
787 return VISIT_ACCEPTED
;
792 visit_globmatch_casefold (struct process_data
*procdata
, void *context
)
794 const char *glob
= context
;
795 if (fnmatch (glob
, procdata
->munged_filename
, FNM_CASEFOLD
) != 0)
796 return VISIT_REJECTED
;
798 return VISIT_ACCEPTED
;
803 visit_regex (struct process_data
*procdata
, void *context
)
805 struct regular_expression
*p
= context
;
806 const size_t len
= strlen (procdata
->munged_filename
);
808 int rv
= re_search (&p
->regex
, procdata
->munged_filename
,
810 (struct re_registers
*) NULL
);
813 return VISIT_REJECTED
; /* no match (-1), or internal error (-2) */
817 return VISIT_ACCEPTED
; /* match */
823 visit_stats (struct process_data
*procdata
, void *context
)
825 struct locate_stats
*p
= context
;
826 size_t len
= strlen (procdata
->original_filename
);
828 int highbit
, whitespace
, newline
;
830 ++(p
->total_filename_count
);
831 p
->total_filename_length
+= len
;
833 highbit
= whitespace
= newline
= 0;
834 for (s
=procdata
->original_filename
; *s
; ++s
)
836 if ( (int)(*s
) & 128 )
840 newline
= whitespace
= 1;
842 else if (isspace ((unsigned char)*s
))
849 ++(p
->highbit_filename_count
);
851 ++(p
->whitespace_count
);
853 ++(p
->newline_count
);
855 return VISIT_CONTINUE
;
860 visit_limit (struct process_data
*procdata
, void *context
)
862 struct locate_limits
*p
= context
;
866 if (++p
->items_accepted
>= p
->limit
)
869 return VISIT_CONTINUE
;
873 visit_count (struct process_data
*procdata
, void *context
)
875 struct locate_limits
*p
= context
;
880 return VISIT_CONTINUE
;
883 /* Emit the statistics.
886 print_stats (int argc
, size_t database_file_size
)
888 char hbuf1
[LONGEST_HUMAN_READABLE
+ 1];
889 char hbuf2
[LONGEST_HUMAN_READABLE
+ 1];
890 char hbuf3
[LONGEST_HUMAN_READABLE
+ 1];
891 char hbuf4
[LONGEST_HUMAN_READABLE
+ 1];
893 printf (ngettext ("Locate database size: %s byte\n",
894 "Locate database size: %s bytes\n",
896 human_readable ((uintmax_t) database_file_size
,
897 hbuf1
, human_ceiling
, 1, 1));
899 printf ( (results_were_filtered
?
900 _("Matching Filenames: %s\n") :
901 _("All Filenames: %s\n")),
902 human_readable (statistics
.total_filename_count
,
903 hbuf1
, human_ceiling
, 1, 1));
904 /* XXX: We would ideally use ngettext () here, but I don't know
905 * how to use it to handle more than one possibly-plural thing/
907 printf (_("File names have a cumulative length of %s bytes.\n"
908 "Of those file names,\n"
909 "\n\t%s contain whitespace, "
910 "\n\t%s contain newline characters, "
911 "\n\tand %s contain characters with the high bit set.\n"),
912 human_readable (statistics
.total_filename_length
, hbuf1
, human_ceiling
, 1, 1),
913 human_readable (statistics
.whitespace_count
, hbuf2
, human_ceiling
, 1, 1),
914 human_readable (statistics
.newline_count
, hbuf3
, human_ceiling
, 1, 1),
915 human_readable (statistics
.highbit_filename_count
, hbuf4
, human_ceiling
, 1, 1));
919 if (results_were_filtered
)
921 printf (_("Some filenames may have been filtered out, "
922 "so we cannot compute the compression ratio.\n"));
926 if (statistics
.total_filename_length
)
928 /* A negative compression ratio just means that the
929 * compressed database is larger than the list of
930 * filenames. This can happen for example for
931 * old-format databases containing a small list of short
932 * filenames, because the bigram list is 256 bytes.
934 printf (_("Compression ratio %4.2f%% (higher is better)\n"),
935 100.0 * ((double)statistics
.total_filename_length
936 - (double) database_file_size
)
937 / (double) statistics
.total_filename_length
);
941 printf (_("Compression ratio is undefined\n"));
949 * Return nonzero if the data we read in indicates that we are
950 * looking at a LOCATE02 locate database.
953 looking_at_gnu_locatedb (const char *data
, size_t len
)
955 if (len
< sizeof (LOCATEDB_MAGIC
))
957 else if (0 == memcmp (data
, LOCATEDB_MAGIC
, sizeof (LOCATEDB_MAGIC
)))
958 return 1; /* We saw the magic byte sequence */
964 * Return nonzero if the data we read in indicates that we are
965 * looking at an slocate database.
968 looking_at_slocate_locatedb (const char *filename
,
981 /* Check that the magic number is a one-byte string */
984 if (isdigit ((unsigned char)data
[0]))
986 /* looks promising. */
987 *seclevel
= (data
[0] - '0');
991 /* Hmm, well it's probably an slocate database
992 * of some awesomely huge security level, like 2.
993 * We don't know how to handle those.
996 _("locate database %s looks like an slocate "
997 "database but it seems to have security level %c, "
998 "which GNU findutils does not currently support"),
999 quotearg_n_style (0, locale_quoting_style
, filename
),
1016 /* Definitely not slocate. */
/* Return nonzero if this host stores the least significant byte of an
 * unsigned int first.  An unsigned int is zeroed, its first byte is
 * set to 1 through an overlapping byte array, and the resulting
 * integer is compared with 1.
 */
static int
i_am_little_endian (void)
{
  union
  {
    unsigned char bytes[4];
    unsigned int word;
  } probe;

  probe.word = 0u;
  probe.bytes[3] = 0;
  probe.bytes[2] = 0;
  probe.bytes[1] = 0;
  probe.bytes[0] = 1;

  return 1u == probe.word;
}
1040 /* Print or count the entries in DBFILE that match shell globbing patterns in
1041 ARGV. Return the number of entries matched. */
1043 static unsigned long
1044 search_one_database (int argc
,
1053 struct locate_limits
*plimit
,
1059 char *pathpart
; /* A pattern to consider. */
1060 int argn
; /* Index to current pattern in argv. */
1061 int nread
; /* number of bytes read from an entry. */
1062 struct process_data procdata
; /* Storage for data shared with visitors. */
1063 int slocate_seclevel
;
1065 struct visitor
* pvis
; /* temp for determining past_pat_inspector. */
1066 const char *format_name
;
1067 enum ExistenceCheckType do_check_existence
;
1070 /* We may turn on existence checking for a given database.
1071 * We ensure that we can return to the previous behaviour
1072 * by using two variables, do_check_existence (which we act on)
1073 * and check_existence (which indicates the default before we
1074 * adjust it on the basis of what kind of database we're using
1076 do_check_existence
= check_existence
;
1080 regex_options
|= RE_ICASE
;
1083 procdata
.endian_state
= GetwordEndianStateInitial
;
1084 procdata
.len
= procdata
.count
= 0;
1085 procdata
.slocatedb_format
= 0;
1086 procdata
.itemcount
= 0;
1088 procdata
.dbfile
= dbfile
;
1091 /* Set up the inspection regime */
1093 lastinspector
= NULL
;
1094 past_pat_inspector
= NULL
;
1095 results_were_filtered
= false;
1096 procdata
.pathsize
= 128; /* Increased as necessary by locate_read_str. */
1097 procdata
.original_filename
= xmalloc (procdata
.pathsize
);
1100 nread
= fread (procdata
.original_filename
, 1, SLOCATE_DB_MAGIC_LEN
,
1102 slocate_seclevel
= 0;
1103 if (looking_at_slocate_locatedb (procdata
.dbfile
,
1104 procdata
.original_filename
,
1109 _("%s is an slocate database. "
1110 "Support for these is new, expect problems for now."),
1111 quotearg_n_style (0, locale_quoting_style
, procdata
.dbfile
));
1113 /* slocate also uses frcode, but with a different header.
1114 * We handle the header here and then work with the data
1115 * in the normal way.
1117 if (slocate_seclevel
> 1)
1119 /* We don't know what those security levels mean,
1120 * so do nothing further
1123 _("%s is an slocate database of unsupported security level %d; skipping it."),
1124 quotearg_n_style (0, locale_quoting_style
, procdata
.dbfile
),
1128 else if (slocate_seclevel
> 0)
1130 /* Don't show the filenames to the user if they don't exist.
1131 * Showing stats is safe since filenames are only counted
1132 * after the existence check
1134 if (ACCEPT_NON_EXISTING
== check_existence
)
1136 /* Do not allow the user to see a list of filenames that they
1140 _("You specified the -E option, but that option "
1141 "cannot be used with slocate-format databases "
1142 "with a non-zero security level. No results will be "
1143 "generated for this database.\n"));
1146 if (ACCEPT_EXISTING
!= do_check_existence
)
1148 if (enable_print
|| stats
)
1151 _("%s is an slocate database. "
1152 "Turning on the '-e' option."),
1153 quotearg_n_style (0, locale_quoting_style
, procdata
.dbfile
));
1155 do_check_existence
= ACCEPT_EXISTING
;
1158 add_visitor (visit_locate02_format
, NULL
);
1159 format_name
= "slocate";
1160 procdata
.slocatedb_format
= 1;
1166 procdata
.slocatedb_format
= 0;
1167 extend (&procdata
, sizeof (LOCATEDB_MAGIC
), 0u);
1168 nread2
= fread (procdata
.original_filename
+nread
, 1, sizeof (LOCATEDB_MAGIC
)-nread
,
1170 if (looking_at_gnu_locatedb (procdata
.original_filename
, nread
+nread2
))
1172 add_visitor (visit_locate02_format
, NULL
);
1173 format_name
= "GNU LOCATE02";
1175 else /* Use the old format */
1180 extend (&procdata
, 256u, 0u);
1181 /* Read the list of the most common bigrams in the database. */
1184 int more_read
= fread (procdata
.original_filename
+ nread
, 1,
1185 256 - nread
, procdata
.fp
);
1186 if ( (more_read
+ nread
) != 256 )
1188 error (EXIT_FAILURE
, 0,
1189 _("Old-format locate database %s is "
1190 "too short to be valid"),
1191 quotearg_n_style (0, locale_quoting_style
, dbfile
));
1196 for (i
= 0; i
< 128; i
++)
1198 procdata
.bigram1
[i
] = procdata
.original_filename
[i
<< 1];
1199 procdata
.bigram2
[i
] = procdata
.original_filename
[(i
<< 1) + 1];
1201 format_name
= "old";
1203 add_visitor (visit_old_format
, NULL
);
1208 add_visitor (visit_basename
, NULL
);
1210 /* Add an inspector for each pattern we're looking for. */
1211 for ( argn
= 0; argn
< argc
; argn
++ )
1213 results_were_filtered
= true;
1214 pathpart
= argv
[argn
];
1217 struct regular_expression
*p
= xmalloc (sizeof (*p
));
1218 const char *error_message
= NULL
;
1220 memset (&p
->regex
, 0, sizeof (p
->regex
));
1222 re_set_syntax (regex_options
);
1223 p
->regex
.allocated
= 100;
1224 p
->regex
.buffer
= xmalloc (p
->regex
.allocated
);
1225 p
->regex
.fastmap
= NULL
;
1226 p
->regex
.syntax
= regex_options
;
1227 p
->regex
.translate
= NULL
;
1229 error_message
= re_compile_pattern (pathpart
, strlen (pathpart
),
1233 error (EXIT_FAILURE
, 0, "%s", error_message
);
1237 add_visitor (visit_regex
, p
);
1240 else if (contains_metacharacter (pathpart
))
1243 add_visitor (visit_globmatch_casefold
, pathpart
);
1245 add_visitor (visit_globmatch_nofold
, pathpart
);
1249 /* No glob characters used. Hence we match on
1250 * _any part_ of the filename, not just the
1251 * basename. This seems odd to me, but it is the
1252 * traditional behaviour.
1253 * James Youngman <jay@gnu.org>
1256 if (1 == MB_CUR_MAX
)
1258 /* As an optimisation, use a strstr () matcher if we are
1259 * in a unibyte locale. This can give a x2 speedup in
1260 * the C locale. Some light testing reveals that
1261 * glibc's strstr () is somewhere around 40% faster than
1262 * gnulib's, so we just use strstr ().
1264 matcher
= ignore_case
?
1265 visit_substring_match_casefold_narrow
:
1266 visit_substring_match_nocasefold_narrow
;
1270 matcher
= ignore_case
?
1271 visit_substring_match_casefold_wide
:
1272 visit_substring_match_nocasefold_wide
;
1274 add_visitor (matcher
, pathpart
);
1278 pvis
= lastinspector
;
1280 /* We add visit_existing_*() as late as possible to reduce the
1281 * number of stat() calls.
1283 switch (do_check_existence
)
1285 case ACCEPT_EXISTING
:
1286 results_were_filtered
= true;
1287 if (follow_symlinks
) /* -L, default */
1288 add_visitor (visit_existing_follow
, NULL
);
1290 add_visitor (visit_existing_nofollow
, NULL
);
1293 case ACCEPT_NON_EXISTING
:
1294 results_were_filtered
= true;
1295 if (follow_symlinks
) /* -L, default */
1296 add_visitor (visit_non_existing_follow
, NULL
);
1298 add_visitor (visit_non_existing_nofollow
, NULL
);
1301 case ACCEPT_EITHER
: /* Default, neither -E nor -e */
1302 /* do nothing; no extra processing. */
1306 /* Security issue: The stats visitor must be added immediately
1307 * before the print visitor, because otherwise the -S option would
1308 * leak information about files that the caller cannot see.
1311 add_visitor (visit_stats
, &statistics
);
1315 if (print_quoted_filename
)
1316 add_visitor (visit_justprint_quoted
, NULL
);
1318 add_visitor (visit_justprint_unquoted
, NULL
);
1323 add_visitor (visit_limit
, plimit
);
1325 add_visitor (visit_count
, plimit
);
1330 past_pat_inspector
= pvis
->next
;
1332 mainprocessor
= process_and
;
1334 mainprocessor
= process_or
;
1337 mainprocessor
= process_simple
;
1341 printf (_("Database %s is in the %s format.\n"),
1347 procdata
.c
= getc (procdata
.fp
);
1348 /* If we are searching for filename patterns, the inspector list
1349 * will contain an entry for each pattern for which we are searching.
1351 while ( (procdata
.c
!= EOF
) &&
1352 (VISIT_ABORT
!= (mainprocessor
)(&procdata
)) )
1354 /* Do nothing; all the work is done in the visitor functions. */
1361 int host_little_endian
= i_am_little_endian ();
1362 const char *little
= _("The database has little-endian "
1363 "machine-word encoding.\n");
1364 const char *big
= _("The database has big-endian "
1365 "machine-word encoding.\n");
1367 if (GetwordEndianStateNative
== procdata
.endian_state
)
1369 printf ("%s", (host_little_endian
? little
: big
));
1371 else if (GetwordEndianStateSwab
== procdata
.endian_state
)
1373 printf ("%s", (host_little_endian
? big
: little
));
1377 printf (_("The database machine-word encoding order "
1378 "is not obvious.\n"));
1382 print_stats (argc
, filesize
);
1385 if (ferror (procdata
.fp
))
1387 error (0, errno
, "%s",
1388 quotearg_n_style (0, locale_quoting_style
, procdata
.dbfile
));
1391 return plimit
->items_accepted
;
1395 extern char *version_string
;
1398 usage (FILE *stream
)
1400 fprintf (stream
, _("\
1401 Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1402 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1403 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1404 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap] [-s | --stdio]\n\
1405 [-A | --all] [-p | --print] [-r | --regex] [--regextype=TYPE]\n\
1406 [--max-database-age D] [--version] [--help]\n\
1409 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream
);
1413 REGEXTYPE_OPTION
= CHAR_MAX
+ 1,
1418 static struct option
const longopts
[] =
1420 {"database", required_argument
, NULL
, 'd'},
1421 {"existing", no_argument
, NULL
, 'e'},
1422 {"non-existing", no_argument
, NULL
, 'E'},
1423 {"ignore-case", no_argument
, NULL
, 'i'},
1424 {"all", no_argument
, NULL
, 'A'},
1425 {"help", no_argument
, NULL
, 'h'},
1426 {"version", no_argument
, NULL
, 'v'},
1427 {"null", no_argument
, NULL
, '0'},
1428 {"count", no_argument
, NULL
, 'c'},
1429 {"wholename", no_argument
, NULL
, 'w'},
1430 {"wholepath", no_argument
, NULL
, 'w'}, /* Synonym. */
1431 {"basename", no_argument
, NULL
, 'b'},
1432 {"print", no_argument
, NULL
, 'p'},
1433 {"stdio", no_argument
, NULL
, 's'},
1434 {"mmap", no_argument
, NULL
, 'm'},
1435 {"limit", required_argument
, NULL
, 'l'},
1436 {"regex", no_argument
, NULL
, 'r'},
1437 {"regextype", required_argument
, NULL
, REGEXTYPE_OPTION
},
1438 {"statistics", no_argument
, NULL
, 'S'},
1439 {"follow", no_argument
, NULL
, 'L'},
1440 {"nofollow", no_argument
, NULL
, 'P'},
1441 {"max-database-age", required_argument
, NULL
, MAX_DB_AGE
},
1442 {NULL
, no_argument
, NULL
, 0}
1449 const char * what
= "failed";
1450 const uid_t orig_euid
= geteuid ();
1451 const uid_t uid
= getuid ();
1452 const gid_t gid
= getgid ();
1455 /* Use of setgroups () is restricted to root only. */
1458 /* We're either root or running setuid-root. */
1461 if (0 != setgroups (1u, groups
))
1463 what
= _("failed to drop group privileges");
1469 /* Drop any setuid privileges */
1470 if (uid
!= orig_euid
)
1474 /* We're really root anyway, but are setuid to something else. Leave it. */
1479 if (0 != setuid (getuid ()))
1481 what
= _("failed to drop setuid privileges");
1485 /* Defend against the case where the attacker runs us with the
1486 * capability to call setuid () turned off, which on some systems
1487 * will cause the above attempt to drop privileges to fail (leaving us
1492 /* Check that we can no longer switch back to root */
1493 if (0 == setuid (0))
1495 what
= _("Failed to fully drop privileges");
1496 /* The errno value here is not interesting (since
1497 * the system call we are complaining about
1498 * succeeded when we wanted it to fail). Arrange
1499 * for the call to error () not to print the errno
1500 * value by setting errno=0.
1509 /* Drop any setgid privileges */
1511 if (0 != setgid (gid
))
1513 what
= _("failed to drop setgid privileges");
1521 error (EXIT_FAILURE
, errno
, "%s",
1522 quotearg_n_style (0, locale_quoting_style
, what
));
1530 /* deliberate infinite loop */
1535 opendb (const char *name
)
1537 int fd
= open (name
, O_RDONLY
1538 #if defined O_LARGEFILE
1544 /* Make sure it won't survive an exec */
1545 if (0 != fcntl (fd
, F_SETFD
, FD_CLOEXEC
))
1555 dolocate (int argc
, char **argv
, int secure_db_fd
)
1558 unsigned long int found
= 0uL;
1559 int ignore_case
= 0;
1562 int basename_only
= 0;
1565 int regex_options
= RE_SYNTAX_EMACS
;
1570 int they_chose_db
= 0;
1571 bool did_stdin
= false; /* Set to prevent rereading stdin. */
1574 set_program_name (argv
[0]);
1576 set_program_name ("locate");
1578 #ifdef HAVE_SETLOCALE
1579 setlocale (LC_ALL
, "");
1581 bindtextdomain (PACKAGE
, LOCALEDIR
);
1582 textdomain (PACKAGE
);
1583 atexit (close_stdout
);
1586 limits
.items_accepted
= 0;
1588 quote_opts
= clone_quoting_options (NULL
);
1589 print_quoted_filename
= true;
1591 /* We cannot simultaneously trust $LOCATE_PATH and use the
1592 * setuid-access-controlled database, since that could cause a leak
1595 dbpath
= getenv ("LOCATE_PATH");
1601 check_existence
= ACCEPT_EITHER
;
1606 int optc
= getopt_long (argc
, argv
, "Abcd:eEil:prsm0SwHPL", longopts
,
1615 print_quoted_filename
= false; /* print filename 'raw'. */
1636 check_existence
= ACCEPT_EXISTING
;
1640 check_existence
= ACCEPT_NON_EXISTING
;
1652 /* XXX: nothing in the test suite for this option. */
1653 set_max_db_age (optarg
);
1661 display_findutils_version ("locate");
1672 case REGEXTYPE_OPTION
:
1673 regex_options
= get_regex_type (optarg
);
1681 follow_symlinks
= 1;
1684 /* In find, -P and -H differ in the way they handle paths
1685 * given on the command line. This is not relevant for
1686 * locate, but the -H option is supported because it is
1687 * probably more intuitive to do so.
1691 follow_symlinks
= 0;
1697 strtol_error err
= xstrtoumax (optarg
, &end
, 10, &limits
.limit
,
1699 if (LONGINT_OK
!= err
)
1700 xstrtol_fatal (err
, opti
, optc
, longopts
, optarg
);
1705 case 's': /* use stdio */
1706 case 'm': /* use mmap */
1707 /* These options are implemented simply for
1708 * compatibility with FreeBSD
1719 /* If the user gave the -d option or set LOCATE_PATH,
1720 * relinquish access to the secure database.
1724 if (secure_db_fd
>= 0)
1726 close (secure_db_fd
);
1731 if (!just_count
&& !stats
)
1741 if (!just_count
&& optind
== argc
)
1749 if (1 == isatty (STDOUT_FILENO
))
1750 stdout_is_a_tty
= true;
1752 stdout_is_a_tty
= false;
1755 next_element (dbpath
, 0); /* Initialize. */
1757 /* Bail out early if limit already reached. */
1758 while (!use_limit
|| limits
.limit
> limits
.items_accepted
)
1764 statistics
.compressed_bytes
=
1765 statistics
.total_filename_count
=
1766 statistics
.total_filename_length
=
1767 statistics
.whitespace_count
=
1768 statistics
.newline_count
=
1769 statistics
.highbit_filename_count
= 0u;
1773 /* Take the next element from the list of databases */
1774 e
= next_element ((char *) NULL
, 0);
1778 if (0 == strcmp (e
, "-"))
1783 _("warning: the locate database can only be read from stdin once."));
1795 if (0 == strlen (e
) || 0 == strcmp (e
, "."))
1800 /* open the database */
1804 error (0, errno
, "%s",
1805 quotearg_n_style (0, locale_quoting_style
, e
));
1812 if (-1 == secure_db_fd
)
1814 /* Already searched the database, it's time to exit the loop */
1819 e
= selected_secure_db
;
1825 /* Check the database to see if it is old. */
1826 if (fstat (fd
, &st
))
1828 error (0, errno
, "%s",
1829 quotearg_n_style (0, locale_quoting_style
, e
));
1830 /* continue anyway */
1831 filesize
= (off_t
)0;
1837 filesize
= st
.st_size
;
1839 if ((time_t)-1 == time (&now
))
1841 /* If we can't tell the time, we don't know how old the
1842 * database is. But since the message is just advisory,
1843 * we continue anyway.
1845 error (0, errno
, _("time system call failed"));
1849 double age
= difftime (now
, st
.st_mtime
);
1850 double warn_seconds
= SECONDS_PER_UNIT
* warn_number_units
;
1851 if (age
> warn_seconds
)
1854 warning: database `fred' is more than 8 days old (actual age is 10 days)*/
1856 _("warning: database %s is more than %d %s old (actual age is %.1f %s)"),
1857 quotearg_n_style (0, locale_quoting_style
, e
),
1858 warn_number_units
, _(warn_name_units
),
1859 (age
/(double)SECONDS_PER_UNIT
), _(warn_name_units
));
1864 fp
= fdopen (fd
, "r");
1867 error (0, errno
, "%s",
1868 quotearg_n_style (0, locale_quoting_style
, e
));
1872 /* Search this database for all patterns simultaneously */
1873 found
= search_one_database (argc
- optind
, &argv
[optind
],
1875 ignore_case
, print
, basename_only
,
1876 use_limit
, &limits
, stats
,
1877 op_and
, regex
, regex_options
);
1879 /* Close the database (even if it is stdin) */
1880 if (fclose (fp
) == EOF
)
1882 error (0, errno
, "%s",
1883 quotearg_n_style (0, locale_quoting_style
, e
));
1890 printf ("%ld\n", found
);
1893 if (found
|| (use_limit
&& (limits
.limit
==0)) || stats
)
1899 #define ARRAYSIZE(a) (sizeof (a)/sizeof (a[0]))
1901 open_secure_db (void)
1905 const char * secure_db_list
[] =
1908 "/var/lib/slocate/slocate.db",
1911 for (i
=0; secure_db_list
[i
]; ++i
)
1913 fd
= opendb (secure_db_list
[i
]);
1916 selected_secure_db
= secure_db_list
[i
];
1924 main (int argc
, char **argv
)
1926 int dbfd
= open_secure_db ();
1929 return dolocate (argc
, argv
, dbfd
);