1 /* Copyright (c) 2017-2021, The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
6 * @brief Consensus and diff on-disk cache.
9 #include "core/or/or.h"
11 #include "app/config/config.h"
12 #include "feature/dircache/conscache.h"
13 #include "lib/crypt_ops/crypto_util.h"
14 #include "lib/fs/storagedir.h"
15 #include "lib/encoding/confline.h"
17 #define CCE_MAGIC 0x17162253
20 /* On Windows, unlink won't work on a file if the file is actively mmap()ed.
21 * That forces us to be less aggressive about unlinking files, and causes other
22 * changes throughout our logic.
24 #define MUST_UNMAP_TO_UNLINK
25 #endif /* defined(_WIN32) */
28 * A consensus_cache_entry_t is a reference-counted handle to an
29 * item in a consensus_cache_t. It can be mmapped into RAM, or not,
30 * depending on whether it's currently in use.
32 struct consensus_cache_entry_t
{
33 uint32_t magic
; /**< Must be set to CCE_MAGIC */
34 HANDLE_ENTRY(consensus_cache_entry
, consensus_cache_entry_t
);
35 int32_t refcnt
; /**< Reference count. */
36 unsigned can_remove
: 1; /**< If true, we want to delete this file. */
37 /** If true, we intend to unmap this file as soon as we're done with it. */
38 unsigned release_aggressively
: 1;
40 /** Filename for this object within the storage_dir_t */
42 /** Labels associated with this object. Immutable once the object
44 config_line_t
*labels
;
45 /** Pointer to the cache that includes this entry (if any). */
46 consensus_cache_t
*in_cache
;
48 /** Since what time has this object been mapped into RAM, but with the cache
49 * being the only one having a reference to it? */
51 /** mmapped contents of the underlying file. May be NULL. */
53 /** Length of the body within <b>map</b>. */
55 /** Pointer to the body within <b>map</b>. */
60 * A consensus_cache_t holds a directory full of labeled items.
62 struct consensus_cache_t
{
63 /** Underlying storage_dir_t to handle persistence */
65 /** List of all the entries in the directory. */
68 /** The maximum number of entries that we'd like to allow in this cache.
69 * This is the same as the storagedir limit when MUST_UNMAP_TO_UNLINK is
74 static void consensus_cache_clear(consensus_cache_t
*cache
);
75 static void consensus_cache_rescan(consensus_cache_t
*);
76 static void consensus_cache_entry_map(consensus_cache_t
*,
77 consensus_cache_entry_t
*);
78 static void consensus_cache_entry_unmap(consensus_cache_entry_t
*ent
);
81 * Helper: Open a consensus cache in subdirectory <b>subdir</b> of the
82 * data directory, to hold up to <b>max_entries</b> of data.
85 consensus_cache_open(const char *subdir
, int max_entries
)
87 int storagedir_max_entries
;
88 consensus_cache_t
*cache
= tor_malloc_zero(sizeof(consensus_cache_t
));
89 char *directory
= get_cachedir_fname(subdir
);
90 cache
->max_entries
= max_entries
;
92 #ifdef MUST_UNMAP_TO_UNLINK
93 /* If we can't unlink the files that we're still using, then we need to
94 * tell the storagedir backend to allow far more files than this consensus
95 * cache actually wants, so that it can hold files which, from this cache's
96 * perspective, have become useless.
98 #define VERY_LARGE_STORAGEDIR_LIMIT (1000*1000)
99 storagedir_max_entries
= VERY_LARGE_STORAGEDIR_LIMIT
;
100 #else /* !defined(MUST_UNMAP_TO_UNLINK) */
101 /* Otherwise, we can just tell the storagedir to use the same limits
103 storagedir_max_entries
= max_entries
;
104 #endif /* defined(MUST_UNMAP_TO_UNLINK) */
106 cache
->dir
= storage_dir_new(directory
, storagedir_max_entries
);
113 consensus_cache_rescan(cache
);
117 /** Return true if it's okay to put more entries in this cache than
118 * its official file limit.
120 * (We need this method on Windows, where we can't unlink files that are still
121 * in use, and therefore might need to temporarily exceed the file limit until
122 * the no-longer-wanted files are deletable.)
125 consensus_cache_may_overallocate(consensus_cache_t
*cache
)
128 #ifdef MUST_UNMAP_TO_UNLINK
135 // HACK: GCC on Appveyor hates that we may assert before returning. Work around
139 #pragma GCC diagnostic push
140 #pragma GCC diagnostic ignored "-Wsuggest-attribute=noreturn"
142 #endif /* defined(_WIN32) */
145 * Tell the sandbox (if any) configured by <b>cfg</b> to allow the
146 * operations that <b>cache</b> will need.
149 consensus_cache_register_with_sandbox(consensus_cache_t
*cache
,
150 struct sandbox_cfg_elem_t
**cfg
)
152 #ifdef MUST_UNMAP_TO_UNLINK
153 /* Our Linux sandbox doesn't support huge file lists like the one that would
154 * be generated by using VERY_LARGE_STORAGEDIR_LIMIT above in
155 * consensus_cache_open(). Since the Linux sandbox is the only one we have
156 * right now, we just assert that we never reach this point when we've had
157 * to use VERY_LARGE_STORAGEDIR_LIMIT.
159 * If at some point in the future we have a different sandbox mechanism that
160 * can handle huge file lists, we can remove this assertion or make it
163 tor_assert_nonfatal_unreached();
164 #endif /* defined(MUST_UNMAP_TO_UNLINK) */
165 return storage_dir_register_with_sandbox(cache
->dir
, cfg
);
170 #pragma GCC diagnostic pop
175 * Helper: clear all entries from <b>cache</b> (but do not delete
176 * any that aren't marked for removal).
179 consensus_cache_clear(consensus_cache_t
*cache
)
181 consensus_cache_delete_pending(cache
, 0);
183 SMARTLIST_FOREACH_BEGIN(cache
->entries
, consensus_cache_entry_t
*, ent
) {
184 ent
->in_cache
= NULL
;
185 consensus_cache_entry_decref(ent
);
186 } SMARTLIST_FOREACH_END(ent
);
187 smartlist_free(cache
->entries
);
188 cache
->entries
= NULL
;
192 * Drop all storage held by <b>cache</b>.
195 consensus_cache_free_(consensus_cache_t
*cache
)
200 if (cache
->entries
) {
201 consensus_cache_clear(cache
);
203 storage_dir_free(cache
->dir
);
208 * Write <b>datalen</b> bytes of data at <b>data</b> into the <b>cache</b>,
209 * labeling that data with <b>labels</b>. On failure, return NULL. On
210 * success, return a newly created consensus_cache_entry_t.
212 * The returned value will be owned by the cache, and you will have a
213 * reference to it. Call consensus_cache_entry_decref() when you are
216 * The provided <b>labels</b> MUST have distinct keys: if they don't,
217 * this API does not specify which values (if any) for the duplicate keys
218 * will be considered.
220 consensus_cache_entry_t
*
221 consensus_cache_add(consensus_cache_t
*cache
,
222 const config_line_t
*labels
,
227 int r
= storage_dir_save_labeled_to_file(cache
->dir
,
228 labels
, data
, datalen
, &fname
);
229 if (r
< 0 || fname
== NULL
) {
232 consensus_cache_entry_t
*ent
=
233 tor_malloc_zero(sizeof(consensus_cache_entry_t
));
234 ent
->magic
= CCE_MAGIC
;
236 ent
->labels
= config_lines_dup(labels
);
237 ent
->in_cache
= cache
;
238 ent
->unused_since
= TIME_MAX
;
239 smartlist_add(cache
->entries
, ent
);
240 /* Start the reference count at 2: the caller owns one copy, and the
241 * cache owns another.
249 * Given a <b>cache</b>, return some entry for which <b>key</b>=<b>value</b>.
250 * Return NULL if no such entry exists.
252 * Does not adjust reference counts.
254 consensus_cache_entry_t
*
255 consensus_cache_find_first(consensus_cache_t
*cache
,
259 smartlist_t
*tmp
= smartlist_new();
260 consensus_cache_find_all(tmp
, cache
, key
, value
);
261 consensus_cache_entry_t
*ent
= NULL
;
262 if (smartlist_len(tmp
))
263 ent
= smartlist_get(tmp
, 0);
269 * Given a <b>cache</b>, add every entry to <b>out</b> for which
270 * <b>key</b>=<b>value</b>. If <b>key</b> is NULL, add every entry.
272 * Do not add any entry that has been marked for removal.
274 * Does not adjust reference counts.
277 consensus_cache_find_all(smartlist_t
*out
,
278 consensus_cache_t
*cache
,
282 SMARTLIST_FOREACH_BEGIN(cache
->entries
, consensus_cache_entry_t
*, ent
) {
283 if (ent
->can_remove
== 1) {
284 /* We want to delete this; pretend it isn't there. */
288 smartlist_add(out
, ent
);
291 const char *found_val
= consensus_cache_entry_get_value(ent
, key
);
292 if (found_val
&& !strcmp(value
, found_val
)) {
293 smartlist_add(out
, ent
);
295 } SMARTLIST_FOREACH_END(ent
);
299 * Given a list of consensus_cache_entry_t, remove all those entries
300 * that do not have <b>key</b>=<b>value</b> in their labels.
302 * Does not adjust reference counts.
305 consensus_cache_filter_list(smartlist_t
*lst
,
309 if (BUG(lst
== NULL
))
310 return; // LCOV_EXCL_LINE
313 SMARTLIST_FOREACH_BEGIN(lst
, consensus_cache_entry_t
*, ent
) {
314 const char *found_val
= consensus_cache_entry_get_value(ent
, key
);
315 if (! found_val
|| strcmp(value
, found_val
)) {
316 SMARTLIST_DEL_CURRENT(lst
, ent
);
318 } SMARTLIST_FOREACH_END(ent
);
322 * If <b>ent</b> has a label with the given <b>key</b>, return its
323 * value. Otherwise return NULL.
325 * The return value is only guaranteed to be valid for as long as you
326 * hold a reference to <b>ent</b>.
329 consensus_cache_entry_get_value(const consensus_cache_entry_t
*ent
,
332 const config_line_t
*match
= config_line_find(ent
->labels
, key
);
340 * Return a pointer to the labels in <b>ent</b>.
342 * This pointer is only guaranteed to be valid for as long as you
343 * hold a reference to <b>ent</b>.
345 const config_line_t
*
346 consensus_cache_entry_get_labels(const consensus_cache_entry_t
*ent
)
352 * Increase the reference count of <b>ent</b>.
355 consensus_cache_entry_incref(consensus_cache_entry_t
*ent
)
357 if (BUG(ent
->magic
!= CCE_MAGIC
))
358 return; // LCOV_EXCL_LINE
360 ent
->unused_since
= TIME_MAX
;
364 * Release a reference held to <b>ent</b>.
366 * If it was the last reference, ent will be freed. Therefore, you must not
367 * use <b>ent</b> after calling this function.
370 consensus_cache_entry_decref(consensus_cache_entry_t
*ent
)
374 if (BUG(ent
->refcnt
<= 0))
375 return; // LCOV_EXCL_LINE
376 if (BUG(ent
->magic
!= CCE_MAGIC
))
377 return; // LCOV_EXCL_LINE
381 if (ent
->refcnt
== 1 && ent
->in_cache
) {
382 /* Only the cache has a reference: we don't need to keep the file
385 if (ent
->release_aggressively
) {
386 consensus_cache_entry_unmap(ent
);
388 ent
->unused_since
= approx_time();
397 /* Refcount is zero; we can free it. */
399 consensus_cache_entry_unmap(ent
);
401 tor_free(ent
->fname
);
402 config_free_lines(ent
->labels
);
403 consensus_cache_entry_handles_clear(ent
);
404 memwipe(ent
, 0, sizeof(consensus_cache_entry_t
));
409 * Mark <b>ent</b> for deletion from the cache. Deletion will not occur
410 * until the cache is the only place that holds a reference to <b>ent</b>.
413 consensus_cache_entry_mark_for_removal(consensus_cache_entry_t
*ent
)
419 * Mark <b>ent</b> as the kind of entry that we don't need to keep mmap'd for
420 * any longer than we're actually using it.
423 consensus_cache_entry_mark_for_aggressive_release(consensus_cache_entry_t
*ent
)
425 ent
->release_aggressively
= 1;
429 * Try to read the body of <b>ent</b> into memory if it isn't already
430 * loaded. On success, set *<b>body_out</b> to the body, *<b>sz_out</b>
431 * to its size, and return 0. On failure return -1.
433 * The resulting body pointer will only be valid for as long as you
434 * hold a reference to <b>ent</b>.
437 consensus_cache_entry_get_body(const consensus_cache_entry_t
*ent
,
438 const uint8_t **body_out
,
441 if (BUG(ent
->magic
!= CCE_MAGIC
))
442 return -1; // LCOV_EXCL_LINE
448 consensus_cache_entry_map((consensus_cache_t
*)ent
->in_cache
,
449 (consensus_cache_entry_t
*)ent
);
455 *body_out
= ent
->body
;
456 *sz_out
= ent
->bodylen
;
461 * Unmap every mmap'd element of <b>cache</b> that has been unused
462 * since <b>cutoff</b>.
465 consensus_cache_unmap_lazy(consensus_cache_t
*cache
, time_t cutoff
)
467 SMARTLIST_FOREACH_BEGIN(cache
->entries
, consensus_cache_entry_t
*, ent
) {
468 tor_assert_nonfatal(ent
->in_cache
== cache
);
469 if (ent
->refcnt
> 1 || BUG(ent
->in_cache
== NULL
)) {
470 /* Somebody is using this entry right now */
473 if (ent
->unused_since
> cutoff
) {
474 /* Has been unused only for a little while */
477 if (ent
->map
== NULL
) {
478 /* Not actually mapped. */
481 consensus_cache_entry_unmap(ent
);
482 } SMARTLIST_FOREACH_END(ent
);
486 * Return the number of currently unused filenames available in this cache.
489 consensus_cache_get_n_filenames_available(consensus_cache_t
*cache
)
492 int max
= cache
->max_entries
;
493 int used
= smartlist_len(storage_dir_list(cache
->dir
));
494 #ifdef MUST_UNMAP_TO_UNLINK
498 tor_assert_nonfatal(max
>= used
);
499 #endif /* defined(MUST_UNMAP_TO_UNLINK) */
504 * Delete every element of <b>cache</b> that has been marked with
505 * consensus_cache_entry_mark_for_removal. If <b>force</b> is false,
506 * retain those entries which are in use by something other than the cache.
509 consensus_cache_delete_pending(consensus_cache_t
*cache
, int force
)
511 SMARTLIST_FOREACH_BEGIN(cache
->entries
, consensus_cache_entry_t
*, ent
) {
512 tor_assert_nonfatal(ent
->in_cache
== cache
);
513 int force_ent
= force
;
514 #ifdef MUST_UNMAP_TO_UNLINK
515 /* We cannot delete anything with an active mmap on win32, so no
520 #endif /* defined(MUST_UNMAP_TO_UNLINK) */
522 if (ent
->refcnt
> 1 || BUG(ent
->in_cache
== NULL
)) {
523 /* Somebody is using this entry right now */
527 if (ent
->can_remove
== 0) {
528 /* Don't want to delete this. */
531 if (BUG(ent
->refcnt
<= 0)) {
532 continue; // LCOV_EXCL_LINE
535 SMARTLIST_DEL_CURRENT(cache
->entries
, ent
);
536 ent
->in_cache
= NULL
;
537 char *fname
= tor_strdup(ent
->fname
); /* save a copy */
538 consensus_cache_entry_decref(ent
);
539 storage_dir_remove_file(cache
->dir
, fname
);
541 } SMARTLIST_FOREACH_END(ent
);
545 * Internal helper: rescan <b>cache</b> and rebuild its list of entries.
548 consensus_cache_rescan(consensus_cache_t
*cache
)
550 if (cache
->entries
) {
551 consensus_cache_clear(cache
);
554 cache
->entries
= smartlist_new();
555 const smartlist_t
*fnames
= storage_dir_list(cache
->dir
);
556 SMARTLIST_FOREACH_BEGIN(fnames
, const char *, fname
) {
557 tor_mmap_t
*map
= NULL
;
558 config_line_t
*labels
= NULL
;
561 map
= storage_dir_map_labeled(cache
->dir
, fname
,
562 &labels
, &body
, &bodylen
);
564 /* The ERANGE error might come from tor_mmap_file() -- it means the file
565 * was empty. EINVAL might come from ..map_labeled() -- it means the
566 * file was misformatted. In both cases, we should just delete it.
568 if (errno
== ERANGE
|| errno
== EINVAL
) {
569 log_warn(LD_FS
, "Found %s file %s in consensus cache; removing it.",
570 errno
== ERANGE
? "empty" : "misformatted",
572 storage_dir_remove_file(cache
->dir
, fname
);
574 /* Can't load this; continue */
575 log_warn(LD_FS
, "Unable to map file %s from consensus cache: %s",
576 escaped(fname
), strerror(errno
));
580 consensus_cache_entry_t
*ent
=
581 tor_malloc_zero(sizeof(consensus_cache_entry_t
));
582 ent
->magic
= CCE_MAGIC
;
583 ent
->fname
= tor_strdup(fname
);
584 ent
->labels
= labels
;
586 ent
->in_cache
= cache
;
587 ent
->unused_since
= TIME_MAX
;
588 smartlist_add(cache
->entries
, ent
);
589 tor_munmap_file(map
); /* don't actually need to keep this around */
590 } SMARTLIST_FOREACH_END(fname
);
594 * Make sure that <b>ent</b> is mapped into RAM.
597 consensus_cache_entry_map(consensus_cache_t
*cache
,
598 consensus_cache_entry_t
*ent
)
603 ent
->map
= storage_dir_map_labeled(cache
->dir
, ent
->fname
,
604 NULL
, &ent
->body
, &ent
->bodylen
);
605 ent
->unused_since
= TIME_MAX
;
609 * Unmap <b>ent</b> from RAM.
611 * Do not call this if something other than the cache is holding a reference
615 consensus_cache_entry_unmap(consensus_cache_entry_t
*ent
)
617 ent
->unused_since
= TIME_MAX
;
621 tor_munmap_file(ent
->map
);
625 ent
->unused_since
= TIME_MAX
;
628 HANDLE_IMPL(consensus_cache_entry
, consensus_cache_entry_t
, )
630 #ifdef TOR_UNIT_TESTS
632 * Testing only: Return true iff <b>ent</b> is mapped into memory.
634 * (In normal operation, this information is not exposed.)
637 consensus_cache_entry_is_mapped(consensus_cache_entry_t
*ent
)
640 tor_assert(ent
->body
);
643 tor_assert(!ent
->body
);
647 #endif /* defined(TOR_UNIT_TESTS) */