4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
24 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
25 * Copyright (c) 2019, Allan Jude
26 * Copyright (c) 2019, Klara Inc.
32 #include <sys/zfs_context.h>
41 #include <sys/zfs_refcount.h>
/*
 * Used by arc_flush() to inform arc_evict_state() that it should evict
 * all available buffers from the arc state being passed in.
 */
#define	ARC_EVICT_ALL	UINT64_MAX
/*
 * ZFS gets very unhappy when the maximum ARC size is smaller than the maximum
 * block size and a larger block is written. To leave some safety margin, we
 * limit the minimum for zfs_arc_max to the maximum transaction size.
 */
#define	MIN_ARC_MAX	DMU_MAX_ACCESS
/*
 * Store x, shifted right by SPA_MINBLOCKSHIFT, in (hdr)->b_lsize.
 * ASSERTs IS_P2ALIGNED(x, 1U << SPA_MINBLOCKSHIFT) first.
 * x is expanded twice, so it must not have side effects.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define	HDR_SET_LSIZE(hdr, x) do { \
	ASSERT(IS_P2ALIGNED((x), 1U << SPA_MINBLOCKSHIFT)); \
	(hdr)->b_lsize = ((x) >> SPA_MINBLOCKSHIFT); \
} while (0)
/*
 * Store x, shifted right by SPA_MINBLOCKSHIFT, in (hdr)->b_psize.
 * ASSERTs IS_P2ALIGNED(x, 1U << SPA_MINBLOCKSHIFT) first.
 * x is expanded twice, so it must not have side effects.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define	HDR_SET_PSIZE(hdr, x) do { \
	ASSERT(IS_P2ALIGNED((x), 1U << SPA_MINBLOCKSHIFT)); \
	(hdr)->b_psize = ((x) >> SPA_MINBLOCKSHIFT); \
} while (0)
/*
 * The l2size in the header is only used by L2 cache.
 *
 * Store x, shifted right by SPA_MINBLOCKSHIFT, in (hdr)->b_l2size.
 * ASSERTs IS_P2ALIGNED(x, 1U << SPA_MINBLOCKSHIFT) first.
 * x is expanded twice, so it must not have side effects.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define	HDR_SET_L2SIZE(hdr, x) do { \
	ASSERT(IS_P2ALIGNED((x), 1U << SPA_MINBLOCKSHIFT)); \
	(hdr)->b_l2size = ((x) >> SPA_MINBLOCKSHIFT); \
} while (0)
/*
 * Read back the size fields of a header: each macro yields the stored
 * field shifted left by SPA_MINBLOCKSHIFT, the inverse of the shift applied
 * by the corresponding HDR_SET_* macro.
 */
#define	HDR_GET_LSIZE(hdr)	((hdr)->b_lsize << SPA_MINBLOCKSHIFT)
#define	HDR_GET_PSIZE(hdr)	((hdr)->b_psize << SPA_MINBLOCKSHIFT)
#define	HDR_GET_L2SIZE(hdr)	((hdr)->b_l2size << SPA_MINBLOCKSHIFT)
/*
 * Forward typedefs for the ARC structures, so that the prototypes in this
 * header can refer to them by pointer.
 */
typedef struct arc_buf_hdr arc_buf_hdr_t;
typedef struct arc_buf arc_buf_t;
typedef struct arc_prune arc_prune_t;
81 * Because the ARC can store encrypted data, errors (not due to bugs) may arise
82 * while transforming data into its desired format - specifically, when
83 * decrypting, the key may not be present, or the HMAC may not be correct
84 * which signifies deliberate tampering with the on-disk state
85 * (assuming that the checksum was correct). If any error occurs, the "buf"
86 * parameter will be NULL.
88 typedef void arc_read_done_func_t(zio_t
*zio
, const zbookmark_phys_t
*zb
,
89 const blkptr_t
*bp
, arc_buf_t
*buf
, void *priv
);
90 typedef void arc_write_done_func_t(zio_t
*zio
, arc_buf_t
*buf
, void *priv
);
/* Callback type registered through arc_add_prune_callback(). */
typedef void arc_prune_func_t(uint64_t bytes, void *priv);
93 /* Shared module parameters */
94 extern uint_t zfs_arc_average_blocksize
;
95 extern int l2arc_exclude_special
;
97 /* generic arc_done_func_t's which you can use */
98 arc_read_done_func_t arc_bcopy_func
;
99 arc_read_done_func_t arc_getbuf_func
;
101 /* generic arc_prune_func_t wrapper for callbacks */
103 arc_prune_func_t
*p_pfunc
;
107 zfs_refcount_t p_refcnt
;
/*
 * Eviction strategy selector.
 * NOTE(review): the closing brace and typedef name were missing from this
 * span; "arc_strategy_t" follows the naming used by the other enums in this
 * header - confirm against users of the type.
 */
typedef enum arc_strategy
{
	ARC_STRATEGY_META_ONLY = 0,	/* Evict only meta data buffers */
	ARC_STRATEGY_META_BALANCED = 1,	/* Evict data buffers if needed */
} arc_strategy_t;
/*
 * Flag bits used by the ARC.  Bits 0-6 are public, bits 7-23 are private to
 * the ARC, and bits 24-30 are placeholders for the compression mode.
 */
typedef enum arc_flags
{
	/*
	 * Public flags that can be passed into the ARC by external consumers.
	 */
	ARC_FLAG_WAIT = 1 << 0,			/* perform sync I/O */
	ARC_FLAG_NOWAIT = 1 << 1,		/* perform async I/O */
	ARC_FLAG_PREFETCH = 1 << 2,		/* I/O is a prefetch */
	ARC_FLAG_CACHED = 1 << 3,		/* I/O was in cache */
	ARC_FLAG_L2CACHE = 1 << 4,		/* cache in L2ARC */
	ARC_FLAG_UNCACHED = 1 << 5,		/* evict after use */
	ARC_FLAG_PRESCIENT_PREFETCH = 1 << 6,	/* long min lifespan */

	/*
	 * Private ARC flags.  These flags are private ARC only flags that
	 * will show up in b_flags in the arc_buf_hdr_t.  These flags should
	 * only be set by ARC code.
	 */
	ARC_FLAG_IN_HASH_TABLE = 1 << 7,	/* buffer is hashed */
	ARC_FLAG_IO_IN_PROGRESS = 1 << 8,	/* I/O in progress */
	ARC_FLAG_IO_ERROR = 1 << 9,		/* I/O failed for buf */
	ARC_FLAG_INDIRECT = 1 << 10,		/* indirect block */
	/* Indicates that block was read with ASYNC priority. */
	ARC_FLAG_PRIO_ASYNC_READ = 1 << 11,
	ARC_FLAG_L2_WRITING = 1 << 12,		/* write in progress */
	ARC_FLAG_L2_EVICTED = 1 << 13,		/* evicted during I/O */
	ARC_FLAG_L2_WRITE_HEAD = 1 << 14,	/* head of write list */
	/*
	 * Encrypted or authenticated on disk (may be plaintext in memory).
	 * This header has b_crypt_hdr allocated.  Does not include indirect
	 * blocks with checksums of MACs which will also have their X
	 * (encrypted) bit set in the bp.
	 */
	ARC_FLAG_PROTECTED = 1 << 15,
	/* data has not been authenticated yet */
	ARC_FLAG_NOAUTH = 1 << 16,
	/* indicates that the buffer contains metadata (otherwise, data) */
	ARC_FLAG_BUFC_METADATA = 1 << 17,

	/* Flags specifying whether optional hdr struct fields are defined */
	ARC_FLAG_HAS_L1HDR = 1 << 18,
	ARC_FLAG_HAS_L2HDR = 1 << 19,

	/*
	 * Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data.
	 * This allows the l2arc to use the blkptr's checksum to verify
	 * the data without having to store the checksum in the hdr.
	 */
	ARC_FLAG_COMPRESSED_ARC = 1 << 20,
	ARC_FLAG_SHARED_DATA = 1 << 21,

	/*
	 * Fail this arc_read() (with ENOENT) if the data is not already
	 * present.
	 * NOTE(review): the original sentence was cut off after "present";
	 * presumably it continued "in cache" - confirm.
	 */
	ARC_FLAG_CACHED_ONLY = 1 << 22,

	/*
	 * Don't instantiate an arc_buf_t for arc_read_done.
	 */
	ARC_FLAG_NO_BUF = 1 << 23,

	/*
	 * The arc buffer's compression mode is stored in the top 7 bits of the
	 * flags field, so these dummy flags are included so that MDB can
	 * interpret the enum properly.
	 */
	ARC_FLAG_COMPRESS_0 = 1 << 24,
	ARC_FLAG_COMPRESS_1 = 1 << 25,
	ARC_FLAG_COMPRESS_2 = 1 << 26,
	ARC_FLAG_COMPRESS_3 = 1 << 27,
	ARC_FLAG_COMPRESS_4 = 1 << 28,
	ARC_FLAG_COMPRESS_5 = 1 << 29,
	ARC_FLAG_COMPRESS_6 = 1 << 30
} arc_flags_t;
/* Per-buffer flag bits; the type is used for the b_flags field of arc_buf_t. */
typedef enum arc_buf_flags
{
	ARC_BUF_FLAG_SHARED = 1 << 0,
	ARC_BUF_FLAG_COMPRESSED = 1 << 1,
	/*
	 * Indicates whether this arc_buf_t is encrypted.
	 * NOTE(review): the original comment was cut off after
	 * "regardless of"; the remainder is not visible here.
	 */
	ARC_BUF_FLAG_ENCRYPTED = 1 << 2
} arc_buf_flags_t;
202 arc_buf_hdr_t
*b_hdr
;
205 arc_buf_flags_t b_flags
;
/*
 * Kind of content held by a buffer.
 * NOTE(review): the numbering embedded in the original lines skips one line
 * between the last enumerator and the closing brace; confirm no enumerator
 * was lost.
 */
typedef enum arc_buf_contents
{
	ARC_BUFC_DATA,		/* buffer contains data */
	ARC_BUFC_METADATA,	/* buffer contains metadata */
} arc_buf_contents_t;
215 * The following breakdowns of arc_size exist for kstat only.
217 typedef enum arc_space_type
{
225 ARC_SPACE_ABD_CHUNK_WASTE
,
229 typedef enum arc_state_type
{
240 typedef struct arc_buf_info
{
241 arc_state_type_t abi_state_type
;
242 arc_buf_contents_t abi_state_contents
;
248 uint32_t abi_mru_hits
;
249 uint32_t abi_mru_ghost_hits
;
250 uint32_t abi_mfu_hits
;
251 uint32_t abi_mfu_ghost_hits
;
252 uint32_t abi_l2arc_hits
;
254 uint64_t abi_l2arc_dattr
;
255 uint64_t abi_l2arc_asize
;
256 enum zio_compress abi_l2arc_compress
;
/*
 * Flags returned by arc_cached; describes which part of the arc
 * the block is cached in.
 */
#define	ARC_CACHED_EMBEDDED	(1U << 0)
#define	ARC_CACHED_IN_L1	(1U << 1)
#define	ARC_CACHED_IN_MRU	(1U << 2)
#define	ARC_CACHED_IN_MFU	(1U << 3)
#define	ARC_CACHED_IN_L2	(1U << 4)
269 void arc_space_consume(uint64_t space
, arc_space_type_t type
);
270 void arc_space_return(uint64_t space
, arc_space_type_t type
);
271 boolean_t
arc_is_metadata(arc_buf_t
*buf
);
272 boolean_t
arc_is_encrypted(arc_buf_t
*buf
);
273 boolean_t
arc_is_unauthenticated(arc_buf_t
*buf
);
274 enum zio_compress
arc_get_compression(arc_buf_t
*buf
);
275 void arc_get_raw_params(arc_buf_t
*buf
, boolean_t
*byteorder
, uint8_t *salt
,
276 uint8_t *iv
, uint8_t *mac
);
277 int arc_untransform(arc_buf_t
*buf
, spa_t
*spa
, const zbookmark_phys_t
*zb
,
279 void arc_convert_to_raw(arc_buf_t
*buf
, uint64_t dsobj
, boolean_t byteorder
,
280 dmu_object_type_t ot
, const uint8_t *salt
, const uint8_t *iv
,
282 arc_buf_t
*arc_alloc_buf(spa_t
*spa
, const void *tag
, arc_buf_contents_t type
,
284 arc_buf_t
*arc_alloc_compressed_buf(spa_t
*spa
, const void *tag
,
285 uint64_t psize
, uint64_t lsize
, enum zio_compress compression_type
,
287 arc_buf_t
*arc_alloc_raw_buf(spa_t
*spa
, const void *tag
, uint64_t dsobj
,
288 boolean_t byteorder
, const uint8_t *salt
, const uint8_t *iv
,
289 const uint8_t *mac
, dmu_object_type_t ot
, uint64_t psize
, uint64_t lsize
,
290 enum zio_compress compression_type
, uint8_t complevel
);
291 uint8_t arc_get_complevel(arc_buf_t
*buf
);
292 arc_buf_t
*arc_loan_buf(spa_t
*spa
, boolean_t is_metadata
, int size
);
293 arc_buf_t
*arc_loan_compressed_buf(spa_t
*spa
, uint64_t psize
, uint64_t lsize
,
294 enum zio_compress compression_type
, uint8_t complevel
);
295 arc_buf_t
*arc_loan_raw_buf(spa_t
*spa
, uint64_t dsobj
, boolean_t byteorder
,
296 const uint8_t *salt
, const uint8_t *iv
, const uint8_t *mac
,
297 dmu_object_type_t ot
, uint64_t psize
, uint64_t lsize
,
298 enum zio_compress compression_type
, uint8_t complevel
);
299 void arc_return_buf(arc_buf_t
*buf
, const void *tag
);
300 void arc_loan_inuse_buf(arc_buf_t
*buf
, const void *tag
);
301 void arc_buf_destroy(arc_buf_t
*buf
, const void *tag
);
302 void arc_buf_info(arc_buf_t
*buf
, arc_buf_info_t
*abi
, int state_index
);
303 uint64_t arc_buf_size(arc_buf_t
*buf
);
304 uint64_t arc_buf_lsize(arc_buf_t
*buf
);
305 void arc_buf_access(arc_buf_t
*buf
);
306 void arc_release(arc_buf_t
*buf
, const void *tag
);
307 int arc_released(arc_buf_t
*buf
);
308 void arc_buf_sigsegv(int sig
, siginfo_t
*si
, void *unused
);
309 void arc_buf_freeze(arc_buf_t
*buf
);
310 void arc_buf_thaw(arc_buf_t
*buf
);
312 int arc_referenced(arc_buf_t
*buf
);
/*
 * Macro form of arc_referenced(): always yields 0.  The argument is named
 * only inside sizeof, so it is type-checked but never evaluated.
 * NOTE(review): a function prototype with the same name is declared just
 * above, and the numbering embedded in the original lines skips a line on
 * either side of this one; a preprocessor conditional choosing between the
 * two forms was probably lost - confirm.
 */
#define	arc_referenced(buf) ((void) sizeof (buf), 0)
317 int arc_read(zio_t
*pio
, spa_t
*spa
, const blkptr_t
*bp
,
318 arc_read_done_func_t
*done
, void *priv
, zio_priority_t priority
,
319 int flags
, arc_flags_t
*arc_flags
, const zbookmark_phys_t
*zb
);
320 zio_t
*arc_write(zio_t
*pio
, spa_t
*spa
, uint64_t txg
, blkptr_t
*bp
,
321 arc_buf_t
*buf
, boolean_t uncached
, boolean_t l2arc
, const zio_prop_t
*zp
,
322 arc_write_done_func_t
*ready
, arc_write_done_func_t
*child_ready
,
323 arc_write_done_func_t
*done
, void *priv
, zio_priority_t priority
,
324 int zio_flags
, const zbookmark_phys_t
*zb
);
326 arc_prune_t
*arc_add_prune_callback(arc_prune_func_t
*func
, void *priv
);
327 void arc_remove_prune_callback(arc_prune_t
*p
);
328 void arc_freed(spa_t
*spa
, const blkptr_t
*bp
);
329 int arc_cached(spa_t
*spa
, const blkptr_t
*bp
);
331 void arc_flush(spa_t
*spa
, boolean_t retry
);
332 void arc_flush_async(spa_t
*spa
);
333 void arc_tempreserve_clear(uint64_t reserve
);
334 int arc_tempreserve_space(spa_t
*spa
, uint64_t reserve
, uint64_t txg
);
335 boolean_t
arc_async_flush_guid_inuse(uint64_t load_guid
);
/*
 * Memory sizing entry points; all values are uint64_t.  Defined outside this
 * header.
 */
uint64_t arc_all_memory(void);
uint64_t arc_default_max(uint64_t min, uint64_t allmem);
uint64_t arc_target_bytes(void);
void arc_set_limits(uint64_t);
348 void l2arc_add_vdev(spa_t
*spa
, vdev_t
*vd
);
349 void l2arc_remove_vdev(vdev_t
*vd
);
350 boolean_t
l2arc_vdev_present(vdev_t
*vd
);
351 void l2arc_rebuild_vdev(vdev_t
*vd
, boolean_t reopen
);
352 boolean_t
l2arc_range_check_overlap(uint64_t bottom
, uint64_t top
,
/*
 * L2ARC lifecycle entry points; none takes arguments or returns a value.
 * Defined outside this header.
 */
void l2arc_init(void);
void l2arc_fini(void);
void l2arc_start(void);
void l2arc_stop(void);
358 void l2arc_spa_rebuild_start(spa_t
*spa
);
359 void l2arc_spa_rebuild_stop(spa_t
*spa
);
362 extern boolean_t arc_watch
;
369 #endif /* _SYS_ARC_H */