/*
 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
 * Copyright (C) 2007 The Regents of the University of California.
 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 *
 * This file is part of the SPL, Solaris Porting Layer.
 *
 * The SPL is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * The SPL is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
 *
 * Solaris Porting Layer (SPL) Proc Implementation.
 *
 * Copyright (c) 2024, Rob Norris <robn@despairlabs.com>
 */
#include <sys/systeminfo.h>
#include <sys/kstat.h>
#include <sys/kmem.h>
#include <sys/kmem_cache.h>
#include <sys/vmem.h>
#include <sys/proc.h>
#include <linux/ctype.h>
#include <linux/kmod.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include <linux/version.h>
#include "zfs_gitrev.h"
/*
 * The PaX/grsecurity CONSTIFY_PLUGIN makes struct ctl_table const; the
 * __no_const annotation opts this typedef out so the proc handlers below
 * can modify their stack-local copies of the table.
 */
#if defined(CONSTIFY_PLUGIN)
typedef struct ctl_table __no_const spl_ctl_table;
#else
typedef struct ctl_table spl_ctl_table;
#endif

/*
 * Newer kernels pass a const struct ctl_table into proc handlers;
 * CONST_CTL_TABLE hides that prototype difference.
 */
#ifdef HAVE_PROC_HANDLER_CTL_TABLE_CONST
#define	CONST_CTL_TABLE	const struct ctl_table
#else
#define	CONST_CTL_TABLE	struct ctl_table
#endif
54 static unsigned long table_min
= 0;
55 static unsigned long table_max
= ~0;
57 static struct ctl_table_header
*spl_header
= NULL
;
58 #ifndef HAVE_REGISTER_SYSCTL_TABLE
59 static struct ctl_table_header
*spl_kmem
= NULL
;
60 static struct ctl_table_header
*spl_kstat
= NULL
;
62 static struct proc_dir_entry
*proc_spl
= NULL
;
63 static struct proc_dir_entry
*proc_spl_kmem
= NULL
;
64 static struct proc_dir_entry
*proc_spl_kmem_slab
= NULL
;
65 struct proc_dir_entry
*proc_spl_kstat
= NULL
;
#ifdef DEBUG_KMEM
/*
 * Report the kmem accounting counter referenced by table->data.  The
 * counter is an atomic(64)_t, so it is snapshotted into an unsigned long
 * and emitted through proc_doulongvec_minmax() on a scratch copy of the
 * table.  Writes are consumed but otherwise ignored.
 *
 * Returns 0 on success or a negative errno from the vector helper.
 */
static int
proc_domemused(CONST_CTL_TABLE *table, int write,
    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int rc = 0;
	unsigned long val;
	spl_ctl_table dummy = *table;

	dummy.data = &val;
	dummy.proc_handler = &proc_dointvec;
	dummy.extra1 = &table_min;
	dummy.extra2 = &table_max;

	if (write) {
		/* Swallow the write: advance the offset, change nothing */
		*ppos += *lenp;
	} else {
#ifdef HAVE_ATOMIC64_T
		val = atomic64_read((atomic64_t *)table->data);
#else
		val = atomic_read((atomic_t *)table->data);
#endif /* HAVE_ATOMIC64_T */
		rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
	}

	return (rc);
}
#endif /* DEBUG_KMEM */
97 proc_doslab(CONST_CTL_TABLE
*table
, int write
,
98 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
101 unsigned long val
= 0, mask
;
102 spl_ctl_table dummy
= *table
;
103 spl_kmem_cache_t
*skc
= NULL
;
106 dummy
.proc_handler
= &proc_dointvec
;
107 dummy
.extra1
= &table_min
;
108 dummy
.extra2
= &table_max
;
113 down_read(&spl_kmem_cache_sem
);
114 mask
= (unsigned long)table
->data
;
116 list_for_each_entry(skc
, &spl_kmem_cache_list
, skc_list
) {
118 /* Only use slabs of the correct kmem/vmem type */
119 if (!(skc
->skc_flags
& mask
))
122 /* Sum the specified field for selected slabs */
123 switch (mask
& (KMC_TOTAL
| KMC_ALLOC
| KMC_MAX
)) {
125 val
+= skc
->skc_slab_size
* skc
->skc_slab_total
;
128 val
+= skc
->skc_obj_size
* skc
->skc_obj_alloc
;
131 val
+= skc
->skc_obj_size
* skc
->skc_obj_max
;
136 up_read(&spl_kmem_cache_sem
);
137 rc
= proc_doulongvec_minmax(&dummy
, write
, buffer
, lenp
, ppos
);
144 proc_dohostid(CONST_CTL_TABLE
*table
, int write
,
145 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
149 spl_ctl_table dummy
= *table
;
152 dummy
.maxlen
= sizeof (str
) - 1;
155 snprintf(str
, sizeof (str
), "%lx",
156 (unsigned long) zone_get_hostid(NULL
));
158 /* always returns 0 */
159 proc_dostring(&dummy
, write
, buffer
, lenp
, ppos
);
163 * We can't use proc_doulongvec_minmax() in the write
164 * case here because hostid, while a hex value, has no
165 * leading 0x, which confuses the helper function.
168 hid
= simple_strtoul(str
, &end
, 16);
/*
 * Emit the two-line column header for /proc/spl/kmem/slab.
 * NOTE(review): exact column spacing is not visible in the source
 * fragment; taken from upstream spl-proc.c -- confirm alignment.
 */
static void
slab_seq_show_headers(struct seq_file *f)
{
	seq_printf(f,
	    "--------------------- cache ----------"
	    "--------------------------------------------- "
	    "----- slab ------ "
	    "---- object ----- "
	    "--- emergency ---\n");

	seq_printf(f,
	    "name                                  "
	    "  flags      size     alloc slabsize  objsize "
	    "total alloc   max "
	    "total alloc   max "
	    "dlock alloc   max\n");
}
195 slab_seq_show(struct seq_file
*f
, void *p
)
197 spl_kmem_cache_t
*skc
= p
;
199 ASSERT(skc
->skc_magic
== SKC_MAGIC
);
201 if (skc
->skc_flags
& KMC_SLAB
) {
203 * This cache is backed by a generic Linux kmem cache which
204 * has its own accounting. For these caches we only track
205 * the number of active allocated objects that exist within
206 * the underlying Linux slabs. For the overall statistics of
207 * the underlying Linux cache please refer to /proc/slabinfo.
209 spin_lock(&skc
->skc_lock
);
210 uint64_t objs_allocated
=
211 percpu_counter_sum(&skc
->skc_linux_alloc
);
212 seq_printf(f
, "%-36s ", skc
->skc_name
);
213 seq_printf(f
, "0x%05lx %9s %9lu %8s %8u "
214 "%5s %5s %5s %5s %5lu %5s %5s %5s %5s\n",
215 (long unsigned)skc
->skc_flags
,
217 (long unsigned)(skc
->skc_obj_size
* objs_allocated
),
219 (unsigned)skc
->skc_obj_size
,
221 (long unsigned)objs_allocated
,
223 spin_unlock(&skc
->skc_lock
);
227 spin_lock(&skc
->skc_lock
);
228 seq_printf(f
, "%-36s ", skc
->skc_name
);
229 seq_printf(f
, "0x%05lx %9lu %9lu %8u %8u "
230 "%5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu\n",
231 (long unsigned)skc
->skc_flags
,
232 (long unsigned)(skc
->skc_slab_size
* skc
->skc_slab_total
),
233 (long unsigned)(skc
->skc_obj_size
* skc
->skc_obj_alloc
),
234 (unsigned)skc
->skc_slab_size
,
235 (unsigned)skc
->skc_obj_size
,
236 (long unsigned)skc
->skc_slab_total
,
237 (long unsigned)skc
->skc_slab_alloc
,
238 (long unsigned)skc
->skc_slab_max
,
239 (long unsigned)skc
->skc_obj_total
,
240 (long unsigned)skc
->skc_obj_alloc
,
241 (long unsigned)skc
->skc_obj_max
,
242 (long unsigned)skc
->skc_obj_deadlock
,
243 (long unsigned)skc
->skc_obj_emergency
,
244 (long unsigned)skc
->skc_obj_emergency_max
);
245 spin_unlock(&skc
->skc_lock
);
250 slab_seq_start(struct seq_file
*f
, loff_t
*pos
)
255 down_read(&spl_kmem_cache_sem
);
257 slab_seq_show_headers(f
);
259 p
= spl_kmem_cache_list
.next
;
262 if (p
== &spl_kmem_cache_list
)
266 return (list_entry(p
, spl_kmem_cache_t
, skc_list
));
270 slab_seq_next(struct seq_file
*f
, void *p
, loff_t
*pos
)
272 spl_kmem_cache_t
*skc
= p
;
275 return ((skc
->skc_list
.next
== &spl_kmem_cache_list
) ?
276 NULL
: list_entry(skc
->skc_list
.next
, spl_kmem_cache_t
, skc_list
));
280 slab_seq_stop(struct seq_file
*f
, void *v
)
282 up_read(&spl_kmem_cache_sem
);
285 static const struct seq_operations slab_seq_ops
= {
286 .show
= slab_seq_show
,
287 .start
= slab_seq_start
,
288 .next
= slab_seq_next
,
289 .stop
= slab_seq_stop
,
293 proc_slab_open(struct inode
*inode
, struct file
*filp
)
295 return (seq_open(filp
, &slab_seq_ops
));
298 static const kstat_proc_op_t proc_slab_operations
= {
299 #ifdef HAVE_PROC_OPS_STRUCT
300 .proc_open
= proc_slab_open
,
301 .proc_read
= seq_read
,
302 .proc_lseek
= seq_lseek
,
303 .proc_release
= seq_release
,
305 .open
= proc_slab_open
,
308 .release
= seq_release
,
312 static struct ctl_table spl_kmem_table
[] = {
315 .procname
= "kmem_used",
316 .data
= &kmem_alloc_used
,
317 #ifdef HAVE_ATOMIC64_T
318 .maxlen
= sizeof (atomic64_t
),
320 .maxlen
= sizeof (atomic_t
),
321 #endif /* HAVE_ATOMIC64_T */
323 .proc_handler
= &proc_domemused
,
326 .procname
= "kmem_max",
327 .data
= &kmem_alloc_max
,
328 .maxlen
= sizeof (unsigned long),
329 .extra1
= &table_min
,
330 .extra2
= &table_max
,
332 .proc_handler
= &proc_doulongvec_minmax
,
334 #endif /* DEBUG_KMEM */
336 .procname
= "slab_kvmem_total",
337 .data
= (void *)(KMC_KVMEM
| KMC_TOTAL
),
338 .maxlen
= sizeof (unsigned long),
339 .extra1
= &table_min
,
340 .extra2
= &table_max
,
342 .proc_handler
= &proc_doslab
,
345 .procname
= "slab_kvmem_alloc",
346 .data
= (void *)(KMC_KVMEM
| KMC_ALLOC
),
347 .maxlen
= sizeof (unsigned long),
348 .extra1
= &table_min
,
349 .extra2
= &table_max
,
351 .proc_handler
= &proc_doslab
,
354 .procname
= "slab_kvmem_max",
355 .data
= (void *)(KMC_KVMEM
| KMC_MAX
),
356 .maxlen
= sizeof (unsigned long),
357 .extra1
= &table_min
,
358 .extra2
= &table_max
,
360 .proc_handler
= &proc_doslab
,
365 static struct ctl_table spl_kstat_table
[] = {
369 static struct ctl_table spl_table
[] = {
371 * NB No .strategy entries have been provided since
372 * sysctl(8) prefers to go via /proc for portability.
375 .procname
= "gitrev",
376 .data
= (char *)ZFS_META_GITREV
,
377 .maxlen
= sizeof (ZFS_META_GITREV
),
379 .proc_handler
= &proc_dostring
,
382 .procname
= "hostid",
384 .maxlen
= sizeof (unsigned long),
386 .proc_handler
= &proc_dohostid
,
388 #ifdef HAVE_REGISTER_SYSCTL_TABLE
392 .child
= spl_kmem_table
,
397 .child
= spl_kstat_table
,
#ifdef HAVE_REGISTER_SYSCTL_TABLE
/*
 * Directory wrappers required by the legacy register_sysctl_table()
 * API, which takes the full path from the sysctl root.
 */
static struct ctl_table spl_dir[] = {
	{
		.procname	= "spl",
		.mode		= 0555,
		.child		= spl_table,
	},
	{}
};

static struct ctl_table spl_root[] = {
	{
		.procname	= "kernel",
		.mode		= 0555,
		.child		= spl_dir,
	},
	{}
};
#endif
423 static void spl_proc_cleanup(void)
425 remove_proc_entry("kstat", proc_spl
);
426 remove_proc_entry("slab", proc_spl_kmem
);
427 remove_proc_entry("kmem", proc_spl
);
428 remove_proc_entry("spl", NULL
);
430 #ifndef HAVE_REGISTER_SYSCTL_TABLE
432 unregister_sysctl_table(spl_kstat
);
436 unregister_sysctl_table(spl_kmem
);
441 unregister_sysctl_table(spl_header
);
#ifndef HAVE_REGISTER_SYSCTL_TABLE

/*
 * Traditionally, struct ctl_table arrays have been terminated by an "empty"
 * sentinel element (specifically, one with .procname == NULL).
 *
 * Linux 6.6 began migrating away from this, adding register_sysctl_sz() so
 * that callers could provide the size directly, and redefining
 * register_sysctl() to just call register_sysctl_sz() with the array size. It
 * retained support for the terminating element so that existing callers would
 * continue to work.
 *
 * Linux 6.11 removed support for the terminating element, instead interpreting
 * it as a real malformed element, and rejecting it.
 *
 * In order to continue support older kernels, we retain the terminating
 * sentinel element for our sysctl tables, but instead detect availability of
 * register_sysctl_sz(). If it exists, we pass it the array size -1, stopping
 * the kernel from trying to process the terminator. For pre-6.6 kernels that
 * don't have register_sysctl_sz(), we just use register_sysctl(), which can
 * handle the terminating element as it always has.
 */
#ifdef HAVE_REGISTER_SYSCTL_SZ
#define	spl_proc_register_sysctl(p, t)	\
	register_sysctl_sz(p, t, ARRAY_SIZE(t)-1)
#else
#define	spl_proc_register_sysctl(p, t)	\
	register_sysctl(p, t)
#endif
#endif
482 #ifdef HAVE_REGISTER_SYSCTL_TABLE
483 spl_header
= register_sysctl_table(spl_root
);
484 if (spl_header
== NULL
)
487 spl_header
= spl_proc_register_sysctl("kernel/spl", spl_table
);
488 if (spl_header
== NULL
)
491 spl_kmem
= spl_proc_register_sysctl("kernel/spl/kmem", spl_kmem_table
);
492 if (spl_kmem
== NULL
) {
496 spl_kstat
= spl_proc_register_sysctl("kernel/spl/kstat",
498 if (spl_kstat
== NULL
) {
504 proc_spl
= proc_mkdir("spl", NULL
);
505 if (proc_spl
== NULL
) {
510 proc_spl_kmem
= proc_mkdir("kmem", proc_spl
);
511 if (proc_spl_kmem
== NULL
) {
516 proc_spl_kmem_slab
= proc_create_data("slab", 0444, proc_spl_kmem
,
517 &proc_slab_operations
, NULL
);
518 if (proc_spl_kmem_slab
== NULL
) {
523 proc_spl_kstat
= proc_mkdir("kstat", proc_spl
);
524 if (proc_spl_kstat
== NULL
) {