1 /* MIB service - main.c - request abstraction and first-level tree */
3 * This is the Management Information Base (MIB) service. Its one and only
4 * task is to implement the sysctl(2) system call, which plays a fairly
5 * important role in parts of *BSD userland.
7 * The sysctl(2) interface is used to access a variety of information. In
8 * order to obtain that information, and possibly modify it, the MIB service
9 * calls into many other services. The MIB service must therefore not be
10 * called directly from other services, with the exception of ProcFS. In fact,
11 * ProcFS is currently the only service that is modeled as logically higher in
12 * the MINIX3 service stack than MIB, something that itself is possible only
13 * due to the nonblocking nature of VFS. MIB may issue blocking calls to VFS.
15 * The MIB service is in the boot image because even init(8) makes use of
16 * sysctl(2) during its own startup, so launching the MIB service at any later
17 * time would make a proper implementation of sysctl(2) impossible. Also, the
18 * service needs superuser privileges because it may need to issue privileged
19 * calls and obtain privileged information from other services.
21 * While most of the sysctl tree is maintained locally, the MIB service also
22 * allows other services to register "remote" subtrees which are then handled
23 * entirely by those services. This feature, which works much like file system
24 * mounting, allows 1) sysctl handling code to stay local to its corresponding
25 * service, and 2) parts of the sysctl tree to adapt and expand dynamically as
26 * optional services are started and stopped. Compared to the MIB service's
27 * local handling, remotely handled subtrees are subject to several additional
28 * practical restrictions, however. In the current implementation, the MIB
29 * service makes blocking calls to remote services as needed; in the future,
30 * these interactions could be made (more) asynchronous.
32 * The MIB service was created by David van Moolenbroek <david@minix3.org>.
38 * Most of these initially empty nodes are filled in by their corresponding
39 * modules' _init calls; see mib_init below. However, some subtrees are not
40 * populated by the MIB service itself. CTL_NET is expected to be populated
41 * through registration of remote subtrees. The libc sysctl(3) wrapper code
42 * takes care of the CTL_USER subtree. It must have an entry here though, or
43 * sysctl(8) will not list it. CTL_VENDOR is also empty, but writable, so that
44 * it may be used by third parties.
46 static struct mib_node mib_table
[] = {
47 /* 1*/ [CTL_KERN
] = MIB_ENODE(_P
| _RO
, "kern", "High kernel"),
48 /* 2*/ [CTL_VM
] = MIB_ENODE(_P
| _RO
, "vm", "Virtual memory"),
49 /* 4*/ [CTL_NET
] = MIB_ENODE(_P
| _RO
, "net", "Networking"),
50 /* 6*/ [CTL_HW
] = MIB_ENODE(_P
| _RO
, "hw", "Generic CPU, I/O"),
51 /* 8*/ [CTL_USER
] = MIB_ENODE(_P
| _RO
, "user", "User-level"),
52 /*11*/ [CTL_VENDOR
] = MIB_ENODE(_P
| _RW
, "vendor", "Vendor specific"),
53 /*32*/ [CTL_MINIX
] = MIB_ENODE(_P
| _RO
, "minix", "MINIX3 specific"),
57 * The root node of the tree. The root node is used internally only--it is
58 * impossible to access the root node itself from userland in any way. The
59 * node is writable by default, so that programs such as init(8) may create
60 * their own top-level entries.
/*
 * mib_root is initialized statically through MIB_NODE with the _RW flag and
 * with mib_table as the table argument, which links the first-level entries
 * (kern, vm, net, hw, user, vendor, minix) to this node.  Both trailing
 * string arguments are empty; presumably they are the node's name and
 * description, by analogy with the MIB_ENODE entries in mib_table -- confirm
 * against the macro definition.  Unlike mib_table, this object is not
 * declared static.
 */
62 struct mib_node mib_root
= MIB_NODE(_RW
, mib_table
, "", "");
65 * Structures describing old and new data as provided by userland. The primary
66 * advantage of these opaque structures is that we could in principle use them
67 * to implement storage of small data results in the sysctl reply message, so
68 * as to avoid the kernel copy, without changing any of the handler code.
71 endpoint_t oldp_endpt
;
76 * Same structure, different type: prevent accidental mixups, and avoid the
77 * need to use __restrict everywhere.
80 endpoint_t newp_endpt
;
86 * Return TRUE or FALSE indicating whether the given offset is within the range
87 * of data that is to be copied out. This call can be used to test whether
88 * certain bits of data need to be prepared for copying at all.
91 mib_inrange(struct mib_oldp
* oldp
, size_t off
)
97 return (off
< oldp
->oldp_len
);
101 * Return the total length of the requested data. This should not be used
102 * directly except in highly unusual cases, such as particular node requests
103 * where the request semantics blatantly violate overall sysctl(2) semantics.
106 mib_getoldlen(struct mib_oldp
* oldp
)
112 return oldp
->oldp_len
;
116 * Copy out (partial) data to the user. The copy is automatically limited to
117 * the range of data requested by the user. Return the requested length on
118 * success (for the caller's convenience) or an error code on failure.
121 mib_copyout(struct mib_oldp
* __restrict oldp
, size_t off
,
122 const void * __restrict buf
, size_t size
)
128 assert(len
<= SSIZE_MAX
);
130 if (oldp
== NULL
|| off
>= oldp
->oldp_len
)
131 return size
; /* nothing to do */
133 if (len
> oldp
->oldp_len
- off
)
134 len
= oldp
->oldp_len
- off
;
136 if ((r
= sys_datacopy(SELF
, (vir_bytes
)buf
, oldp
->oldp_endpt
,
137 oldp
->oldp_addr
+ off
, len
)) != OK
)
144 * Override the oldlen value returned from the call, in situations where an
145 * error is thrown as well.
148 mib_setoldlen(struct mib_call
* call
, size_t oldlen
)
151 call
->call_reslen
= oldlen
;
155 * Return the new data length as provided by the user, or 0 if the user did not
159 mib_getnewlen(struct mib_newp
* newp
)
165 return newp
->newp_len
;
169 * Copy in data from the user. The given length must match exactly the length
170 * given by the user. Return OK or an error code.
173 mib_copyin(struct mib_newp
* __restrict newp
, void * __restrict buf
,
177 if (newp
== NULL
|| len
!= newp
->newp_len
)
183 return sys_datacopy(newp
->newp_endpt
, newp
->newp_addr
, SELF
,
184 (vir_bytes
)buf
, len
);
188 * Copy in auxiliary data from the user, based on a user pointer obtained from
189 * data copied in earlier through mib_copyin().
192 mib_copyin_aux(struct mib_newp
* __restrict newp
, vir_bytes addr
,
193 void * __restrict buf
, size_t len
)
196 assert(newp
!= NULL
);
201 return sys_datacopy(newp
->newp_endpt
, addr
, SELF
, (vir_bytes
)buf
, len
);
205 * Create a grant for a call's old data region, if not NULL, for the given
206 * endpoint. On success, store the grant (or GRANT_INVALID) in grantp and the
207 * length in lenp, and return OK. On error, return an error code that must not
211 mib_relay_oldp(endpoint_t endpt
, struct mib_oldp
* __restrict oldp
,
212 cp_grant_id_t
* grantp
, size_t * __restrict lenp
)
216 *grantp
= cpf_grant_magic(endpt
, oldp
->oldp_endpt
,
217 oldp
->oldp_addr
, oldp
->oldp_len
, CPF_WRITE
);
218 if (!GRANT_VALID(*grantp
))
220 *lenp
= oldp
->oldp_len
;
222 *grantp
= GRANT_INVALID
;
230 * Create a grant for a call's new data region, if not NULL, for the given
231 * endpoint. On success, store the grant (or GRANT_INVALID) in grantp and the
232 * length in lenp, and return OK. On error, return an error code that must not
236 mib_relay_newp(endpoint_t endpt
, struct mib_newp
* __restrict newp
,
237 cp_grant_id_t
* grantp
, size_t * __restrict lenp
)
241 *grantp
= cpf_grant_magic(endpt
, newp
->newp_endpt
,
242 newp
->newp_addr
, newp
->newp_len
, CPF_READ
);
243 if (!GRANT_VALID(*grantp
))
245 *lenp
= newp
->newp_len
;
247 *grantp
= GRANT_INVALID
;
255 * Check whether the user is allowed to perform privileged operations. The
256 * function returns a nonzero value if this is the case, and zero otherwise.
257 * Authorization is performed only once per call.
260 mib_authed(struct mib_call
* call
)
263 if ((call
->call_flags
& (MIB_FLAG_AUTH
| MIB_FLAG_NOAUTH
)) == 0) {
264 /* Ask PM if this endpoint has superuser privileges. */
265 if (getnuid(call
->call_endpt
) == SUPER_USER
)
266 call
->call_flags
|= MIB_FLAG_AUTH
;
268 call
->call_flags
|= MIB_FLAG_NOAUTH
;
271 return (call
->call_flags
& MIB_FLAG_AUTH
);
275 * Implement the sysctl(2) system call.
278 mib_sysctl(message
* __restrict m_in
, int ipc_status
,
279 message
* __restrict m_out
)
281 vir_bytes oldaddr
, newaddr
;
282 size_t oldlen
, newlen
;
283 unsigned int namelen
;
284 int s
, name
[CTL_MAXNAME
];
286 struct mib_oldp oldp
, *oldpp
;
287 struct mib_newp newp
, *newpp
;
288 struct mib_call call
;
291 /* Only handle blocking calls. Ignore everything else. */
292 if (IPC_STATUS_CALL(ipc_status
) != SENDREC
)
295 endpt
= m_in
->m_source
;
296 oldaddr
= m_in
->m_lc_mib_sysctl
.oldp
;
297 oldlen
= m_in
->m_lc_mib_sysctl
.oldlen
;
298 newaddr
= m_in
->m_lc_mib_sysctl
.newp
;
299 newlen
= m_in
->m_lc_mib_sysctl
.newlen
;
300 namelen
= m_in
->m_lc_mib_sysctl
.namelen
;
302 if (namelen
== 0 || namelen
> CTL_MAXNAME
)
306 * In most cases, the entire name fits in the request message, so we
307 * can avoid a kernel copy.
309 if (namelen
> CTL_SHORTNAME
) {
310 if ((s
= sys_datacopy(endpt
, m_in
->m_lc_mib_sysctl
.namep
, SELF
,
311 (vir_bytes
)&name
, sizeof(name
[0]) * namelen
)) != OK
)
314 memcpy(name
, m_in
->m_lc_mib_sysctl
.name
,
315 sizeof(name
[0]) * namelen
);
318 * Set up a structure for the old data, if any. When no old address is
319 * given, be forgiving if oldlen is not zero, as the user may simply
320 * not have initialized the variable before passing a pointer to it.
323 oldp
.oldp_endpt
= endpt
;
324 oldp
.oldp_addr
= oldaddr
;
325 oldp
.oldp_len
= oldlen
;
331 * Set up a structure for the new data, if any. If one of newaddr and
332 * newlen is zero but not the other, we (like NetBSD) disregard both.
334 if (newaddr
!= 0 && newlen
!= 0) {
335 newp
.newp_endpt
= endpt
;
336 newp
.newp_addr
= newaddr
;
337 newp
.newp_len
= newlen
;
343 * Set up a structure for other call parameters. Most of these should
344 * be used rarely, and we may want to add more later, so do not pass
345 * all of them around as actual function parameters all the time.
347 call
.call_endpt
= endpt
;
348 call
.call_name
= name
;
349 call
.call_namelen
= namelen
;
351 call
.call_reslen
= 0;
353 r
= mib_dispatch(&call
, oldpp
, newpp
);
356 * From NetBSD: we copy out as much as we can from the old data, while
357 * at the same time computing the full data length. Then, here at the
358 * end, if the entire result did not fit in the destination buffer, we
359 * return ENOMEM instead of success, thus also returning a partial
360 * result and the full data length.
362 * It is also possible that data are copied out along with a "real"
363 * error. In that case, we must report a nonzero resulting length
364 * along with that error code. This is currently the case when node
365 * creation resulted in a collision, in which case the error code is
366 * EEXIST while the existing node is copied out as well.
369 m_out
->m_mib_lc_sysctl
.oldlen
= (size_t)r
;
371 if (oldaddr
!= 0 && oldlen
< (size_t)r
)
376 m_out
->m_mib_lc_sysctl
.oldlen
= call
.call_reslen
;
382 * Initialize the service.
385 mib_init(int type __unused
, sef_init_info_t
* info __unused
)
389 * Initialize pointers and sizes of subtrees in different modules.
390 * This is needed because we cannot use sizeof on external arrays.
391 * We do initialize the node entry (including any other fields)
392 * statically through MIB_ENODE because that forces the array to be
393 * large enough to store the entry.
395 mib_kern_init(&mib_table
[CTL_KERN
]);
396 mib_vm_init(&mib_table
[CTL_VM
]);
397 mib_hw_init(&mib_table
[CTL_HW
]);
398 mib_minix_init(&mib_table
[CTL_MINIX
]);
401 * Now that the static tree is complete, go through the entire tree,
402 * initializing miscellaneous fields.
406 /* Prepare for requests to mount remote subtrees. */
413 * Perform SEF startup.
419 sef_setcb_init_fresh(mib_init
);
421 * If we restart we lose all dynamic state, which means we lose all
422 * nodes that have been created at run time. However, running with
423 * only the static node tree is still better than not running at all.
425 sef_setcb_init_restart(mib_init
);
431 * The Management Information Base (MIB) service.
439 /* Perform initialization. */
442 /* The main message loop. */
444 /* Receive a request. */
445 if ((r
= sef_receive_status(ANY
, &m_in
, &ipc_status
)) != OK
)
446 panic("sef_receive failed: %d", r
);
448 /* Process the request. */
449 if (is_ipc_notify(ipc_status
)) {
450 /* We are not expecting any notifications. */
451 printf("MIB: notification from %d\n", m_in
.m_source
);
456 memset(&m_out
, 0, sizeof(m_out
));
458 switch (m_in
.m_type
) {
460 r
= mib_sysctl(&m_in
, ipc_status
, &m_out
);
465 r
= mib_register(&m_in
, ipc_status
);
470 r
= mib_deregister(&m_in
, ipc_status
);
475 if (IPC_STATUS_CALL(ipc_status
) == SENDREC
)
481 /* Send a reply, if applicable. */
482 if (r
!= EDONTREPLY
) {
485 if ((r
= ipc_sendnb(m_in
.m_source
, &m_out
)) != OK
)
486 printf("MIB: ipc_sendnb failed (%d)\n", r
);