minix/servers/mib/main.c

   1 /* MIB service - main.c - request abstraction and first-level tree */
   2 /*
   3  * This is the Management Information Base (MIB) service.  Its one and only
   4  * task is to implement the sysctl(2) system call, which plays a fairly
   5  * important role in parts of *BSD userland.
   6  *
   7  * The sysctl(2) interface is used to access a variety of information.  In
   8  * order to obtain that information, and possibly modify it, the MIB service
   9  * calls into many other services.  The MIB service must therefore not be
  10  * called directly from other services, with the exception of ProcFS.  In fact,
  11  * ProcFS is currently the only service that is modeled as logically higher in
  12  * the MINIX3 service stack than MIB, something that itself is possible only
  13  * due to the nonblocking nature of VFS.  MIB may issue blocking calls to VFS.
  14  *
  15  * The MIB service is in the boot image because even init(8) makes use of
  16  * sysctl(2) during its own startup, so launching the MIB service at any later
  17  * time would make a proper implementation of sysctl(2) impossible.  Also, the
  18  * service needs superuser privileges because it may need to issue privileged
  19  * calls and obtain privileged information from other services.
  20  *
  21  * While most of the sysctl tree is maintained locally, the MIB service also
  22  * allows other services to register "remote" subtrees which are then handled
  23  * entirely by those services.  This feature, which works much like file system
  24  * mounting, allows 1) sysctl handling code to stay local to its corresponding
  25  * service, and 2) parts of the sysctl tree to adapt and expand dynamically as
  26  * optional services are started and stopped.  Compared to the MIB service's
  27  * local handling, remotely handled subtrees are subject to several additional
  28  * practical restrictions, however.  In the current implementation, the MIB
  29  * service makes blocking calls to remote services as needed; in the future,
  30  * these interactions could be made (more) asynchronous.
  31  *
  32  * The MIB service was created by David van Moolenbroek <david@minix3.org>.
  33  */
  34
  35 #include "mib.h"
  36
  37 /*
  38  * Most of these initially empty nodes are filled in by their corresponding
  39  * modules' _init calls; see mib_init below.  However, some subtrees are not
  40  * populated by the MIB service itself.  CTL_NET is expected to be populated
  41  * through registration of remote subtrees.  The libc sysctl(3) wrapper code
  42  * takes care of the CTL_USER subtree.  It must have an entry here though, or
  43  * sysctl(8) will not list it.  CTL_VENDOR is also empty, but writable, so that
  44  * it may be used by third parties.
  45  */
  46 static struct mib_node mib_table[] = {
  47 /* 1*/  [CTL_KERN]      = MIB_ENODE(_P | _RO, "kern", "High kernel"),
  48 /* 2*/  [CTL_VM]        = MIB_ENODE(_P | _RO, "vm", "Virtual memory"),
  49 /* 4*/  [CTL_NET]       = MIB_ENODE(_P | _RO, "net", "Networking"),
  50 /* 6*/  [CTL_HW]        = MIB_ENODE(_P | _RO, "hw", "Generic CPU, I/O"),
  51 /* 8*/  [CTL_USER]      = MIB_ENODE(_P | _RO, "user", "User-level"),
  52 /*11*/  [CTL_VENDOR]    = MIB_ENODE(_P | _RW, "vendor", "Vendor specific"),
  53 /*32*/  [CTL_MINIX]     = MIB_ENODE(_P | _RO, "minix", "MINIX3 specific"),
  54 };
  55
  56 /*
  57  * The root node of the tree.  The root node is used internally only--it is
  58  * impossible to access the root node itself from userland in any way.  The
  59  * node is writable by default, so that programs such as init(8) may create
  60  * their own top-level entries.
  61  */
  62 struct mib_node mib_root = MIB_NODE(_RW, mib_table, "", "");
  63
  64 /*
  65  * Structures describing old and new data as provided by userland.  The primary
  66  * advantage of these opaque structures is that we could in principle use them
  67  * to implement storage of small data results in the sysctl reply message, so
  68  * as to avoid the kernel copy, without changing any of the handler code.
  69  */
  70 struct mib_oldp {
  71         endpoint_t oldp_endpt;
  72         vir_bytes oldp_addr;
  73         size_t oldp_len;
  74 };
  75 /*
  76  * Same structure, different type: prevent accidental mixups, and avoid the
  77  * need to use __restrict everywhere.
  78  */
  79 struct mib_newp {
  80         endpoint_t newp_endpt;
  81         vir_bytes newp_addr;
  82         size_t newp_len;
  83 };
  84
  85 /*
  86  * Return TRUE or FALSE indicating whether the given offset is within the range
  87  * of data that is to be copied out.  This call can be used to test whether
  88  * certain bits of data need to be prepared for copying at all.
  89  */
  90 int
  91 mib_inrange(struct mib_oldp * oldp, size_t off)
  92 {
  93
  94         if (oldp == NULL)
  95                 return FALSE;
  96
  97         return (off < oldp->oldp_len);
  98 }
  99
 100 /*
 101  * Return the total length of the requested data.  This should not be used
 102  * directly except in highly unusual cases, such as particular node requests
 103  * where the request semantics blatantly violate overall sysctl(2) semantics.
 104  */
 105 size_t
 106 mib_getoldlen(struct mib_oldp * oldp)
 107 {
 108
 109         if (oldp == NULL)
 110                 return 0;
 111
 112         return oldp->oldp_len;
 113 }
 114
 115 /*
 116  * Copy out (partial) data to the user.  The copy is automatically limited to
 117  * the range of data requested by the user.  Return the requested length on
 118  * success (for the caller's convenience) or an error code on failure.
 119  */
 120 ssize_t
 121 mib_copyout(struct mib_oldp * __restrict oldp, size_t off,
 122         const void * __restrict buf, size_t size)
 123 {
 124         size_t len;
 125         int r;
 126
 127         len = size;
 128         assert(len <= SSIZE_MAX);
 129
 130         if (oldp == NULL || off >= oldp->oldp_len)
 131                 return size; /* nothing to do */
 132
 133         if (len > oldp->oldp_len - off)
 134                 len = oldp->oldp_len - off;
 135
 136         if ((r = sys_datacopy(SELF, (vir_bytes)buf, oldp->oldp_endpt,
 137             oldp->oldp_addr + off, len)) != OK)
 138                 return r;
 139
 140         return size;
 141 }
 142
 143 /*
 144  * Override the oldlen value returned from the call, in situations where an
 145  * error is thrown as well.
 146  */
 147 void
 148 mib_setoldlen(struct mib_call * call, size_t oldlen)
 149 {
 150
 151         call->call_reslen = oldlen;
 152 }
 153
 154 /*
 155  * Return the new data length as provided by the user, or 0 if the user did not
 156  * supply new data.
 157  */
 158 size_t
 159 mib_getnewlen(struct mib_newp * newp)
 160 {
 161
 162         if (newp == NULL)
 163                 return 0;
 164
 165         return newp->newp_len;
 166 }
 167
 168 /*
 169  * Copy in data from the user.  The given length must match exactly the length
 170  * given by the user.  Return OK or an error code.
 171  */
 172 int
 173 mib_copyin(struct mib_newp * __restrict newp, void * __restrict buf,
 174         size_t len)
 175 {
 176
 177         if (newp == NULL || len != newp->newp_len)
 178                 return EINVAL;
 179
 180         if (len == 0)
 181                 return OK;
 182
 183         return sys_datacopy(newp->newp_endpt, newp->newp_addr, SELF,
 184             (vir_bytes)buf, len);
 185 }
 186
 187 /*
 188  * Copy in auxiliary data from the user, based on a user pointer obtained from
 189  * data copied in earlier through mib_copyin().
 190  */
 191 int
 192 mib_copyin_aux(struct mib_newp * __restrict newp, vir_bytes addr,
 193         void * __restrict buf, size_t len)
 194 {
 195
 196         assert(newp != NULL);
 197
 198         if (len == 0)
 199                 return OK;
 200
 201         return sys_datacopy(newp->newp_endpt, addr, SELF, (vir_bytes)buf, len);
 202 }
 203
 204 /*
 205  * Create a grant for a call's old data region, if not NULL, for the given
 206  * endpoint.  On success, store the grant (or GRANT_INVALID) in grantp and the
 207  * length in lenp, and return OK.  On error, return an error code that must not
 208  * be ENOMEM.
 209  */
 210 int
 211 mib_relay_oldp(endpoint_t endpt, struct mib_oldp * __restrict oldp,
 212         cp_grant_id_t * grantp, size_t * __restrict lenp)
 213 {
 214
 215         if (oldp != NULL) {
 216                 *grantp = cpf_grant_magic(endpt, oldp->oldp_endpt,
 217                     oldp->oldp_addr, oldp->oldp_len, CPF_WRITE);
 218                 if (!GRANT_VALID(*grantp))
 219                         return EINVAL;
 220                 *lenp = oldp->oldp_len;
 221         } else {
 222                 *grantp = GRANT_INVALID;
 223                 *lenp = 0;
 224         }
 225
 226         return OK;
 227 }
 228
 229 /*
 230  * Create a grant for a call's new data region, if not NULL, for the given
 231  * endpoint.  On success, store the grant (or GRANT_INVALID) in grantp and the
 232  * length in lenp, and return OK.  On error, return an error code that must not
 233  * be ENOMEM.
 234  */
 235 int
 236 mib_relay_newp(endpoint_t endpt, struct mib_newp * __restrict newp,
 237         cp_grant_id_t * grantp, size_t * __restrict lenp)
 238 {
 239
 240         if (newp != NULL) {
 241                 *grantp = cpf_grant_magic(endpt, newp->newp_endpt,
 242                     newp->newp_addr, newp->newp_len, CPF_READ);
 243                 if (!GRANT_VALID(*grantp))
 244                         return EINVAL;
 245                 *lenp = newp->newp_len;
 246         } else {
 247                 *grantp = GRANT_INVALID;
 248                 *lenp = 0;
 249         }
 250
 251         return OK;
 252 }
 253
 254 /*
 255  * Check whether the user is allowed to perform privileged operations.  The
 256  * function returns a nonzero value if this is the case, and zero otherwise.
 257  * Authorization is performed only once per call.
 258  */
 259 int
 260 mib_authed(struct mib_call * call)
 261 {
 262
 263         if ((call->call_flags & (MIB_FLAG_AUTH | MIB_FLAG_NOAUTH)) == 0) {
 264                 /* Ask PM if this endpoint has superuser privileges. */
 265                 if (getnuid(call->call_endpt) == SUPER_USER)
 266                         call->call_flags |= MIB_FLAG_AUTH;
 267                 else
 268                         call->call_flags |= MIB_FLAG_NOAUTH;
 269         }
 270
 271         return (call->call_flags & MIB_FLAG_AUTH);
 272 }
 273
 274 /*
 275  * Implement the sysctl(2) system call.
 276  */
 277 static int
 278 mib_sysctl(message * __restrict m_in, int ipc_status,
 279         message * __restrict m_out)
 280 {
 281         vir_bytes oldaddr, newaddr;
 282         size_t oldlen, newlen;
 283         unsigned int namelen;
 284         int s, name[CTL_MAXNAME];
 285         endpoint_t endpt;
 286         struct mib_oldp oldp, *oldpp;
 287         struct mib_newp newp, *newpp;
 288         struct mib_call call;
 289         ssize_t r;
 290
 291         /* Only handle blocking calls.  Ignore everything else. */
 292         if (IPC_STATUS_CALL(ipc_status) != SENDREC)
 293                 return EDONTREPLY;
 294
 295         endpt = m_in->m_source;
 296         oldaddr = m_in->m_lc_mib_sysctl.oldp;
 297         oldlen = m_in->m_lc_mib_sysctl.oldlen;
 298         newaddr = m_in->m_lc_mib_sysctl.newp;
 299         newlen = m_in->m_lc_mib_sysctl.newlen;
 300         namelen = m_in->m_lc_mib_sysctl.namelen;
 301
 302         if (namelen == 0 || namelen > CTL_MAXNAME)
 303                 return EINVAL;
 304
 305         /*
 306          * In most cases, the entire name fits in the request message, so we
 307          * can avoid a kernel copy.
 308          */
 309         if (namelen > CTL_SHORTNAME) {
 310                 if ((s = sys_datacopy(endpt, m_in->m_lc_mib_sysctl.namep, SELF,
 311                     (vir_bytes)&name, sizeof(name[0]) * namelen)) != OK)
 312                         return s;
 313         } else
 314                 memcpy(name, m_in->m_lc_mib_sysctl.name,
 315                     sizeof(name[0]) * namelen);
 316
 317         /*
 318          * Set up a structure for the old data, if any.  When no old address is
 319          * given, be forgiving if oldlen is not zero, as the user may simply
 320          * not have initialized the variable before passing a pointer to it.
 321          */
 322         if (oldaddr != 0) {
 323                 oldp.oldp_endpt = endpt;
 324                 oldp.oldp_addr = oldaddr;
 325                 oldp.oldp_len = oldlen;
 326                 oldpp = &oldp;
 327         } else
 328                 oldpp = NULL;
 329
 330         /*
 331          * Set up a structure for the new data, if any.  If one of newaddr and
 332          * newlen is zero but not the other, we (like NetBSD) disregard both.
 333          */
 334         if (newaddr != 0 && newlen != 0) {
 335                 newp.newp_endpt = endpt;
 336                 newp.newp_addr = newaddr;
 337                 newp.newp_len = newlen;
 338                 newpp = &newp;
 339         } else
 340                 newpp = NULL;
 341
 342         /*
 343          * Set up a structure for other call parameters.  Most of these should
 344          * be used rarely, and we may want to add more later, so do not pass
 345          * all of them around as actual function parameters all the time.
 346          */
 347         call.call_endpt = endpt;
 348         call.call_name = name;
 349         call.call_namelen = namelen;
 350         call.call_flags = 0;
 351         call.call_reslen = 0;
 352
 353         r = mib_dispatch(&call, oldpp, newpp);
 354
 355         /*
 356          * From NetBSD: we copy out as much as we can from the old data, while
 357          * at the same time computing the full data length.  Then, here at the
 358          * end, if the entire result did not fit in the destination buffer, we
 359          * return ENOMEM instead of success, thus also returning a partial
 360          * result and the full data length.
 361          *
 362          * It is also possible that data are copied out along with a "real"
 363          * error.  In that case, we must report a nonzero resulting length
 364          * along with that error code.  This is currently the case when node
 365          * creation resulted in a collision, in which case the error code is
 366          * EEXIST while the existing node is copied out as well.
 367          */
 368         if (r >= 0) {
 369                 m_out->m_mib_lc_sysctl.oldlen = (size_t)r;
 370
 371                 if (oldaddr != 0 && oldlen < (size_t)r)
 372                         r = ENOMEM;
 373                 else
 374                         r = OK;
 375         } else
 376                 m_out->m_mib_lc_sysctl.oldlen = call.call_reslen;
 377
 378         return r;
 379 }
 380
 381 /*
 382  * Initialize the service.
 383  */
 384 static int
 385 mib_init(int type __unused, sef_init_info_t * info __unused)
 386 {
 387
 388         /*
 389          * Initialize pointers and sizes of subtrees in different modules.
 390          * This is needed because we cannot use sizeof on external arrays.
 391          * We do initialize the node entry (including any other fields)
 392          * statically through MIB_ENODE because that forces the array to be
 393          * large enough to store the entry.
 394          */
 395         mib_kern_init(&mib_table[CTL_KERN]);
 396         mib_vm_init(&mib_table[CTL_VM]);
 397         mib_hw_init(&mib_table[CTL_HW]);
 398         mib_minix_init(&mib_table[CTL_MINIX]);
 399
 400         /*
 401          * Now that the static tree is complete, go through the entire tree,
 402          * initializing miscellaneous fields.
 403          */
 404         mib_tree_init();
 405
 406         /* Prepare for requests to mount remote subtrees. */
 407         mib_remote_init();
 408
 409         return OK;
 410 }
 411
 412 /*
 413  * Perform SEF startup.
 414  */
 415 static void
 416 mib_startup(void)
 417 {
 418
 419         sef_setcb_init_fresh(mib_init);
 420         /*
 421          * If we restart we lose all dynamic state, which means we lose all
 422          * nodes that have been created at run time.  However, running with
 423          * only the static node tree is still better than not running at all.
 424          */
 425         sef_setcb_init_restart(mib_init);
 426
 427         sef_startup();
 428 }
 429
 430 /*
 431  * The Management Information Base (MIB) service.
 432  */
 433 int
 434 main(void)
 435 {
 436         message m_in, m_out;
 437         int r, ipc_status;
 438
 439         /* Perform initialization. */
 440         mib_startup();
 441
 442         /* The main message loop. */
 443         for (;;) {
 444                 /* Receive a request. */
 445                 if ((r = sef_receive_status(ANY, &m_in, &ipc_status)) != OK)
 446                         panic("sef_receive failed: %d", r);
 447
 448                 /* Process the request. */
 449                 if (is_ipc_notify(ipc_status)) {
 450                         /* We are not expecting any notifications. */
 451                         printf("MIB: notification from %d\n", m_in.m_source);
 452
 453                         continue;
 454                 }
 455
 456                 memset(&m_out, 0, sizeof(m_out));
 457
 458                 switch (m_in.m_type) {
 459                 case MIB_SYSCTL:
 460                         r = mib_sysctl(&m_in, ipc_status, &m_out);
 461
 462                         break;
 463
 464                 case MIB_REGISTER:
 465                         r = mib_register(&m_in, ipc_status);
 466
 467                         break;
 468
 469                 case MIB_DEREGISTER:
 470                         r = mib_deregister(&m_in, ipc_status);
 471
 472                         break;
 473
 474                 default:
 475                         if (IPC_STATUS_CALL(ipc_status) == SENDREC)
 476                                 r = ENOSYS;
 477                         else
 478                                 r = EDONTREPLY;
 479                 }
 480
 481                 /* Send a reply, if applicable. */
 482                 if (r != EDONTREPLY) {
 483                         m_out.m_type = r;
 484
 485                         if ((r = ipc_sendnb(m_in.m_source, &m_out)) != OK)
 486                                 printf("MIB: ipc_sendnb failed (%d)\n", r);
 487                 }
 488         }
 489
 490         /* NOTREACHED */
 491         return 0;
 492 }