4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
25 #include <sys/atomic.h>
26 #include <sys/cmn_err.h>
27 #include <sys/id_space.h>
29 #include <sys/kstat.h>
31 #include <sys/modctl.h>
32 #include <sys/modhash.h>
33 #include <sys/mutex.h>
35 #include <sys/procset.h>
36 #include <sys/project.h>
37 #include <sys/resource.h>
39 #include <sys/siginfo.h>
40 #include <sys/strlog.h>
41 #include <sys/systm.h>
43 #include <sys/types.h>
44 #include <sys/policy.h>
48 * Resource controls (rctls)
50 * The rctl subsystem provides a mechanism for kernel components to
51 * register their individual resource controls with the system as a whole,
52 * such that those controls can subscribe to specific actions while being
53 * associated with the various process-model entities provided by the kernel:
54 * the process, the task, the project, and the zone. (In principle, only
55 * minor modifications would be required to connect the resource control
56 * functionality to non-process-model entities associated with the system.)
58 * Subsystems register their rctls via rctl_register(). Subsystems
59 * also wishing to provide additional limits on a given rctl can modify
60 * them once they have the rctl handle. Each subsystem should store the
61 * handle to their rctl for direct access.
63 * A primary dictionary, rctl_dict, contains a hash of id to the default
64 * control definition for each controlled resource-entity pair on the system.
65 * A secondary dictionary, rctl_dict_by_name, contains a hash of name to
66 * resource control handles. The resource control handles are distributed by
67 * the rctl_ids ID space. The handles are private and not to be
68 * advertised to userland; all userland interactions are via the rctl
71 * Entities inherit their rctls from their predecessor. Since projects have
72 * no ancestor, they inherit their rctls from the rctl dict for project
73 * rctls. It is expected that project controls will be set to their
74 * appropriate values shortly after project creation, presumably from a
75 * policy source such as the project database.
78 * The rctl_set_t attached to each of the process model entities is a simple
79 * hash table keyed on the rctl handle assigned at registration. The entries
80 * in the hash table are rctl_t's, whose relationship with the active control
81 * values on that resource and with the global state of the resource we
84 * rctl_dict[key] --> rctl_dict_entry
88 * rctl_set[key] ---> | rctl | --> value <-> value <-> system value --> NULL
91 * +------- cursor ------+
93 * That is, the rctl contains a back pointer to the global resource control
94 * state for this resource, which is also available in the rctl_dict hash
95 * table mentioned earlier. The rctl contains two pointers to resource
96 * control values: one, values, indicates the entire sequence of control
97 * values; the other, cursor, indicates the currently active control
98 * value--the next value to be enforced. The value list itself is an open,
99 * doubly-linked list, the last non-NULL member of which is the system value
100 * for that resource (being the theoretical/conventional maximum allowable
101 * value for the resource on this OS instance).
104 * Subsystems publishing rctls need not provide instances of all of the
105 * functions specified by the ops vector. In particular, if general
106 * rctl_*() entry points are not being called, certain functions can be
107 * omitted. These align as follows:
110 * You may wish to provide a set callback if locking circumstances prevent
111 * it or if the performance cost of requesting the enforced value from the
112 * resource control is prohibitively expensive. For instance, the currently
113 * enforced file size limit is stored on the process in the p_fsz_ctl to
114 * maintain read()/write() performance.
117 * You must provide a test callback if you are using the rctl_test()
118 * interface. An action callback is optional.
121 * You may wish to provide an action callback.
124 * New resource controls can be added to a running instance by loaded modules
125 * via registration. (The current implementation does not support unloadable
126 * modules; this functionality can be added if needed, via an
127 * activation/deactivation interface involving the manipulation of the
128 * ops vector for the resource control(s) needing to support unloading.)
130 * Control value ordering
131 * Because the rctl_val chain on each rctl must be navigable in a
132 * deterministic way, we have to define an ordering on the rctl_val_t's. The
133 * defined order is (flags & [maximal], value, flags & [deny-action],
137 * rctl_dict_lock must be acquired prior to rctl_lists_lock. Since
138 * rctl_dict_lock or rctl_lists_lock can be called at the enforcement point
139 * of any subsystem, holding subsystem locks, it is at all times inappropriate
140 * to call kmem_alloc(., KM_SLEEP) while holding either of these locks.
141 * Traversing any of the various resource control entity lists requires
142 * holding rctl_lists_lock.
144 * Each individual resource control set associated with an entity must have
145 * its rcs_lock held for the duration of any operations that would add
146 * resource controls or control values to the set.
148 * The locking subsequence of interest is: p_lock, rctl_dict_lock,
149 * rctl_lists_lock, entity->rcs_lock.
151 * The projects(4) database and project entity resource controls
152 * A special case is made for RCENTITY_PROJECT values set through the
153 * setproject(3PROJECT) interface. setproject() makes use of a private
154 * interface, setprojrctl(), which passes through an array of resource control
155 * blocks that need to be set while holding the entity->rcs_lock. This
156 * ensures that the act of modifying a project's resource controls is
157 * "atomic" within the kernel.
159 * Within the rctl sub-system, we provide two interfaces that are only used by
160 * the setprojrctl() code path - rctl_local_insert_all() and
161 * rctl_local_replace_all(). rctl_local_insert_all() will ensure that the
162 * resource values specified in *new_values are applied.
163 * rctl_local_replace_all() will purge the current rctl->rc_projdb and
164 * rctl->rc_values entries, and apply the *new_values.
166 * These functions modify not only the linked list of active resource controls
167 * (rctl->rc_values), but also a "cached" linked list (rctl->rc_projdb) of
168 * values set through these interfaces. To clarify:
170 * rctl->rc_values - a linked list of rctl_val_t. These are the active
171 * resource values associated with this rctl, and may have been set by
172 * setrctl() - via prctl(1M), or by setprojrctl() - via
173 * setproject(3PROJECT).
175 * rctl->rc_projdb - a linked list of rctl_val_t. These reflect the
176 * resource values set by the setprojrctl() code path. rc_projdb is not
177 * referenced by any other component of the rctl sub-system.
179 * As various locks are held when calling these functions, we ensure that all
180 * the possible memory allocations are performed prior to calling the
181 * function. *alloc_values is a linked list of uninitialized rctl_val_t,
182 * which may be used to duplicate a new resource control value (passed in as
183 * one of the members of the *new_values linked list), in order to populate
187 id_t max_rctl_hndl
= 32768;
188 int rctl_dict_size
= 64;
189 int rctl_set_size
= 8;
190 kmutex_t rctl_dict_lock
;
191 mod_hash_t
*rctl_dict
;
192 mod_hash_t
*rctl_dict_by_name
;
193 id_space_t
*rctl_ids
;
194 kmem_cache_t
*rctl_cache
; /* kmem cache for rctl structures */
195 kmem_cache_t
*rctl_val_cache
; /* kmem cache for rctl values */
197 kmutex_t rctl_lists_lock
;
198 rctl_dict_entry_t
*rctl_lists
[RC_MAX_ENTITY
+ 1];
201 * Default resource control operations and ops vector
202 * To be used if the particular rcontrol has no specific actions defined, or
203 * if the subsystem providing the control is quiescing (in preparation for
204 * unloading, presumably.)
206 * Resource controls with callbacks should fill the unused operations with the
207 * appropriate default impotent callback.
211 rcop_no_action(struct rctl
*r
, struct proc
*p
, rctl_entity_p_t
*e
)
217 rcop_no_usage(struct rctl
*r
, struct proc
*p
)
224 rcop_no_set(struct rctl
*r
, struct proc
*p
, rctl_entity_p_t
*e
, rctl_qty_t l
)
231 rcop_no_test(struct rctl
*r
, struct proc
*p
, rctl_entity_p_t
*e
,
232 struct rctl_val
*rv
, rctl_qty_t i
, uint_t f
)
237 rctl_ops_t rctl_default_ops
= {
245 * Default "absolute" resource control operation and ops vector
246 * Useful if there is no usage associated with the
251 rcop_absolute_test(struct rctl
*r
, struct proc
*p
, rctl_entity_p_t
*e
,
252 struct rctl_val
*rv
, rctl_qty_t i
, uint_t f
)
254 return (i
> rv
->rcv_value
);
257 rctl_ops_t rctl_absolute_ops
= {
266 rctl_dict_hash_by_id(void *hash_data
, mod_hash_key_t key
)
268 return ((uint_t
)(uintptr_t)key
% rctl_dict_size
);
272 rctl_dict_id_cmp(mod_hash_key_t key1
, mod_hash_key_t key2
)
274 uint_t u1
= (uint_t
)(uintptr_t)key1
;
275 uint_t u2
= (uint_t
)(uintptr_t)key2
;
287 rctl_dict_val_dtor(mod_hash_val_t val
)
289 rctl_dict_entry_t
*kr
= (rctl_dict_entry_t
*)val
;
291 kmem_free(kr
, sizeof (rctl_dict_entry_t
));
295 * size_t rctl_build_name_buf()
298 * rctl_build_name_buf() walks all active resource controls in the dictionary,
299 * building a buffer of continguous NUL-terminated strings.
302 * The size of the buffer is returned, the passed pointer's contents are
303 * modified to that of the location of the buffer.
306 * Caller must be in a context suitable for KM_SLEEP allocations.
309 rctl_build_name_buf(char **rbufp
)
311 size_t req_size
, cpy_size
;
315 rctl_rebuild_name_buf
:
316 req_size
= cpy_size
= 0;
319 * Calculate needed buffer length.
321 mutex_enter(&rctl_lists_lock
);
322 for (i
= 0; i
< RC_MAX_ENTITY
+ 1; i
++) {
323 rctl_dict_entry_t
*rde
;
325 for (rde
= rctl_lists
[i
];
328 req_size
+= strlen(rde
->rcd_name
) + 1;
330 mutex_exit(&rctl_lists_lock
);
332 rbufloc
= *rbufp
= kmem_alloc(req_size
, KM_SLEEP
);
335 * Copy rctl names into our buffer. If the copy length exceeds the
336 * allocate length (due to registration changes), stop copying, free the
337 * buffer, and start again.
339 mutex_enter(&rctl_lists_lock
);
340 for (i
= 0; i
< RC_MAX_ENTITY
+ 1; i
++) {
341 rctl_dict_entry_t
*rde
;
343 for (rde
= rctl_lists
[i
];
345 rde
= rde
->rcd_next
) {
346 size_t length
= strlen(rde
->rcd_name
) + 1;
350 if (cpy_size
> req_size
) {
351 kmem_free(*rbufp
, req_size
);
352 mutex_exit(&rctl_lists_lock
);
353 goto rctl_rebuild_name_buf
;
356 bcopy(rde
->rcd_name
, rbufloc
, length
);
360 mutex_exit(&rctl_lists_lock
);
366 * rctl_dict_entry_t *rctl_dict_lookup(const char *)
369 * rctl_dict_lookup() returns the resource control dictionary entry for the
370 * named resource control.
373 * A pointer to the appropriate resource control dictionary entry, or NULL if
374 * no such named entry exists.
377 * Caller must not be holding rctl_dict_lock.
380 rctl_dict_lookup(const char *name
)
382 rctl_dict_entry_t
*rde
;
384 mutex_enter(&rctl_dict_lock
);
386 if (mod_hash_find(rctl_dict_by_name
, (mod_hash_key_t
)name
,
387 (mod_hash_val_t
*)&rde
) == MH_ERR_NOTFOUND
) {
388 mutex_exit(&rctl_dict_lock
);
392 mutex_exit(&rctl_dict_lock
);
398 * rctl_hndl_t rctl_hndl_lookup(const char *)
401 * rctl_hndl_lookup() returns the resource control id (the "handle") for the
402 * named resource control.
405 * The appropriate id, or -1 if no such named entry exists.
408 * Caller must not be holding rctl_dict_lock.
411 rctl_hndl_lookup(const char *name
)
413 rctl_dict_entry_t
*rde
;
415 if ((rde
= rctl_dict_lookup(name
)) == NULL
)
418 return (rde
->rcd_id
);
422 * rctl_dict_entry_t * rctl_dict_lookup_hndl(rctl_hndl_t)
425 * rctl_dict_lookup_hndl() completes the public lookup functions, by returning
426 * the resource control dictionary entry matching a given resource control id.
429 * A pointer to the matching resource control dictionary entry, or NULL if the
430 * id does not match any existing entries.
433 * Caller must not be holding rctl_lists_lock.
436 rctl_dict_lookup_hndl(rctl_hndl_t hndl
)
440 mutex_enter(&rctl_lists_lock
);
441 for (i
= 0; i
< RC_MAX_ENTITY
+ 1; i
++) {
442 rctl_dict_entry_t
*rde
;
444 for (rde
= rctl_lists
[i
];
447 if (rde
->rcd_id
== hndl
) {
448 mutex_exit(&rctl_lists_lock
);
452 mutex_exit(&rctl_lists_lock
);
458 * void rctl_add_default_limit(const char *name, rctl_qty_t value,
459 * rctl_priv_t privilege, uint_t action)
462 * Create a default limit with specified value, privilege, and action.
468 rctl_add_default_limit(const char *name
, rctl_qty_t value
,
469 rctl_priv_t privilege
, uint_t action
)
472 rctl_dict_entry_t
*rde
;
474 dval
= kmem_cache_alloc(rctl_val_cache
, KM_SLEEP
);
475 bzero(dval
, sizeof (rctl_val_t
));
476 dval
->rcv_value
= value
;
477 dval
->rcv_privilege
= privilege
;
478 dval
->rcv_flagaction
= action
;
479 dval
->rcv_action_recip_pid
= -1;
481 rde
= rctl_dict_lookup(name
);
482 (void) rctl_val_list_insert(&rde
->rcd_default_value
, dval
);
486 * void rctl_add_legacy_limit(const char *name, const char *mname,
487 * const char *lname, rctl_qty_t dflt)
490 * Create a default privileged limit, using the value obtained from
491 * /etc/system if it exists and is greater than the specified default
492 * value. Exists primarily for System V IPC.
498 rctl_add_legacy_limit(const char *name
, const char *mname
, const char *lname
,
499 rctl_qty_t dflt
, rctl_qty_t max
)
503 if (!mod_sysvar(mname
, lname
, &qty
) || (qty
< dflt
))
509 rctl_add_default_limit(name
, qty
, RCPRIV_PRIVILEGED
, RCTL_LOCAL_DENY
);
513 rctl_entity_obtain_rset(rctl_dict_entry_t
*rcd
, struct proc
*p
)
515 rctl_set_t
*rset
= NULL
;
520 switch (rcd
->rcd_entity
) {
521 case RCENTITY_PROCESS
:
525 ASSERT(MUTEX_HELD(&p
->p_lock
));
526 if (p
->p_task
!= NULL
)
527 rset
= p
->p_task
->tk_rctls
;
529 case RCENTITY_PROJECT
:
530 ASSERT(MUTEX_HELD(&p
->p_lock
));
531 if (p
->p_task
!= NULL
&&
532 p
->p_task
->tk_proj
!= NULL
)
533 rset
= p
->p_task
->tk_proj
->kpj_rctls
;
536 ASSERT(MUTEX_HELD(&p
->p_lock
));
537 if (p
->p_zone
!= NULL
)
538 rset
= p
->p_zone
->zone_rctls
;
541 panic("unknown rctl entity type %d seen", rcd
->rcd_entity
);
549 rctl_entity_obtain_entity_p(rctl_entity_t entity
, struct proc
*p
,
552 e
->rcep_p
.proc
= NULL
;
556 case RCENTITY_PROCESS
:
560 ASSERT(MUTEX_HELD(&p
->p_lock
));
561 if (p
->p_task
!= NULL
)
562 e
->rcep_p
.task
= p
->p_task
;
564 case RCENTITY_PROJECT
:
565 ASSERT(MUTEX_HELD(&p
->p_lock
));
566 if (p
->p_task
!= NULL
&&
567 p
->p_task
->tk_proj
!= NULL
)
568 e
->rcep_p
.proj
= p
->p_task
->tk_proj
;
571 ASSERT(MUTEX_HELD(&p
->p_lock
));
572 if (p
->p_zone
!= NULL
)
573 e
->rcep_p
.zone
= p
->p_zone
;
576 panic("unknown rctl entity type %d seen", entity
);
582 rctl_gp_alloc(rctl_alloc_gp_t
*rcgp
)
586 if (rcgp
->rcag_nctls
> 0) {
587 rctl_t
*prev
= kmem_cache_alloc(rctl_cache
, KM_SLEEP
);
590 rcgp
->rcag_ctls
= prev
;
592 for (i
= 1; i
< rcgp
->rcag_nctls
; i
++) {
593 rctl
= kmem_cache_alloc(rctl_cache
, KM_SLEEP
);
594 prev
->rc_next
= rctl
;
598 rctl
->rc_next
= NULL
;
601 if (rcgp
->rcag_nvals
> 0) {
602 rctl_val_t
*prev
= kmem_cache_alloc(rctl_val_cache
, KM_SLEEP
);
603 rctl_val_t
*rval
= prev
;
605 rcgp
->rcag_vals
= prev
;
607 for (i
= 1; i
< rcgp
->rcag_nvals
; i
++) {
608 rval
= kmem_cache_alloc(rctl_val_cache
, KM_SLEEP
);
609 prev
->rcv_next
= rval
;
613 rval
->rcv_next
= NULL
;
619 rctl_gp_detach_val(rctl_alloc_gp_t
*rcgp
)
621 rctl_val_t
*rval
= rcgp
->rcag_vals
;
623 ASSERT(rcgp
->rcag_nvals
> 0);
625 rcgp
->rcag_vals
= rval
->rcv_next
;
627 rval
->rcv_next
= NULL
;
633 rctl_gp_detach_ctl(rctl_alloc_gp_t
*rcgp
)
635 rctl_t
*rctl
= rcgp
->rcag_ctls
;
637 ASSERT(rcgp
->rcag_nctls
> 0);
639 rcgp
->rcag_ctls
= rctl
->rc_next
;
641 rctl
->rc_next
= NULL
;
648 rctl_gp_free(rctl_alloc_gp_t
*rcgp
)
650 rctl_val_t
*rval
= rcgp
->rcag_vals
;
651 rctl_t
*rctl
= rcgp
->rcag_ctls
;
653 while (rval
!= NULL
) {
654 rctl_val_t
*next
= rval
->rcv_next
;
656 kmem_cache_free(rctl_val_cache
, rval
);
660 while (rctl
!= NULL
) {
661 rctl_t
*next
= rctl
->rc_next
;
663 kmem_cache_free(rctl_cache
, rctl
);
669 * void rctl_prealloc_destroy(rctl_alloc_gp_t *)
672 * Release all unused memory allocated via one of the "prealloc" functions:
673 * rctl_set_init_prealloc, rctl_set_dup_prealloc, or rctl_rlimit_set_prealloc.
679 * No restrictions on context.
682 rctl_prealloc_destroy(rctl_alloc_gp_t
*gp
)
685 kmem_free(gp
, sizeof (rctl_alloc_gp_t
));
689 * int rctl_val_cmp(rctl_val_t *, rctl_val_t *, int)
692 * This function defines an ordering to rctl_val_t's in order to allow
693 * for correct placement in value lists. When the imprecise flag is set,
694 * the action recipient is ignored. This is to facilitate insert,
695 * delete, and replace operations by rctlsys.
698 * 0 if the val_t's are are considered identical
699 * -1 if a is ordered lower than b
700 * 1 if a is lowered higher than b
703 * No restrictions on context.
706 rctl_val_cmp(rctl_val_t
*a
, rctl_val_t
*b
, int imprecise
)
708 if ((a
->rcv_flagaction
& RCTL_LOCAL_MAXIMAL
) <
709 (b
->rcv_flagaction
& RCTL_LOCAL_MAXIMAL
))
712 if ((a
->rcv_flagaction
& RCTL_LOCAL_MAXIMAL
) >
713 (b
->rcv_flagaction
& RCTL_LOCAL_MAXIMAL
))
716 if (a
->rcv_value
< b
->rcv_value
)
719 if (a
->rcv_value
> b
->rcv_value
)
722 if ((a
->rcv_flagaction
& RCTL_LOCAL_DENY
) <
723 (b
->rcv_flagaction
& RCTL_LOCAL_DENY
))
726 if ((a
->rcv_flagaction
& RCTL_LOCAL_DENY
) >
727 (b
->rcv_flagaction
& RCTL_LOCAL_DENY
))
730 if (a
->rcv_privilege
< b
->rcv_privilege
)
733 if (a
->rcv_privilege
> b
->rcv_privilege
)
739 if (a
->rcv_action_recip_pid
< b
->rcv_action_recip_pid
)
742 if (a
->rcv_action_recip_pid
> b
->rcv_action_recip_pid
)
749 rctl_val_list_find(rctl_val_t
**head
, rctl_val_t
*cval
)
751 rctl_val_t
*rval
= *head
;
753 while (rval
!= NULL
) {
754 if (rctl_val_cmp(cval
, rval
, 0) == 0)
757 rval
= rval
->rcv_next
;
765 * int rctl_val_list_insert(rctl_val_t **, rctl_val_t *)
768 * This function inserts the rctl_val_t into the value list provided.
769 * The insert is always successful unless if the value is a duplicate
770 * of one already in the list.
773 * 1 if the value was a duplicate of an existing value in the list.
774 * 0 if the insert was successful.
777 rctl_val_list_insert(rctl_val_t
**root
, rctl_val_t
*rval
)
782 rval
->rcv_next
= NULL
;
783 rval
->rcv_prev
= NULL
;
790 equiv
= rctl_val_cmp(rval
, *root
, 0);
796 rval
->rcv_next
= *root
;
797 rval
->rcv_next
->rcv_prev
= rval
;
804 while (prev
->rcv_next
!= NULL
&&
805 (equiv
= rctl_val_cmp(rval
, prev
->rcv_next
, 0)) > 0) {
806 prev
= prev
->rcv_next
;
812 rval
->rcv_next
= prev
->rcv_next
;
813 if (rval
->rcv_next
!= NULL
)
814 rval
->rcv_next
->rcv_prev
= rval
;
815 prev
->rcv_next
= rval
;
816 rval
->rcv_prev
= prev
;
822 rctl_val_list_delete(rctl_val_t
**root
, rctl_val_t
*rval
)
830 if (rctl_val_cmp(rval
, prev
, 0) == 0) {
831 *root
= prev
->rcv_next
;
833 (*root
)->rcv_prev
= NULL
;
835 kmem_cache_free(rctl_val_cache
, prev
);
840 while (prev
->rcv_next
!= NULL
&&
841 rctl_val_cmp(rval
, prev
->rcv_next
, 0) != 0) {
842 prev
= prev
->rcv_next
;
845 if (prev
->rcv_next
== NULL
) {
847 * If we navigate the entire list and cannot find a match, then
853 prev
= prev
->rcv_next
;
854 prev
->rcv_prev
->rcv_next
= prev
->rcv_next
;
855 if (prev
->rcv_next
!= NULL
)
856 prev
->rcv_next
->rcv_prev
= prev
->rcv_prev
;
858 kmem_cache_free(rctl_val_cache
, prev
);
864 rctl_val_list_dup(rctl_val_t
*rval
, rctl_alloc_gp_t
*ragp
, struct proc
*oldp
,
867 rctl_val_t
*head
= NULL
;
869 for (; rval
!= NULL
; rval
= rval
->rcv_next
) {
870 rctl_val_t
*dval
= rctl_gp_detach_val(ragp
);
872 bcopy(rval
, dval
, sizeof (rctl_val_t
));
873 dval
->rcv_prev
= dval
->rcv_next
= NULL
;
876 rval
->rcv_action_recipient
== NULL
||
877 rval
->rcv_action_recipient
== oldp
) {
878 if (rval
->rcv_privilege
== RCPRIV_BASIC
) {
879 dval
->rcv_action_recipient
= newp
;
880 dval
->rcv_action_recip_pid
= newp
->p_pid
;
882 dval
->rcv_action_recipient
= NULL
;
883 dval
->rcv_action_recip_pid
= -1;
886 (void) rctl_val_list_insert(&head
, dval
);
888 kmem_cache_free(rctl_val_cache
, dval
);
896 rctl_val_list_reset(rctl_val_t
*rval
)
898 for (; rval
!= NULL
; rval
= rval
->rcv_next
)
899 rval
->rcv_firing_time
= 0;
903 rctl_val_list_count(rctl_val_t
*rval
)
907 for (; rval
!= NULL
; rval
= rval
->rcv_next
)
915 rctl_val_list_free(rctl_val_t
*rval
)
917 while (rval
!= NULL
) {
918 rctl_val_t
*next
= rval
->rcv_next
;
920 kmem_cache_free(rctl_val_cache
, rval
);
927 * rctl_qty_t rctl_model_maximum(rctl_dict_entry_t *, struct proc *)
930 * In cases where the operating system supports more than one process
931 * addressing model, the operating system capabilities will exceed those of
932 * one or more of these models. Processes in a less capable model must have
933 * their resources accurately controlled, without diluting those of their
934 * descendants reached via exec(). rctl_model_maximum() returns the governing
935 * value for the specified process with respect to a resource control, such
936 * that the value can used for the RCTLOP_SET callback or compatability
940 * The maximum value for the given process for the specified resource control.
943 * No restrictions on context.
946 rctl_model_maximum(rctl_dict_entry_t
*rde
, struct proc
*p
)
948 if (p
->p_model
== DATAMODEL_NATIVE
)
949 return (rde
->rcd_max_native
);
951 return (rde
->rcd_max_ilp32
);
955 * rctl_qty_t rctl_model_value(rctl_dict_entry_t *, struct proc *, rctl_qty_t)
958 * Convenience function wrapping the rctl_model_maximum() functionality.
961 * The lesser of the process's maximum value and the given value for the
962 * specified resource control.
965 * No restrictions on context.
968 rctl_model_value(rctl_dict_entry_t
*rde
, struct proc
*p
, rctl_qty_t value
)
970 rctl_qty_t max
= rctl_model_maximum(rde
, p
);
972 return (value
< max
? value
: max
);
976 rctl_set_insert(rctl_set_t
*set
, rctl_hndl_t hndl
, rctl_t
*rctl
)
978 uint_t index
= hndl
% rctl_set_size
;
979 rctl_t
*next_ctl
, *prev_ctl
;
981 ASSERT(MUTEX_HELD(&set
->rcs_lock
));
983 rctl
->rc_next
= NULL
;
985 if (set
->rcs_ctls
[index
] == NULL
) {
986 set
->rcs_ctls
[index
] = rctl
;
990 if (hndl
< set
->rcs_ctls
[index
]->rc_id
) {
991 rctl
->rc_next
= set
->rcs_ctls
[index
];
992 set
->rcs_ctls
[index
] = rctl
;
997 for (next_ctl
= set
->rcs_ctls
[index
]->rc_next
,
998 prev_ctl
= set
->rcs_ctls
[index
];
1000 prev_ctl
= next_ctl
,
1001 next_ctl
= next_ctl
->rc_next
) {
1002 if (next_ctl
->rc_id
> hndl
) {
1003 rctl
->rc_next
= next_ctl
;
1004 prev_ctl
->rc_next
= rctl
;
1010 rctl
->rc_next
= next_ctl
;
1011 prev_ctl
->rc_next
= rctl
;
1015 * rctl_set_t *rctl_set_create()
1018 * Create an empty resource control set, suitable for attaching to a
1019 * controlled entity.
1022 * A pointer to the newly created set.
1025 * Safe for KM_SLEEP allocations.
1030 rctl_set_t
*rset
= kmem_zalloc(sizeof (rctl_set_t
), KM_SLEEP
);
1032 mutex_init(&rset
->rcs_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
1033 rset
->rcs_ctls
= kmem_zalloc(rctl_set_size
* sizeof (rctl_t
*),
1035 rset
->rcs_entity
= -1;
1041 * rctl_gp_alloc_t *rctl_set_init_prealloc(rctl_entity_t)
1044 * rctl_set_init_prealloc() examines the globally defined resource controls
1045 * and their default values and returns a resource control allocation group
1046 * populated with sufficient controls and values to form a representative
1047 * resource control set for the specified entity.
1050 * A pointer to the newly created allocation group.
1053 * Caller must be in a context suitable for KM_SLEEP allocations.
1056 rctl_set_init_prealloc(rctl_entity_t entity
)
1058 rctl_dict_entry_t
*rde
;
1059 rctl_alloc_gp_t
*ragp
= kmem_zalloc(sizeof (rctl_alloc_gp_t
), KM_SLEEP
);
1061 ASSERT(MUTEX_NOT_HELD(&curproc
->p_lock
));
1063 if (rctl_lists
[entity
] == NULL
)
1066 mutex_enter(&rctl_lists_lock
);
1068 for (rde
= rctl_lists
[entity
]; rde
!= NULL
; rde
= rde
->rcd_next
) {
1070 ragp
->rcag_nvals
+= rctl_val_list_count(rde
->rcd_default_value
);
1073 mutex_exit(&rctl_lists_lock
);
1075 rctl_gp_alloc(ragp
);
1081 * rctl_set_t *rctl_set_init(rctl_entity_t)
1084 * rctl_set_create() creates a resource control set, initialized with the
1085 * system infinite values on all registered controls, for attachment to a
1086 * system entity requiring resource controls, such as a process or a task.
1089 * A pointer to the newly filled set.
1092 * Caller must be holding p_lock on entry so that RCTLOP_SET() functions
1093 * may modify task and project members based on the proc structure
1097 rctl_set_init(rctl_entity_t entity
, struct proc
*p
, rctl_entity_p_t
*e
,
1098 rctl_set_t
*rset
, rctl_alloc_gp_t
*ragp
)
1100 rctl_dict_entry_t
*rde
;
1102 ASSERT(MUTEX_HELD(&p
->p_lock
));
1104 rset
->rcs_entity
= entity
;
1106 if (rctl_lists
[entity
] == NULL
)
1109 mutex_enter(&rctl_lists_lock
);
1110 mutex_enter(&rset
->rcs_lock
);
1112 for (rde
= rctl_lists
[entity
]; rde
!= NULL
; rde
= rde
->rcd_next
) {
1113 rctl_t
*rctl
= rctl_gp_detach_ctl(ragp
);
1115 rctl
->rc_dict_entry
= rde
;
1116 rctl
->rc_id
= rde
->rcd_id
;
1117 rctl
->rc_projdb
= NULL
;
1119 rctl
->rc_values
= rctl_val_list_dup(rde
->rcd_default_value
,
1121 rctl
->rc_cursor
= rctl
->rc_values
;
1123 ASSERT(rctl
->rc_cursor
!= NULL
);
1125 rctl_set_insert(rset
, rde
->rcd_id
, rctl
);
1127 RCTLOP_SET(rctl
, p
, e
, rctl_model_value(rctl
->rc_dict_entry
, p
,
1128 rctl
->rc_cursor
->rcv_value
));
1131 mutex_exit(&rset
->rcs_lock
);
1132 mutex_exit(&rctl_lists_lock
);
1138 rctl_dup(rctl_t
*rctl
, rctl_alloc_gp_t
*ragp
, struct proc
*oldp
,
1141 rctl_t
*dup
= rctl_gp_detach_ctl(ragp
);
1144 dup
->rc_id
= rctl
->rc_id
;
1145 dup
->rc_dict_entry
= rctl
->rc_dict_entry
;
1146 dup
->rc_next
= NULL
;
1147 dup
->rc_cursor
= NULL
;
1148 dup
->rc_values
= rctl_val_list_dup(rctl
->rc_values
, ragp
, oldp
, newp
);
1150 for (dval
= dup
->rc_values
;
1151 dval
!= NULL
; dval
= dval
->rcv_next
) {
1152 if (rctl_val_cmp(rctl
->rc_cursor
, dval
, 0) >= 0) {
1153 dup
->rc_cursor
= dval
;
1158 if (dup
->rc_cursor
== NULL
)
1159 dup
->rc_cursor
= dup
->rc_values
;
1165 rctl_set_fill_alloc_gp(rctl_set_t
*set
, rctl_alloc_gp_t
*ragp
)
1169 bzero(ragp
, sizeof (rctl_alloc_gp_t
));
1171 for (i
= 0; i
< rctl_set_size
; i
++) {
1172 rctl_t
*r
= set
->rcs_ctls
[i
];
1177 ragp
->rcag_nvals
+= rctl_val_list_count(r
->rc_values
);
1185 * rctl_alloc_gp_t *rctl_set_dup_prealloc(rctl_set_t *)
1188 * Given a resource control set, allocate a sufficiently large allocation
1189 * group to contain a duplicate of the set.
1192 * A pointer to the newly created allocation group.
1195 * Safe for KM_SLEEP allocations.
1198 rctl_set_dup_prealloc(rctl_set_t
*set
)
1200 rctl_alloc_gp_t
*ragp
= kmem_zalloc(sizeof (rctl_alloc_gp_t
), KM_SLEEP
);
1202 ASSERT(MUTEX_NOT_HELD(&curproc
->p_lock
));
1204 mutex_enter(&set
->rcs_lock
);
1205 rctl_set_fill_alloc_gp(set
, ragp
);
1206 mutex_exit(&set
->rcs_lock
);
1208 rctl_gp_alloc(ragp
);
1214 * int rctl_set_dup_ready(rctl_set_t *, rctl_alloc_gp_t *)
1217 * Verify that the allocation group provided is large enough to allow a
1218 * duplicate of the given resource control set to be constructed from its
1222 * 1 if the allocation group is sufficiently large, 0 otherwise.
1225 * rcs_lock must be held prior to entry.
1228 rctl_set_dup_ready(rctl_set_t
*set
, rctl_alloc_gp_t
*ragp
)
1230 rctl_alloc_gp_t curr_gp
;
1232 ASSERT(MUTEX_HELD(&set
->rcs_lock
));
1234 rctl_set_fill_alloc_gp(set
, &curr_gp
);
1236 if (curr_gp
.rcag_nctls
<= ragp
->rcag_nctls
&&
1237 curr_gp
.rcag_nvals
<= ragp
->rcag_nvals
)
1244 * rctl_set_t *rctl_set_dup(rctl_set_t *, struct proc *, struct proc *,
1245 * rctl_set_t *, rctl_alloc_gp_t *, int)
1248 * Make a duplicate of the resource control set. The proc pointers are those
1249 * of the owning process and of the process associated with the entity
1250 * receiving the duplicate.
1252 * Duplication is a 3 stage process. Stage 1 is memory allocation for
1253 * the duplicate set, which is taken care of by rctl_set_dup_prealloc().
1254 * Stage 2 consists of copying all rctls and values from the old set into
1255 * the new. Stage 3 completes the duplication by performing the appropriate
1256 * callbacks for each rctl in the new set.
1258 * Stages 2 and 3 are handled by calling rctl_set_dup with the RCD_DUP and
1259 * RCD_CALLBACK functions, respectively. The RCD_CALLBACK flag may only
1260 * be supplied if the newp proc structure reflects the new task and
1264 * A pointer to the duplicate set.
1267 * The rcs_lock of the set to be duplicated must be held prior to entry.
1270 rctl_set_dup(rctl_set_t
*set
, struct proc
*oldp
, struct proc
*newp
,
1271 rctl_entity_p_t
*e
, rctl_set_t
*dup
, rctl_alloc_gp_t
*ragp
, int flag
)
1276 ASSERT((flag
& RCD_DUP
) || (flag
& RCD_CALLBACK
));
1279 * When copying the old set, iterate over that. Otherwise, when
1280 * only callbacks have been requested, iterate over the dup set.
1282 if (flag
& RCD_DUP
) {
1283 ASSERT(MUTEX_HELD(&set
->rcs_lock
));
1285 dup
->rcs_entity
= set
->rcs_entity
;
1290 mutex_enter(&dup
->rcs_lock
);
1292 for (i
= 0; i
< rctl_set_size
; i
++) {
1293 rctl_t
*r
= iter
->rcs_ctls
[i
];
1297 if (flag
& RCD_DUP
) {
1298 d
= rctl_dup(r
, ragp
, oldp
, newp
);
1299 rctl_set_insert(dup
, r
->rc_id
, d
);
1304 if (flag
& RCD_CALLBACK
)
1305 RCTLOP_SET(d
, newp
, e
,
1306 rctl_model_value(d
->rc_dict_entry
, newp
,
1307 d
->rc_cursor
->rcv_value
));
1313 mutex_exit(&dup
->rcs_lock
);
1319 * void rctl_set_free(rctl_set_t *)
1322 * Delete resource control set and all attached values.
1325 * No value returned.
1328 * No restrictions on context.
1331 rctl_set_free(rctl_set_t
*set
)
1335 mutex_enter(&set
->rcs_lock
);
1336 for (i
= 0; i
< rctl_set_size
; i
++) {
1337 rctl_t
*r
= set
->rcs_ctls
[i
];
1340 rctl_val_t
*v
= r
->rc_values
;
1341 rctl_t
*n
= r
->rc_next
;
1343 kmem_cache_free(rctl_cache
, r
);
1345 rctl_val_list_free(v
);
1350 mutex_exit(&set
->rcs_lock
);
1352 kmem_free(set
->rcs_ctls
, sizeof (rctl_t
*) * rctl_set_size
);
1353 kmem_free(set
, sizeof (rctl_set_t
));
1357 * void rctl_set_reset(rctl_set_t *)
1360 * Resets all rctls within the set such that the lowest value becomes active.
1363 * No value returned.
1366 * No restrictions on context.
1369 rctl_set_reset(rctl_set_t
*set
, struct proc
*p
, rctl_entity_p_t
*e
)
1375 mutex_enter(&set
->rcs_lock
);
1376 for (i
= 0; i
< rctl_set_size
; i
++) {
1377 rctl_t
*r
= set
->rcs_ctls
[i
];
1380 r
->rc_cursor
= r
->rc_values
;
1381 rctl_val_list_reset(r
->rc_cursor
);
1382 RCTLOP_SET(r
, p
, e
, rctl_model_value(r
->rc_dict_entry
,
1383 p
, r
->rc_cursor
->rcv_value
));
1385 ASSERT(r
->rc_cursor
!= NULL
);
1391 mutex_exit(&set
->rcs_lock
);
1395 * void rctl_set_tearoff(rctl_set *, struct proc *)
1398 * Tear off any resource control values on this set with an action recipient
1399 * equal to the specified process (as they are becoming invalid with the
1400 * process's departure from this set as an observer).
1403 * No value returned.
1406 * No restrictions on context
1409 rctl_set_tearoff(rctl_set_t
*set
, struct proc
*p
)
1413 mutex_enter(&set
->rcs_lock
);
1414 for (i
= 0; i
< rctl_set_size
; i
++) {
1415 rctl_t
*r
= set
->rcs_ctls
[i
];
1420 tearoff_rewalk_list
:
1421 rval
= r
->rc_values
;
1423 while (rval
!= NULL
) {
1424 if (rval
->rcv_privilege
== RCPRIV_BASIC
&&
1425 rval
->rcv_action_recipient
== p
) {
1426 if (r
->rc_cursor
== rval
)
1427 r
->rc_cursor
= rval
->rcv_next
;
1429 (void) rctl_val_list_delete(
1430 &r
->rc_values
, rval
);
1432 goto tearoff_rewalk_list
;
1435 rval
= rval
->rcv_next
;
1438 ASSERT(r
->rc_cursor
!= NULL
);
1444 mutex_exit(&set
->rcs_lock
);
1448 rctl_set_find(rctl_set_t
*set
, rctl_hndl_t hndl
, rctl_t
**rctl
)
1450 uint_t index
= hndl
% rctl_set_size
;
1453 ASSERT(MUTEX_HELD(&set
->rcs_lock
));
1455 for (curr_ctl
= set
->rcs_ctls
[index
]; curr_ctl
!= NULL
;
1456 curr_ctl
= curr_ctl
->rc_next
) {
1457 if (curr_ctl
->rc_id
== hndl
) {
1468 * rlim64_t rctl_enforced_value(rctl_hndl_t, rctl_set_t *, struct proc *)
1471 * Given a process, get the next enforced value on the rctl of the specified
1475 * The enforced value.
1478 * For controls on process collectives, p->p_lock must be held across the
1483 rctl_enforced_value(rctl_hndl_t hndl
, rctl_set_t
*rset
, struct proc
*p
)
1488 mutex_enter(&rset
->rcs_lock
);
1490 if (rctl_set_find(rset
, hndl
, &rctl
) == -1)
1491 panic("unknown resource control handle %d requested", hndl
);
1493 ret
= rctl_model_value(rctl
->rc_dict_entry
, p
,
1494 rctl
->rc_cursor
->rcv_value
);
1496 mutex_exit(&rset
->rcs_lock
);
1502 * int rctl_global_get(const char *, rctl_dict_entry_t *)
1505 * Copy a sanitized version of the global rctl for a given resource control
1506 * name. (By sanitization, we mean that the unsafe data pointers have been
1510 * -1 if name not defined, 0 otherwise.
1513 * No restrictions on context. rctl_dict_lock must not be held.
1516 rctl_global_get(const char *name
, rctl_dict_entry_t
*drde
)
1518 rctl_dict_entry_t
*rde
= rctl_dict_lookup(name
);
1523 bcopy(rde
, drde
, sizeof (rctl_dict_entry_t
));
1525 drde
->rcd_next
= NULL
;
1526 drde
->rcd_ops
= NULL
;
1532 * int rctl_global_set(const char *, rctl_dict_entry_t *)
1535 * Transfer the settable fields of the named rctl to the global rctl matching
1536 * the given resource control name.
1539 * -1 if name not defined, 0 otherwise.
1542 * No restrictions on context. rctl_dict_lock must not be held.
1545 rctl_global_set(const char *name
, rctl_dict_entry_t
*drde
)
1547 rctl_dict_entry_t
*rde
= rctl_dict_lookup(name
);
1552 rde
->rcd_flagaction
= drde
->rcd_flagaction
;
1553 rde
->rcd_syslog_level
= drde
->rcd_syslog_level
;
1554 rde
->rcd_strlog_flags
= drde
->rcd_strlog_flags
;
1560 rctl_local_op(rctl_hndl_t hndl
, rctl_val_t
*oval
, rctl_val_t
*nval
,
1561 int (*cbop
)(rctl_hndl_t
, struct proc
*p
, rctl_entity_p_t
*e
, rctl_t
*,
1562 rctl_val_t
*, rctl_val_t
*), struct proc
*p
)
1568 rctl_dict_entry_t
*rde
= rctl_dict_lookup_hndl(hndl
);
1572 ASSERT(MUTEX_HELD(&p
->p_lock
));
1574 rset
= rctl_entity_obtain_rset(rde
, p
);
1579 rctl_entity_obtain_entity_p(rset
->rcs_entity
, p
, &e
);
1581 mutex_enter(&rset
->rcs_lock
);
1583 /* using rctl's hndl, get rctl from local set */
1584 if (rctl_set_find(rset
, hndl
, &rctl
) == -1) {
1585 mutex_exit(&rset
->rcs_lock
);
1589 ret
= cbop(hndl
, p
, &e
, rctl
, oval
, nval
);
1591 mutex_exit(&rset
->rcs_lock
);
1597 rctl_local_get_cb(rctl_hndl_t hndl
, struct proc
*p
, rctl_entity_p_t
*e
,
1598 rctl_t
*rctl
, rctl_val_t
*oval
, rctl_val_t
*nval
)
1604 bcopy(rctl
->rc_values
, nval
, sizeof (rctl_val_t
));
1609 rctl_val_t
*tval
= rctl_val_list_find(&rctl
->rc_values
, oval
);
1613 else if (tval
->rcv_next
== NULL
)
1616 bcopy(tval
->rcv_next
, nval
, sizeof (rctl_val_t
));
1623 * int rctl_local_get(rctl_hndl_t, rctl_val_t *)
1626 * Get the rctl value for the given flags.
1629 * 0 for successful get, errno otherwise.
1632 rctl_local_get(rctl_hndl_t hndl
, rctl_val_t
*oval
, rctl_val_t
*nval
,
1635 return (rctl_local_op(hndl
, oval
, nval
, rctl_local_get_cb
, p
));
1640 rctl_local_delete_cb(rctl_hndl_t hndl
, struct proc
*p
, rctl_entity_p_t
*e
,
1641 rctl_t
*rctl
, rctl_val_t
*oval
, rctl_val_t
*nval
)
1643 if ((oval
= rctl_val_list_find(&rctl
->rc_values
, nval
)) == NULL
)
1646 if (rctl
->rc_cursor
== oval
) {
1647 rctl
->rc_cursor
= oval
->rcv_next
;
1648 rctl_val_list_reset(rctl
->rc_cursor
);
1649 RCTLOP_SET(rctl
, p
, e
, rctl_model_value(rctl
->rc_dict_entry
, p
,
1650 rctl
->rc_cursor
->rcv_value
));
1652 ASSERT(rctl
->rc_cursor
!= NULL
);
1655 (void) rctl_val_list_delete(&rctl
->rc_values
, oval
);
1661 * int rctl_local_delete(rctl_hndl_t, rctl_val_t *)
1664 * Delete the rctl value for the given flags.
1667 * 0 for successful delete, errno otherwise.
1670 rctl_local_delete(rctl_hndl_t hndl
, rctl_val_t
*val
, struct proc
*p
)
1672 return (rctl_local_op(hndl
, NULL
, val
, rctl_local_delete_cb
, p
));
1676 * rctl_local_insert_cb()
1679 * Insert a new value into the rctl's val list. If an error occurs,
1680 * the val list must be left in the same state as when the function
1684 * 0 for successful insert, EINVAL if the value is duplicated in the
1689 rctl_local_insert_cb(rctl_hndl_t hndl
, struct proc
*p
, rctl_entity_p_t
*e
,
1690 rctl_t
*rctl
, rctl_val_t
*oval
, rctl_val_t
*nval
)
1693 * Before inserting, confirm there are no duplicates of this value
1694 * and flag level. If there is a duplicate, flag an error and do
1697 if (rctl_val_list_insert(&rctl
->rc_values
, nval
) != 0)
1700 if (rctl_val_cmp(nval
, rctl
->rc_cursor
, 0) < 0) {
1701 rctl
->rc_cursor
= nval
;
1702 rctl_val_list_reset(rctl
->rc_cursor
);
1703 RCTLOP_SET(rctl
, p
, e
, rctl_model_value(rctl
->rc_dict_entry
, p
,
1704 rctl
->rc_cursor
->rcv_value
));
1706 ASSERT(rctl
->rc_cursor
!= NULL
);
1713 * int rctl_local_insert(rctl_hndl_t, rctl_val_t *)
1716 * Insert the rctl value into the appropriate rctl set for the calling
1717 * process, given the handle.
1720 rctl_local_insert(rctl_hndl_t hndl
, rctl_val_t
*val
, struct proc
*p
)
1722 return (rctl_local_op(hndl
, NULL
, val
, rctl_local_insert_cb
, p
));
1726 * rctl_local_insert_all_cb()
1729 * Called for RCENTITY_PROJECT rctls only, via rctlsys_projset().
1731 * Inserts new values from the project database (new_values). alloc_values
1732 * should be a linked list of pre-allocated rctl_val_t, which are used to
1733 * populate (rc_projdb).
1735 * Should the *new_values linked list match the contents of the rctl's
1736 * rc_projdb then we do nothing.
1739 * 0 is always returned.
1743 rctl_local_insert_all_cb(rctl_hndl_t hndl
, struct proc
*p
, rctl_entity_p_t
*e
,
1744 rctl_t
*rctl
, rctl_val_t
*new_values
, rctl_val_t
*alloc_values
)
1747 rctl_val_t
*tmp_val
;
1752 * If this is the first time we've set this project rctl, then we delete
1753 * all the privilege values. These privilege values have been set by
1754 * rctl_add_default_limit().
1756 * We save some cycles here by not calling rctl_val_list_delete().
1758 if (rctl
->rc_projdb
== NULL
) {
1759 val
= rctl
->rc_values
;
1761 while (val
!= NULL
) {
1762 if (val
->rcv_privilege
== RCPRIV_PRIVILEGED
) {
1763 if (val
->rcv_prev
!= NULL
)
1764 val
->rcv_prev
->rcv_next
= val
->rcv_next
;
1766 rctl
->rc_values
= val
->rcv_next
;
1768 if (val
->rcv_next
!= NULL
)
1769 val
->rcv_next
->rcv_prev
= val
->rcv_prev
;
1772 val
= val
->rcv_next
;
1773 kmem_cache_free(rctl_val_cache
, tmp_val
);
1775 val
= val
->rcv_next
;
1782 * Delete active values previously set through the project database.
1784 val
= rctl
->rc_projdb
;
1786 while (val
!= NULL
) {
1788 /* Is the old value found in the new values? */
1789 if (rctl_val_list_find(&new_values
, val
) == NULL
) {
1792 * Delete from the active values if it originated from
1793 * the project database.
1795 if (((tmp_val
= rctl_val_list_find(&rctl
->rc_values
,
1797 (tmp_val
->rcv_flagaction
& RCTL_LOCAL_PROJDB
)) {
1798 (void) rctl_val_list_delete(&rctl
->rc_values
,
1802 tmp_val
= val
->rcv_next
;
1803 (void) rctl_val_list_delete(&rctl
->rc_projdb
, val
);
1808 val
= val
->rcv_next
;
1812 * Insert new values from the project database.
1814 while (new_values
!= NULL
) {
1815 next
= new_values
->rcv_next
;
1818 * Insert this new value into the rc_projdb, and duplicate this
1819 * entry to the active list.
1821 if (rctl_val_list_insert(&rctl
->rc_projdb
, new_values
) == 0) {
1823 tmp_val
= alloc_values
->rcv_next
;
1824 bcopy(new_values
, alloc_values
, sizeof (rctl_val_t
));
1825 alloc_values
->rcv_next
= tmp_val
;
1827 if (rctl_val_list_insert(&rctl
->rc_values
,
1828 alloc_values
) == 0) {
1829 /* inserted move alloc_values on */
1830 alloc_values
= tmp_val
;
1835 * Unlike setrctl() we don't want to return an error on
1836 * a duplicate entry; we are concerned solely with
1837 * ensuring that all the values specified are set.
1839 kmem_cache_free(rctl_val_cache
, new_values
);
1844 /* Teardown any unused rctl_val_t */
1845 while (alloc_values
!= NULL
) {
1846 tmp_val
= alloc_values
;
1847 alloc_values
= alloc_values
->rcv_next
;
1848 kmem_cache_free(rctl_val_cache
, tmp_val
);
1851 /* Reset the cursor if rctl values have been modified */
1853 rctl
->rc_cursor
= rctl
->rc_values
;
1854 rctl_val_list_reset(rctl
->rc_cursor
);
1855 RCTLOP_SET(rctl
, p
, e
, rctl_model_value(rctl
->rc_dict_entry
, p
,
1856 rctl
->rc_cursor
->rcv_value
));
1863 rctl_local_insert_all(rctl_hndl_t hndl
, rctl_val_t
*new_values
,
1864 rctl_val_t
*alloc_values
, struct proc
*p
)
1866 return (rctl_local_op(hndl
, new_values
, alloc_values
,
1867 rctl_local_insert_all_cb
, p
));
1871 * rctl_local_replace_all_cb()
1874 * Called for RCENTITY_PROJECT rctls only, via rctlsys_projset().
1876 * Clears the active rctl values (rc_values), and stored values from the
1877 * previous insertions from the project database (rc_projdb).
1879 * Inserts new values from the project database (new_values). alloc_values
1880 * should be a linked list of pre-allocated rctl_val_t, which are used to
1881 * populate (rc_projdb).
1884 * 0 is always returned.
1888 rctl_local_replace_all_cb(rctl_hndl_t hndl
, struct proc
*p
, rctl_entity_p_t
*e
,
1889 rctl_t
*rctl
, rctl_val_t
*new_values
, rctl_val_t
*alloc_values
)
1893 rctl_val_t
*tmp_val
;
1895 /* Delete all the privilege values */
1896 val
= rctl
->rc_values
;
1898 while (val
!= NULL
) {
1899 if (val
->rcv_privilege
== RCPRIV_PRIVILEGED
) {
1900 if (val
->rcv_prev
!= NULL
)
1901 val
->rcv_prev
->rcv_next
= val
->rcv_next
;
1903 rctl
->rc_values
= val
->rcv_next
;
1905 if (val
->rcv_next
!= NULL
)
1906 val
->rcv_next
->rcv_prev
= val
->rcv_prev
;
1909 val
= val
->rcv_next
;
1910 kmem_cache_free(rctl_val_cache
, tmp_val
);
1912 val
= val
->rcv_next
;
1916 /* Delete the contents of rc_projdb */
1917 val
= rctl
->rc_projdb
;
1918 while (val
!= NULL
) {
1921 val
= val
->rcv_next
;
1922 kmem_cache_free(rctl_val_cache
, tmp_val
);
1924 rctl
->rc_projdb
= NULL
;
1927 * Insert new values from the project database.
1929 while (new_values
!= NULL
) {
1930 next
= new_values
->rcv_next
;
1932 if (rctl_val_list_insert(&rctl
->rc_projdb
, new_values
) == 0) {
1933 tmp_val
= alloc_values
->rcv_next
;
1934 bcopy(new_values
, alloc_values
, sizeof (rctl_val_t
));
1935 alloc_values
->rcv_next
= tmp_val
;
1937 if (rctl_val_list_insert(&rctl
->rc_values
,
1938 alloc_values
) == 0) {
1939 /* inserted, so move alloc_values on */
1940 alloc_values
= tmp_val
;
1944 * Unlike setrctl() we don't want to return an error on
1945 * a duplicate entry; we are concerned solely with
1946 * ensuring that all the values specified are set.
1948 kmem_cache_free(rctl_val_cache
, new_values
);
1954 /* Teardown any unused rctl_val_t */
1955 while (alloc_values
!= NULL
) {
1956 tmp_val
= alloc_values
;
1957 alloc_values
= alloc_values
->rcv_next
;
1958 kmem_cache_free(rctl_val_cache
, tmp_val
);
1961 /* Always reset the cursor */
1962 rctl
->rc_cursor
= rctl
->rc_values
;
1963 rctl_val_list_reset(rctl
->rc_cursor
);
1964 RCTLOP_SET(rctl
, p
, e
, rctl_model_value(rctl
->rc_dict_entry
, p
,
1965 rctl
->rc_cursor
->rcv_value
));
1971 rctl_local_replace_all(rctl_hndl_t hndl
, rctl_val_t
*new_values
,
1972 rctl_val_t
*alloc_values
, struct proc
*p
)
1974 return (rctl_local_op(hndl
, new_values
, alloc_values
,
1975 rctl_local_replace_all_cb
, p
));
1979 rctl_local_replace_cb(rctl_hndl_t hndl
, struct proc
*p
, rctl_entity_p_t
*e
,
1980 rctl_t
*rctl
, rctl_val_t
*oval
, rctl_val_t
*nval
)
1985 /* Verify that old will be delete-able */
1986 tmp
= rctl_val_list_find(&rctl
->rc_values
, oval
);
1990 * Caller should verify that value being deleted is not the
1993 ASSERT(tmp
->rcv_privilege
!= RCPRIV_SYSTEM
);
1996 * rctl_local_insert_cb() does the job of flagging an error
1997 * for any duplicate values. So, call rctl_local_insert_cb()
1998 * for the new value first, then do deletion of the old value.
1999 * Since this is a callback function to rctl_local_op, we can
2000 * count on rcs_lock being held at this point. This guarantees
2001 * that there is at no point a visible list which contains both
2002 * new and old values.
2004 if (ret
= rctl_local_insert_cb(hndl
, p
, e
, rctl
, NULL
, nval
))
2007 ret
= rctl_local_delete_cb(hndl
, p
, e
, rctl
, NULL
, oval
);
2013 * int rctl_local_replace(rctl_hndl_t, void *, int, uint64_t *)
2016 * Replace the rctl value with a new one.
2019 * 0 for successful replace, errno otherwise.
2022 rctl_local_replace(rctl_hndl_t hndl
, rctl_val_t
*oval
, rctl_val_t
*nval
,
2025 return (rctl_local_op(hndl
, oval
, nval
, rctl_local_replace_cb
, p
));
2029 * int rctl_rlimit_get(rctl_hndl_t, struct proc *, struct rlimit64 *)
2032 * To support rlimit compatibility, we need a function which takes a 64-bit
2033 * rlimit and encodes it as appropriate rcontrol values on the given rcontrol.
2034 * This operation is only intended for legacy rlimits.
2037 rctl_rlimit_get(rctl_hndl_t rc
, struct proc
*p
, struct rlimit64
*rlp64
)
2041 rctl_set_t
*rset
= p
->p_rctls
;
2042 int soft_limit_seen
= 0;
2043 int test_for_deny
= 1;
2045 mutex_enter(&rset
->rcs_lock
);
2046 if (rctl_set_find(rset
, rc
, &rctl
) == -1) {
2047 mutex_exit(&rset
->rcs_lock
);
2051 rval
= rctl
->rc_values
;
2053 if (rctl
->rc_dict_entry
->rcd_flagaction
& (RCTL_GLOBAL_DENY_NEVER
|
2054 RCTL_GLOBAL_DENY_ALWAYS
))
2058 * 1. Find the first control value with the RCTL_LOCAL_DENY bit set.
2060 while (rval
!= NULL
&& rval
->rcv_privilege
!= RCPRIV_SYSTEM
) {
2061 if (test_for_deny
&&
2062 (rval
->rcv_flagaction
& RCTL_LOCAL_DENY
) == 0) {
2063 rval
= rval
->rcv_next
;
2068 * 2. If this is an RCPRIV_BASIC value, then we've found the
2069 * effective soft limit and should set rlim_cur. We should then
2070 * continue looking for another control value with the DENY bit
2073 if (rval
->rcv_privilege
== RCPRIV_BASIC
) {
2074 if (soft_limit_seen
) {
2075 rval
= rval
->rcv_next
;
2079 if ((rval
->rcv_flagaction
& RCTL_LOCAL_MAXIMAL
) == 0 &&
2080 rval
->rcv_value
< rctl_model_maximum(
2081 rctl
->rc_dict_entry
, p
))
2082 rlp64
->rlim_cur
= rval
->rcv_value
;
2084 rlp64
->rlim_cur
= RLIM64_INFINITY
;
2085 soft_limit_seen
= 1;
2087 rval
= rval
->rcv_next
;
2092 * 3. This is an RCPRIV_PRIVILEGED value. If we haven't found
2093 * a soft limit candidate, then we've found the effective hard
2094 * and soft limits and should set both. If we had found a soft
2095 * limit, then this is only the hard limit and we need only set
2098 if ((rval
->rcv_flagaction
& RCTL_LOCAL_MAXIMAL
) == 0 &&
2099 rval
->rcv_value
< rctl_model_maximum(rctl
->rc_dict_entry
,
2101 rlp64
->rlim_max
= rval
->rcv_value
;
2103 rlp64
->rlim_max
= RLIM64_INFINITY
;
2104 if (!soft_limit_seen
)
2105 rlp64
->rlim_cur
= rlp64
->rlim_max
;
2107 mutex_exit(&rset
->rcs_lock
);
2113 * This control sequence is corrupt, as it is not terminated by
2114 * a system privileged control value.
2116 mutex_exit(&rset
->rcs_lock
);
2121 * 4. If we run into a RCPRIV_SYSTEM value, then the hard limit (and
2122 * the soft, if we haven't a soft candidate) should be the value of the
2123 * system control value.
2125 if ((rval
->rcv_flagaction
& RCTL_LOCAL_MAXIMAL
) == 0 &&
2126 rval
->rcv_value
< rctl_model_maximum(rctl
->rc_dict_entry
, p
))
2127 rlp64
->rlim_max
= rval
->rcv_value
;
2129 rlp64
->rlim_max
= RLIM64_INFINITY
;
2131 if (!soft_limit_seen
)
2132 rlp64
->rlim_cur
= rlp64
->rlim_max
;
2134 mutex_exit(&rset
->rcs_lock
);
2139 * rctl_alloc_gp_t *rctl_rlimit_set_prealloc(uint_t)
2142 * Before making a series of calls to rctl_rlimit_set(), we must have a
2143 * preallocated batch of resource control values, as rctl_rlimit_set() can
2144 * potentially consume two resource control values per call.
2147 * A populated resource control allocation group with 2n resource control
2151 * Must be safe for KM_SLEEP allocations.
2154 rctl_rlimit_set_prealloc(uint_t n
)
2156 rctl_alloc_gp_t
*gp
= kmem_zalloc(sizeof (rctl_alloc_gp_t
), KM_SLEEP
);
2158 ASSERT(MUTEX_NOT_HELD(&curproc
->p_lock
));
2160 gp
->rcag_nvals
= 2 * n
;
2168 * int rctl_rlimit_set(rctl_hndl_t, struct proc *, struct rlimit64 *, int,
2172 * To support rlimit compatibility, we need a function which takes a 64-bit
2173 * rlimit and encodes it as appropriate rcontrol values on the given rcontrol.
2174 * This operation is only intended for legacy rlimits.
2176 * The implementation of rctl_rlimit_set() is a bit clever, as it tries to
2177 * minimize the number of values placed on the value sequence in various
2178 * cases. Furthermore, we don't allow multiple identical privilege-action
2179 * values on the same sequence. (That is, we don't want a sequence like
2180 * "while (1) { rlim.rlim_cur++; setrlimit(..., rlim); }" to exhaust kernel
2181 * memory.) So we want to delete any values with the same privilege value and
2185 * 0 for successful set, errno otherwise. Errno will be either EINVAL
2186 * or EPERM, in keeping with defined errnos for ulimit() and setrlimit()
2191 rctl_rlimit_set(rctl_hndl_t rc
, struct proc
*p
, struct rlimit64
*rlp64
,
2192 rctl_alloc_gp_t
*ragp
, int flagaction
, int signal
, const cred_t
*cr
)
2195 rctl_val_t
*rval
, *rval_priv
, *rval_basic
;
2196 rctl_set_t
*rset
= p
->p_rctls
;
2199 struct rlimit64 cur_rl
;
2201 e
.rcep_t
= RCENTITY_PROCESS
;
2204 if (rlp64
->rlim_cur
> rlp64
->rlim_max
)
2207 if (rctl_rlimit_get(rc
, p
, &cur_rl
) == -1)
2211 * If we are not privileged, we can only lower the hard limit.
2213 if ((rlp64
->rlim_max
> cur_rl
.rlim_max
) &&
2214 cur_rl
.rlim_max
!= RLIM64_INFINITY
&&
2215 secpolicy_resource(cr
) != 0)
2218 mutex_enter(&rset
->rcs_lock
);
2220 if (rctl_set_find(rset
, rc
, &rctl
) == -1) {
2221 mutex_exit(&rset
->rcs_lock
);
2225 rval_priv
= rctl_gp_detach_val(ragp
);
2227 rval
= rctl
->rc_values
;
2229 while (rval
!= NULL
) {
2230 rctl_val_t
*next
= rval
->rcv_next
;
2232 if (rval
->rcv_privilege
== RCPRIV_SYSTEM
)
2235 if ((rval
->rcv_privilege
== RCPRIV_BASIC
) ||
2236 (rval
->rcv_flagaction
& ~RCTL_LOCAL_ACTION_MASK
) ==
2237 (flagaction
& ~RCTL_LOCAL_ACTION_MASK
)) {
2238 if (rctl
->rc_cursor
== rval
) {
2239 rctl
->rc_cursor
= rval
->rcv_next
;
2240 rctl_val_list_reset(rctl
->rc_cursor
);
2241 RCTLOP_SET(rctl
, p
, &e
, rctl_model_value(
2242 rctl
->rc_dict_entry
, p
,
2243 rctl
->rc_cursor
->rcv_value
));
2245 (void) rctl_val_list_delete(&rctl
->rc_values
, rval
);
2251 rval_priv
->rcv_privilege
= RCPRIV_PRIVILEGED
;
2252 rval_priv
->rcv_flagaction
= flagaction
;
2253 if (rlp64
->rlim_max
== RLIM64_INFINITY
) {
2254 rval_priv
->rcv_flagaction
|= RCTL_LOCAL_MAXIMAL
;
2255 max
= rctl
->rc_dict_entry
->rcd_max_native
;
2257 max
= rlp64
->rlim_max
;
2259 rval_priv
->rcv_value
= max
;
2260 rval_priv
->rcv_action_signal
= signal
;
2261 rval_priv
->rcv_action_recipient
= NULL
;
2262 rval_priv
->rcv_action_recip_pid
= -1;
2263 rval_priv
->rcv_firing_time
= 0;
2264 rval_priv
->rcv_prev
= rval_priv
->rcv_next
= NULL
;
2266 (void) rctl_val_list_insert(&rctl
->rc_values
, rval_priv
);
2267 rctl
->rc_cursor
= rval_priv
;
2268 rctl_val_list_reset(rctl
->rc_cursor
);
2269 RCTLOP_SET(rctl
, p
, &e
, rctl_model_value(rctl
->rc_dict_entry
, p
,
2270 rctl
->rc_cursor
->rcv_value
));
2272 if (rlp64
->rlim_cur
!= RLIM64_INFINITY
&& rlp64
->rlim_cur
< max
) {
2273 rval_basic
= rctl_gp_detach_val(ragp
);
2275 rval_basic
->rcv_privilege
= RCPRIV_BASIC
;
2276 rval_basic
->rcv_value
= rlp64
->rlim_cur
;
2277 rval_basic
->rcv_flagaction
= flagaction
;
2278 rval_basic
->rcv_action_signal
= signal
;
2279 rval_basic
->rcv_action_recipient
= p
;
2280 rval_basic
->rcv_action_recip_pid
= p
->p_pid
;
2281 rval_basic
->rcv_firing_time
= 0;
2282 rval_basic
->rcv_prev
= rval_basic
->rcv_next
= NULL
;
2284 (void) rctl_val_list_insert(&rctl
->rc_values
, rval_basic
);
2285 rctl
->rc_cursor
= rval_basic
;
2286 rctl_val_list_reset(rctl
->rc_cursor
);
2287 RCTLOP_SET(rctl
, p
, &e
, rctl_model_value(rctl
->rc_dict_entry
, p
,
2288 rctl
->rc_cursor
->rcv_value
));
2291 ASSERT(rctl
->rc_cursor
!= NULL
);
2293 mutex_exit(&rset
->rcs_lock
);
2299 * rctl_hndl_t rctl_register(const char *, rctl_entity_t, int, rlim64_t,
2300 * rlim64_t, rctl_ops_t *)
2303 * rctl_register() performs a look-up in the dictionary of rctls
2304 * active on the system; if a rctl of that name is absent, an entry is
2305 * made into the dictionary. The rctl is returned with its reference
2306 * count incremented by one. If the rctl name already exists, we panic.
2307 * (Were the resource control system to support dynamic loading and unloading,
2308 * which it is structured for, duplicate registration should lead to load
2309 * failure instead of panicking.)
2311 * Each registered rctl has a requirement that a RCPRIV_SYSTEM limit be
2312 * defined. This limit contains the highest possible value for this quantity
2313 * on the system. Furthermore, the registered control must provide infinite
2314 * values for all applicable address space models supported by the operating
2315 * system. Attempts to set resource control values beyond the system limit
2322 * Caller must be in a context suitable for KM_SLEEP allocations.
2327 rctl_entity_t entity
,
2329 rlim64_t max_native
,
2333 rctl_t
*rctl
= kmem_cache_alloc(rctl_cache
, KM_SLEEP
);
2334 rctl_val_t
*rctl_val
= kmem_cache_alloc(rctl_val_cache
, KM_SLEEP
);
2335 rctl_dict_entry_t
*rctl_de
= kmem_zalloc(sizeof (rctl_dict_entry_t
),
2341 ASSERT(ops
!= NULL
);
2343 bzero(rctl
, sizeof (rctl_t
));
2344 bzero(rctl_val
, sizeof (rctl_val_t
));
2346 if (global_flags
& RCTL_GLOBAL_DENY_NEVER
)
2347 localflags
= RCTL_LOCAL_MAXIMAL
;
2349 localflags
= RCTL_LOCAL_MAXIMAL
| RCTL_LOCAL_DENY
;
2351 rctl_val
->rcv_privilege
= RCPRIV_SYSTEM
;
2352 rctl_val
->rcv_value
= max_native
;
2353 rctl_val
->rcv_flagaction
= localflags
;
2354 rctl_val
->rcv_action_signal
= 0;
2355 rctl_val
->rcv_action_recipient
= NULL
;
2356 rctl_val
->rcv_action_recip_pid
= -1;
2357 rctl_val
->rcv_firing_time
= 0;
2358 rctl_val
->rcv_next
= NULL
;
2359 rctl_val
->rcv_prev
= NULL
;
2361 rctl_de
->rcd_name
= (char *)name
;
2362 rctl_de
->rcd_default_value
= rctl_val
;
2363 rctl_de
->rcd_max_native
= max_native
;
2364 rctl_de
->rcd_max_ilp32
= max_ilp32
;
2365 rctl_de
->rcd_entity
= entity
;
2366 rctl_de
->rcd_ops
= ops
;
2367 rctl_de
->rcd_flagaction
= global_flags
;
2369 rctl
->rc_dict_entry
= rctl_de
;
2370 rctl
->rc_values
= rctl_val
;
2373 * 1. Take global lock, validate nonexistence of name, get ID.
2375 mutex_enter(&rctl_dict_lock
);
2377 if (mod_hash_find(rctl_dict_by_name
, (mod_hash_key_t
)name
,
2378 (mod_hash_val_t
*)&rhndl
) != MH_ERR_NOTFOUND
)
2379 panic("duplicate registration of rctl %s", name
);
2381 rhndl
= rctl_de
->rcd_id
= rctl
->rc_id
=
2382 (rctl_hndl_t
)id_alloc(rctl_ids
);
2385 * 2. Insert name-entry pair in rctl_dict_by_name.
2387 if (mod_hash_insert(rctl_dict_by_name
, (mod_hash_key_t
)name
,
2388 (mod_hash_val_t
)rctl_de
))
2389 panic("unable to insert rctl dict entry for %s (%u)", name
,
2390 (uint_t
)rctl
->rc_id
);
2393 * 3. Insert ID-rctl_t * pair in rctl_dict.
2395 if (mod_hash_find(rctl_dict
, (mod_hash_key_t
)(uintptr_t)rctl
->rc_id
,
2396 (mod_hash_val_t
*)&old_rctl
) != MH_ERR_NOTFOUND
)
2397 panic("duplicate rctl ID %u registered", rctl
->rc_id
);
2399 if (mod_hash_insert(rctl_dict
, (mod_hash_key_t
)(uintptr_t)rctl
->rc_id
,
2400 (mod_hash_val_t
)rctl
))
2401 panic("unable to insert rctl %s/%u (%p)", name
,
2402 (uint_t
)rctl
->rc_id
, (void *)rctl
);
2405 * 3a. Insert rctl_dict_entry_t * in appropriate entity list.
2408 mutex_enter(&rctl_lists_lock
);
2412 case RCENTITY_PROJECT
:
2414 case RCENTITY_PROCESS
:
2415 rctl_de
->rcd_next
= rctl_lists
[entity
];
2416 rctl_lists
[entity
] = rctl_de
;
2419 panic("registering unknown rctl entity %d (%s)", entity
,
2424 mutex_exit(&rctl_lists_lock
);
2429 mutex_exit(&rctl_dict_lock
);
2435 * static int rctl_global_action(rctl_t *r, rctl_set_t *rset, struct proc *p,
2439 * rctl_global_action() takes, in accordance with the flags on the rctl_dict
2440 * entry for the given control, the appropriate actions on the exceeded
2441 * control value. Additionally, rctl_global_action() updates the firing time
2442 * on the exceeded value.
2445 * A bitmask reflecting the actions actually taken.
2448 * No restrictions on context.
2452 rctl_global_action(rctl_t
*r
, rctl_set_t
*rset
, struct proc
*p
, rctl_val_t
*v
)
2454 rctl_dict_entry_t
*rde
= r
->rc_dict_entry
;
2455 const char *pr
, *en
, *idstr
;
2458 SUFFIX_NONE
, /* id consumed directly */
2459 SUFFIX_NUMERIC
, /* id consumed in suffix */
2460 SUFFIX_STRING
/* idstr consumed in suffix */
2461 } suffix
= SUFFIX_NONE
;
2464 v
->rcv_firing_time
= gethrtime();
2466 switch (v
->rcv_privilege
) {
2470 case RCPRIV_PRIVILEGED
:
2481 switch (rde
->rcd_entity
) {
2482 case RCENTITY_PROCESS
:
2485 suffix
= SUFFIX_NONE
;
2489 id
= p
->p_task
->tk_tkid
;
2490 suffix
= SUFFIX_NUMERIC
;
2492 case RCENTITY_PROJECT
:
2494 id
= p
->p_task
->tk_proj
->kpj_id
;
2495 suffix
= SUFFIX_NUMERIC
;
2499 idstr
= p
->p_zone
->zone_name
;
2500 suffix
= SUFFIX_STRING
;
2503 en
= "unknown entity associated with process";
2505 suffix
= SUFFIX_NONE
;
2509 if (rde
->rcd_flagaction
& RCTL_GLOBAL_SYSLOG
) {
2513 (void) strlog(0, 0, 0,
2514 rde
->rcd_strlog_flags
| log_global
.lz_active
,
2515 "%s rctl %s (value %llu) exceeded by %s %d.",
2516 pr
, rde
->rcd_name
, v
->rcv_value
, en
, id
);
2518 case SUFFIX_NUMERIC
:
2519 (void) strlog(0, 0, 0,
2520 rde
->rcd_strlog_flags
| log_global
.lz_active
,
2521 "%s rctl %s (value %llu) exceeded by process %d"
2523 pr
, rde
->rcd_name
, v
->rcv_value
, p
->p_pid
,
2527 (void) strlog(0, 0, 0,
2528 rde
->rcd_strlog_flags
| log_global
.lz_active
,
2529 "%s rctl %s (value %llu) exceeded by process %d"
2531 pr
, rde
->rcd_name
, v
->rcv_value
, p
->p_pid
,
2537 if (rde
->rcd_flagaction
& RCTL_GLOBAL_DENY_ALWAYS
)
2544 rctl_local_action(rctl_t
*r
, rctl_set_t
*rset
, struct proc
*p
, rctl_val_t
*v
,
2548 sigqueue_t
*sqp
= NULL
;
2549 rctl_dict_entry_t
*rde
= r
->rc_dict_entry
;
2550 int unobservable
= (rde
->rcd_flagaction
& RCTL_GLOBAL_UNOBSERVABLE
);
2552 proc_t
*recipient
= v
->rcv_action_recipient
;
2553 id_t recip_pid
= v
->rcv_action_recip_pid
;
2554 int recip_signal
= v
->rcv_action_signal
;
2555 uint_t flagaction
= v
->rcv_flagaction
;
2557 if (safety
== RCA_UNSAFE_ALL
) {
2558 if (flagaction
& RCTL_LOCAL_DENY
) {
2564 if (flagaction
& RCTL_LOCAL_SIGNAL
) {
2566 * We can build a siginfo only in the case that it is
2567 * safe for us to drop p_lock. (For asynchronous
2568 * checks this is currently not true.)
2570 if (safety
== RCA_SAFE
) {
2571 mutex_exit(&rset
->rcs_lock
);
2572 mutex_exit(&p
->p_lock
);
2573 sqp
= kmem_zalloc(sizeof (sigqueue_t
), KM_SLEEP
);
2574 mutex_enter(&p
->p_lock
);
2575 mutex_enter(&rset
->rcs_lock
);
2577 sqp
->sq_info
.si_signo
= recip_signal
;
2578 sqp
->sq_info
.si_code
= SI_RCTL
;
2579 sqp
->sq_info
.si_errno
= 0;
2580 sqp
->sq_info
.si_entity
= (int)rde
->rcd_entity
;
2583 if (recipient
== NULL
|| recipient
== p
) {
2587 sigtoproc(p
, NULL
, recip_signal
);
2588 } else if (p
== curproc
) {
2590 * Then this is a synchronous test and we can
2591 * direct the signal at the violating thread.
2593 sigaddqa(curproc
, curthread
, sqp
);
2595 sigaddqa(p
, NULL
, sqp
);
2597 } else if (!unobservable
) {
2600 mutex_exit(&rset
->rcs_lock
);
2601 mutex_exit(&p
->p_lock
);
2603 mutex_enter(&pidlock
);
2604 if ((rp
= prfind(recip_pid
)) == recipient
) {
2606 * Recipient process is still alive, but may not
2607 * be in this task or project any longer. In
2608 * this case, the recipient's resource control
2609 * set pertinent to this control will have
2610 * changed--and we will not deliver the signal,
2611 * as the recipient process is trying to tear
2612 * itself off of its former set.
2614 mutex_enter(&rp
->p_lock
);
2615 mutex_exit(&pidlock
);
2617 if (rctl_entity_obtain_rset(rde
, rp
) == rset
) {
2624 sigaddqa(rp
, NULL
, sqp
);
2626 kmem_free(sqp
, sizeof (sigqueue_t
));
2628 mutex_exit(&rp
->p_lock
);
2630 mutex_exit(&pidlock
);
2632 kmem_free(sqp
, sizeof (sigqueue_t
));
2635 mutex_enter(&p
->p_lock
);
2637 * Since we dropped p_lock, we may no longer be in the
2638 * same task or project as we were at entry. It is thus
2639 * unsafe for us to reacquire the set lock at this
2640 * point; callers of rctl_local_action() must handle
2643 ret
|= RCT_LK_ABANDONED
;
2645 kmem_free(sqp
, sizeof (sigqueue_t
));
2649 if ((flagaction
& RCTL_LOCAL_DENY
) &&
2650 (recipient
== NULL
|| recipient
== p
)) {
2658 * int rctl_action(rctl_hndl_t, rctl_set_t *, struct proc *, uint_t)
2661 * Take the action associated with the enforced value (as defined by
2662 * rctl_get_enforced_value()) being exceeded or encountered. Possibly perform
2663 * a restricted subset of the available actions, if circumstances dictate that
2664 * we cannot safely allocate memory (for a sigqueue_t) or guarantee process
2665 * persistence across the duration of the function (an asynchronous action).
2668 * Actions taken, according to the rctl_test bitmask.
2671 * Safe to acquire rcs_lock.
2674 rctl_action(rctl_hndl_t hndl
, rctl_set_t
*rset
, struct proc
*p
, uint_t safety
)
2676 return (rctl_action_entity(hndl
, rset
, p
, NULL
, safety
));
2680 rctl_action_entity(rctl_hndl_t hndl
, rctl_set_t
*rset
, struct proc
*p
,
2681 rctl_entity_p_t
*e
, uint_t safety
)
2685 rctl_entity_p_t e_tmp
;
2687 rctl_action_acquire
:
2688 mutex_enter(&rset
->rcs_lock
);
2689 if (rctl_set_find(rset
, hndl
, &lrctl
) == -1) {
2690 mutex_exit(&rset
->rcs_lock
);
2695 rctl_entity_obtain_entity_p(lrctl
->rc_dict_entry
->rcd_entity
,
2700 if ((ret
& RCT_LK_ABANDONED
) == 0) {
2701 ret
|= rctl_global_action(lrctl
, rset
, p
, lrctl
->rc_cursor
);
2703 RCTLOP_ACTION(lrctl
, p
, e
);
2705 ret
|= rctl_local_action(lrctl
, rset
, p
,
2706 lrctl
->rc_cursor
, safety
);
2708 if (ret
& RCT_LK_ABANDONED
)
2709 goto rctl_action_acquire
;
2712 ret
&= ~RCT_LK_ABANDONED
;
2714 if (!(ret
& RCT_DENY
) &&
2715 lrctl
->rc_cursor
->rcv_next
!= NULL
) {
2716 lrctl
->rc_cursor
= lrctl
->rc_cursor
->rcv_next
;
2718 RCTLOP_SET(lrctl
, p
, e
, rctl_model_value(lrctl
->rc_dict_entry
,
2719 p
, lrctl
->rc_cursor
->rcv_value
));
2722 mutex_exit(&rset
->rcs_lock
);
2728 * int rctl_test(rctl_hndl_t, rctl_set_t *, struct proc *, rctl_qty_t, uint_t)
2731 * Increment the resource associated with the given handle, returning zero if
2732 * the incremented value does not exceed the threshold for the current limit
2736 * Actions taken, according to the rctl_test bitmask.
2739 * p_lock held by caller.
2743 rctl_test(rctl_hndl_t rhndl
, rctl_set_t
*rset
, struct proc
*p
,
2744 rctl_qty_t incr
, uint_t flags
)
2746 return (rctl_test_entity(rhndl
, rset
, p
, NULL
, incr
, flags
));
2750 rctl_test_entity(rctl_hndl_t rhndl
, rctl_set_t
*rset
, struct proc
*p
,
2751 rctl_entity_p_t
*e
, rctl_qty_t incr
, uint_t flags
)
2755 rctl_entity_p_t e_tmp
;
2758 * We don't enforce rctls on the kernel itself.
2764 ASSERT(MUTEX_HELD(&p
->p_lock
));
2766 mutex_enter(&rset
->rcs_lock
);
2769 * Dereference from rctl_set. We don't enforce newly loaded controls
2770 * that haven't been set on this entity (since the only valid value is
2771 * the infinite system value).
2773 if (rctl_set_find(rset
, rhndl
, &lrctl
) == -1) {
2774 mutex_exit(&rset
->rcs_lock
);
2779 * This control is currently unenforced: maximal value on control
2780 * supporting infinitely available resource.
2782 if ((lrctl
->rc_dict_entry
->rcd_flagaction
& RCTL_GLOBAL_INFINITE
) &&
2783 (lrctl
->rc_cursor
->rcv_flagaction
& RCTL_LOCAL_MAXIMAL
)) {
2785 mutex_exit(&rset
->rcs_lock
);
2790 * If we have been called by rctl_test, look up the entity pointer
2791 * from the proc pointer.
2794 rctl_entity_obtain_entity_p(lrctl
->rc_dict_entry
->rcd_entity
,
2800 * Get enforced rctl value and current usage. Test the increment
2801 * with the current usage against the enforced value--take action as
2804 while (RCTLOP_TEST(lrctl
, p
, e
, lrctl
->rc_cursor
, incr
, flags
)) {
2805 if ((ret
& RCT_LK_ABANDONED
) == 0) {
2806 ret
|= rctl_global_action(lrctl
, rset
, p
,
2809 RCTLOP_ACTION(lrctl
, p
, e
);
2811 ret
|= rctl_local_action(lrctl
, rset
, p
,
2812 lrctl
->rc_cursor
, flags
);
2814 if (ret
& RCT_LK_ABANDONED
)
2815 goto rctl_test_acquire
;
2818 ret
&= ~RCT_LK_ABANDONED
;
2820 if ((ret
& RCT_DENY
) == RCT_DENY
||
2821 lrctl
->rc_cursor
->rcv_next
== NULL
) {
2826 lrctl
->rc_cursor
= lrctl
->rc_cursor
->rcv_next
;
2827 RCTLOP_SET(lrctl
, p
, e
, rctl_model_value(lrctl
->rc_dict_entry
,
2828 p
, lrctl
->rc_cursor
->rcv_value
));
2831 mutex_exit(&rset
->rcs_lock
);
2837 * void rctl_init(void)
2840 * Initialize the rctl subsystem, including the primoridal rctls
2841 * provided by the system. New subsystem-specific rctls should _not_ be
2842 * initialized here. (Do it in your own file.)
2848 * Safe for KM_SLEEP allocations. Must be called prior to any process model
2854 rctl_cache
= kmem_cache_create("rctl_cache", sizeof (rctl_t
),
2855 0, NULL
, NULL
, NULL
, NULL
, NULL
, 0);
2856 rctl_val_cache
= kmem_cache_create("rctl_val_cache",
2857 sizeof (rctl_val_t
), 0, NULL
, NULL
, NULL
, NULL
, NULL
, 0);
2859 rctl_dict
= mod_hash_create_extended("rctl_dict",
2860 rctl_dict_size
, mod_hash_null_keydtor
, rctl_dict_val_dtor
,
2861 rctl_dict_hash_by_id
, NULL
, rctl_dict_id_cmp
, KM_SLEEP
);
2862 rctl_dict_by_name
= mod_hash_create_strhash(
2863 "rctl_handles_by_name", rctl_dict_size
,
2864 mod_hash_null_valdtor
);
2865 rctl_ids
= id_space_create("rctl_ids", 1, max_rctl_hndl
);
2866 bzero(rctl_lists
, (RC_MAX_ENTITY
+ 1) * sizeof (rctl_dict_entry_t
*));
2872 * rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc,
2875 * Increments the amount of locked memory on a project, and
2876 * zone. If proj is non-NULL the project must be held by the
2877 * caller; if it is NULL the proj and zone of proc_t p are used.
2878 * If chargeproc is non-zero, then the charged amount is cached
2879 * on p->p_locked_mem so that the charge can be migrated when a
2880 * process changes projects.
2884 * EAGAIN - attempting to increment locked memory is denied by one
2885 * or more resource entities.
2888 rctl_incr_locked_mem(proc_t
*p
, kproject_t
*proj
, rctl_qty_t inc
,
2897 ASSERT(MUTEX_HELD(&p
->p_lock
));
2900 zonep
= proj
->kpj_zone
;
2902 projp
= p
->p_task
->tk_proj
;
2906 mutex_enter(&zonep
->zone_mem_lock
);
2908 e
.rcep_p
.proj
= projp
;
2909 e
.rcep_t
= RCENTITY_PROJECT
;
2911 /* check for overflow */
2912 if ((projp
->kpj_data
.kpd_locked_mem
+ inc
) <
2913 projp
->kpj_data
.kpd_locked_mem
) {
2917 if (projp
->kpj_data
.kpd_locked_mem
+ inc
>
2918 projp
->kpj_data
.kpd_locked_mem_ctl
) {
2919 if (rctl_test_entity(rc_project_locked_mem
, projp
->kpj_rctls
,
2920 p
, &e
, inc
, 0) & RCT_DENY
) {
2925 e
.rcep_p
.zone
= zonep
;
2926 e
.rcep_t
= RCENTITY_ZONE
;
2928 /* Check for overflow */
2929 if ((zonep
->zone_locked_mem
+ inc
) < zonep
->zone_locked_mem
) {
2933 if (zonep
->zone_locked_mem
+ inc
> zonep
->zone_locked_mem_ctl
) {
2934 if (rctl_test_entity(rc_zone_locked_mem
, zonep
->zone_rctls
,
2935 p
, &e
, inc
, 0) & RCT_DENY
) {
2941 zonep
->zone_locked_mem
+= inc
;
2942 projp
->kpj_data
.kpd_locked_mem
+= inc
;
2943 if (chargeproc
!= 0) {
2944 p
->p_locked_mem
+= inc
;
2947 mutex_exit(&zonep
->zone_mem_lock
);
2952 * rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc,
2955 * Decrements the amount of locked memory on a project and
2956 * zone. If proj is non-NULL the project must be held by the
2957 * caller; if it is NULL the proj and zone of proc_t p are used.
2958 * If creditproc is non-zero, then the quantity of locked memory
2959 * is subtracted from p->p_locked_mem.
2965 rctl_decr_locked_mem(proc_t
*p
, kproject_t
*proj
, rctl_qty_t inc
,
2973 zonep
= proj
->kpj_zone
;
2976 ASSERT(MUTEX_HELD(&p
->p_lock
));
2977 projp
= p
->p_task
->tk_proj
;
2981 mutex_enter(&zonep
->zone_mem_lock
);
2982 zonep
->zone_locked_mem
-= inc
;
2983 projp
->kpj_data
.kpd_locked_mem
-= inc
;
2984 if (creditproc
!= 0) {
2986 ASSERT(MUTEX_HELD(&p
->p_lock
));
2987 p
->p_locked_mem
-= inc
;
2989 mutex_exit(&zonep
->zone_mem_lock
);
2993 * rctl_incr_swap(proc_t *, zone_t *, size_t)
2996 * Increments the swap charge on the specified zone.
2999 * 0 on success. EAGAIN if swap increment fails due an rctl value
3003 * p_lock held on specified proc.
3004 * swap must be even multiple of PAGESIZE
3007 rctl_incr_swap(proc_t
*proc
, zone_t
*zone
, size_t swap
)
3011 ASSERT(MUTEX_HELD(&proc
->p_lock
));
3012 ASSERT((swap
& PAGEOFFSET
) == 0);
3013 e
.rcep_p
.zone
= zone
;
3014 e
.rcep_t
= RCENTITY_ZONE
;
3016 mutex_enter(&zone
->zone_mem_lock
);
3018 /* Check for overflow */
3019 if ((zone
->zone_max_swap
+ swap
) < zone
->zone_max_swap
) {
3020 mutex_exit(&zone
->zone_mem_lock
);
3023 if ((zone
->zone_max_swap
+ swap
) >
3024 zone
->zone_max_swap_ctl
) {
3026 if (rctl_test_entity(rc_zone_max_swap
, zone
->zone_rctls
,
3027 proc
, &e
, swap
, 0) & RCT_DENY
) {
3028 mutex_exit(&zone
->zone_mem_lock
);
3032 zone
->zone_max_swap
+= swap
;
3033 mutex_exit(&zone
->zone_mem_lock
);
3038 * rctl_decr_swap(zone_t *, size_t)
3041 * Decrements the swap charge on the specified zone.
3047 * swap must be even multiple of PAGESIZE
3050 rctl_decr_swap(zone_t
*zone
, size_t swap
)
3052 ASSERT((swap
& PAGEOFFSET
) == 0);
3053 mutex_enter(&zone
->zone_mem_lock
);
3054 ASSERT(zone
->zone_max_swap
>= swap
);
3055 zone
->zone_max_swap
-= swap
;
3056 mutex_exit(&zone
->zone_mem_lock
);
3060 * rctl_incr_lofi(proc_t *, zone_t *, size_t)
3063 * Increments the number of lofi devices for the zone.
3066 * 0 on success. EAGAIN if increment fails due an rctl value
3070 * p_lock held on specified proc.
3073 rctl_incr_lofi(proc_t
*proc
, zone_t
*zone
, size_t incr
)
3077 ASSERT(MUTEX_HELD(&proc
->p_lock
));
3080 e
.rcep_p
.zone
= zone
;
3081 e
.rcep_t
= RCENTITY_ZONE
;
3083 mutex_enter(&zone
->zone_rctl_lock
);
3085 /* Check for overflow */
3086 if ((zone
->zone_max_lofi
+ incr
) < zone
->zone_max_lofi
) {
3087 mutex_exit(&zone
->zone_rctl_lock
);
3090 if ((zone
->zone_max_lofi
+ incr
) > zone
->zone_max_lofi_ctl
) {
3091 if (rctl_test_entity(rc_zone_max_lofi
, zone
->zone_rctls
,
3092 proc
, &e
, incr
, 0) & RCT_DENY
) {
3093 mutex_exit(&zone
->zone_rctl_lock
);
3097 zone
->zone_max_lofi
+= incr
;
3098 mutex_exit(&zone
->zone_rctl_lock
);
3103 * rctl_decr_lofi(zone_t *, size_t)
3106 * Decrements the number of lofi devices for the zone.
3109 rctl_decr_lofi(zone_t
*zone
, size_t decr
)
3111 mutex_enter(&zone
->zone_rctl_lock
);
3112 ASSERT(zone
->zone_max_lofi
>= decr
);
3113 zone
->zone_max_lofi
-= decr
;
3114 mutex_exit(&zone
->zone_rctl_lock
);
3118 * Create resource kstat
3121 rctl_kstat_create_common(char *ks_name
, int ks_instance
, char *ks_class
,
3122 uchar_t ks_type
, uint_t ks_ndata
, uchar_t ks_flags
, int ks_zoneid
)
3124 kstat_t
*ksp
= NULL
;
3125 char name
[KSTAT_STRLEN
];
3127 (void) snprintf(name
, KSTAT_STRLEN
, "%s_%d", ks_name
, ks_instance
);
3129 if ((ksp
= kstat_create_zone("caps", ks_zoneid
,
3130 name
, ks_class
, ks_type
,
3131 ks_ndata
, ks_flags
, ks_zoneid
)) != NULL
) {
3132 if (ks_zoneid
!= GLOBAL_ZONEID
)
3133 kstat_zone_add(ksp
, GLOBAL_ZONEID
);
3139 * Create zone-specific resource kstat
3142 rctl_kstat_create_zone(zone_t
*zone
, char *ks_name
, uchar_t ks_type
,
3143 uint_t ks_ndata
, uchar_t ks_flags
)
3145 char name
[KSTAT_STRLEN
];
3147 (void) snprintf(name
, KSTAT_STRLEN
, "%s_zone", ks_name
);
3149 return (rctl_kstat_create_common(name
, zone
->zone_id
, "zone_caps",
3150 ks_type
, ks_ndata
, ks_flags
, zone
->zone_id
));
3154 * Create project-specific resource kstat
3157 rctl_kstat_create_project(kproject_t
*kpj
, char *ks_name
, uchar_t ks_type
,
3158 uint_t ks_ndata
, uchar_t ks_flags
)
3160 char name
[KSTAT_STRLEN
];
3162 (void) snprintf(name
, KSTAT_STRLEN
, "%s_project", ks_name
);
3164 return (rctl_kstat_create_common(name
, kpj
->kpj_id
, "project_caps",
3165 ks_type
, ks_ndata
, ks_flags
, kpj
->kpj_zoneid
));
3169 * Create task-specific resource kstat
3172 rctl_kstat_create_task(task_t
*tk
, char *ks_name
, uchar_t ks_type
,
3173 uint_t ks_ndata
, uchar_t ks_flags
)
3175 char name
[KSTAT_STRLEN
];
3177 (void) snprintf(name
, KSTAT_STRLEN
, "%s_task", ks_name
);
3179 return (rctl_kstat_create_common(name
, tk
->tk_tkid
, "task_caps",
3180 ks_type
, ks_ndata
, ks_flags
, tk
->tk_proj
->kpj_zoneid
));