4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
31 * locality group definitions for kernel
34 #include <sys/types.h>
/* ID value that denotes "no such lgroup" */
#define	LGRP_NONE	(-1)
#if !defined(_KERNEL) && !defined(_KMEMUSER)
/*
 * Consumers built without _KERNEL or _KMEMUSER only get an opaque
 * placeholder for the memory policy information: two ints of storage
 * with no named policy fields.
 */
typedef struct lgrp_mem_policy_info {
	int	opaque[2];
} lgrp_mem_policy_info_t;
#endif	/* !_KERNEL && !_KMEMUSER */
46 #if (defined(_KERNEL) || defined(_KMEMUSER))
47 #include <sys/cpuvar.h>
48 #include <sys/bitmap.h>
49 #include <sys/vnode.h>
52 #include <sys/lgrp_user.h>
53 #include <sys/param.h>
/* Type of an lgroup load average (lgrp_loadavg) */
typedef uint32_t	lgrp_load_t;

/* Type of an lgroup handle */
typedef uintptr_t	lgrp_handle_t;
/* Alternate spelling of the non-existent lgroup ID */
#define	LGRP_NONE_SUCH		LGRP_NONE

/* Null platform handle */
#define	LGRP_NULL_HANDLE	((lgrp_handle_t)0xbadbad)

/* UMA handle */
#define	LGRP_DEFAULT_HANDLE	((lgrp_handle_t)0xbabecafe)

/* ID of the root lgroup */
#define	LGRP_ROOTID		(0)
65 * Maximum number of lgrps a platform may define.
/* Upper limit for a load average; equals UINT32_MAX */
#define	LGRP_LOADAVG_MAX		UINT32_MAX

/*
 * Load average expected for one CPU-bound thread's worth of load.
 */
#define	LGRP_LOADAVG_THREAD_MAX		65516

/*
 * Input to the load-average generating function for one CPU-bound
 * thread.
 */
#define	LGRP_LOADAVG_IN_THREAD_MAX	128
92 * lgroup statistics. Most of these are counters that are updated
93 * dynamically so they are hashed to CPU buckets to reduce cache
94 * interference. The remaining statistics are snapshots of kernel
95 * data, so they aren't stored in the array of counter stats.
97 * For the hashed stats to make sense, you have to sum all the buckets for
98 * that stat, hence macros are provided to read the stats.
/* Number of CPU hash buckets per counter stat; must be a power of 2 */
#define	LGRP_NUM_CPU_BUCKETS	8

/* Mask applied to a CPU id to select its bucket */
#define	LGRP_CPU_BUCKET_MASK	(LGRP_NUM_CPU_BUCKETS - 1)
105 * Flags for what to do with lgroup memory policy
106 * Used for heap and stack where policy is extended to new segments added to
/* Memory policy flag: the policy should extend up */
#define	LGRP_MP_FLAG_EXTEND_UP		0x1

/* Memory policy flag: the policy should extend down */
#define	LGRP_MP_FLAG_EXTEND_DOWN	0x2
/*
 * Name the counter slot for stat 'whichstat' in CPU bucket 'bucket' of
 * the stats structure pointed to by 'stats'.  The result is an lvalue.
 */
#define	LGRP_STAT(stats, bucket, whichstat)	\
	((stats)->ls_data[bucket][whichstat])
115 /* Return a pointer suitable for an atomic 64-bit op on the bucket */
116 #define LGRP_STAT_WRITE_PTR(stats, whichstat) \
117 (&LGRP_STAT(stats, (CPU->cpu_id) & LGRP_CPU_BUCKET_MASK, \
120 /* Sum up all the buckets and return the value in 'val' */
121 #define LGRP_STAT_READ(stats, whichstat, val) { \
123 for (val = 0, bkt = 0; bkt < LGRP_NUM_CPU_BUCKETS; bkt++) \
124 val += LGRP_STAT(stats, bkt, whichstat); \
127 /* Reset all buckets for the stat to 0 */
128 #define LGRP_STAT_RESET(stats, stat) { \
130 for (i = 0; i < LGRP_NUM_CPU_BUCKETS; i++) \
131 LGRP_STAT(stats, i, stat) = 0; \
135 * Define all of the statistics that are kept for lgrp kstats,
136 * and their corresponding text names.
139 typedef enum lgrp_stat_types
{
140 LGRP_NUM_MIGR
, /* # migrations away from this lgrp */
141 LGRP_NUM_ALLOC_FAIL
, /* # times alloc fails for chosen lgrp */
142 LGRP_PM_SRC_PGS
, /* # pages migrated from this lgrp */
143 LGRP_PM_DEST_PGS
, /* # pages migrated to this lgrp */
144 LGRP_PM_FAIL_ALLOC_PGS
, /* # pages failed to migrate to this lgrp */
145 LGRP_PM_FAIL_LOCK_PGS
, /* # pages failed to migrate from this lgrp */
146 LGRP_PMM_PGS
, /* # pages marked to migrate from this lgrp */
147 LGRP_PMM_FAIL_PGS
, /* # pages that failed to be marked to migrate from this lgrp */
148 LGRP_NUM_DEFAULT
, /* # of times default policy applied */
149 LGRP_NUM_NEXT
, /* # of times next touch policy applied */
150 LGRP_NUM_RANDOM
, /* # of times random policy applied */
151 LGRP_NUM_RANDOM_PROC
, /* # of times random proc policy applied */
152 LGRP_NUM_RANDOM_PSET
, /* # of times random pset policy applied */
153 LGRP_NUM_ROUNDROBIN
, /* # of times round robin policy applied */
154 LGRP_NUM_NEXT_SEG
, /* # of times next to seg policy applied */
155 LGRP_NUM_COUNTER_STATS
, /* always last */
156 LGRP_CTR_STATS_ALLOC
= 16 /* cache-align pad - multiple of 8 */
157 /* always keep >= LGRP_NUM_COUNTER_STATS */
160 typedef enum lgrp_snap_stat_types
{
161 LGRP_NUM_CPUS
, /* number of CPUs */
162 LGRP_NUM_PG_FREE
, /* # of free pages */
163 LGRP_NUM_PG_AVAIL
, /* # of allocatable physical pages */
164 LGRP_NUM_PG_INSTALL
, /* # of installed physical pages */
165 LGRP_LOADAVG
, /* unscaled load average of this lgrp */
166 LGRP_LOADAVG_SCALE
, /* load unit of one CPU bound thread */
167 LGRP_NUM_SNAPSHOT_STATS
/* always last */
170 #define LGRP_KSTAT_NAMES \
171 static char *lgrp_kstat_names[] = { \
173 /* Counter stats */ \
176 "pages migrated from", \
177 "pages migrated to", \
178 "pages failed to migrate to", \
179 "pages failed to migrate from", \
180 "pages marked for migration", \
181 "pages failed to mark", \
183 "next-touch policy", \
185 "span process policy", \
186 "span psrset policy", \
187 "round robin policy", \
190 /* Snapshot stats */ \
/* Total number of stats: the counter stats plus the snapshot stats */
#define	LGRP_NUM_STATS	\
	((int)LGRP_NUM_COUNTER_STATS + (int)LGRP_NUM_SNAPSHOT_STATS)
203 * The contents of this structure are opaque and should only be
204 * accessed through the LGRP_STAT macro.
207 int64_t ls_data
[LGRP_NUM_CPU_BUCKETS
][LGRP_CTR_STATS_ALLOC
];
/* Kernel representation of a set of lgroups: a 64-bit bitmap */
typedef uint64_t	klgrpset_t;
/*
 * Logically part of memnode.h, but defined here to avoid recursive
 * inclusion problems: memnode.h includes this header.
 */
typedef uint64_t	mnodeset_t;
222 * Visible to generic code and contains the lgroup ID, CPUs in this lgroup,
223 * and a platform handle used to identify this lgroup to the lgroup platform
226 typedef struct lgrp
{
228 lgrp_id_t lgrp_id
; /* which lgroup */
230 lgrp_handle_t lgrp_plathand
; /* handle for platform calls */
231 struct lgrp
*lgrp_parent
; /* parent lgroup */
232 uint_t lgrp_reserved1
; /* filler */
233 uint_t lgrp_childcnt
; /* number of children lgroups */
234 klgrpset_t lgrp_children
; /* children lgroups */
235 klgrpset_t lgrp_leaves
; /* (direct descendant) leaf lgroups */
238 * set of lgroups containing a given type of resource
239 * at this level of locality
241 klgrpset_t lgrp_set
[LGRP_RSRC_COUNT
];
243 mnodeset_t lgrp_mnodes
; /* set of memory nodes in this lgroup */
244 uint_t lgrp_nmnodes
; /* number of memnodes */
245 uint_t lgrp_reserved2
; /* filler */
247 struct cpu
*lgrp_cpu
; /* pointer to a cpu may be null */
248 uint_t lgrp_cpucnt
; /* number of cpus in this lgrp */
249 kstat_t
*lgrp_kstat
; /* per-lgrp kstats */
253 * lgroup load average structure
256 typedef struct lgrp_ld
{
257 lgrp_load_t lpl_loadavg
; /* load average */
258 uint_t lpl_ncpu
; /* how many cpus */
259 lgrp_id_t lpl_lgrpid
; /* which group this lpl part of */
260 lgrp_t
*lpl_lgrp
; /* ptr to lpl's lgrp */
261 struct lgrp_ld
*lpl_parent
; /* lpl of parent lgrp */
262 struct cpu
*lpl_cpus
; /* list of cpus in lpl */
263 /* NULL for non-leaf lgrps */
264 uint_t lpl_nrset
; /* no. of leaf lpls for lgrp */
265 hrtime_t lpl_homed_time
; /* time of last homing to this lpl */
266 uint_t lpl_rset_sz
; /* Resource set capacity */
267 struct lgrp_ld
**lpl_rset
; /* leaf lpls for lgrp */
268 /* contains ptr to self for leaf lgrp */
269 int *lpl_id2rset
; /* mapping of lgrpid to rset index */
/*
 * Shift count chosen so that
 * (1 << LGRP_MAX_EFFECT_SHFT) == lgrp_loadavg_max_effect
 */
#define	LGRP_MAX_EFFECT_SHFT	16
278 * Operations handled by lgrp_config()
280 typedef enum lgrp_config_flag
{
284 LGRP_CONFIG_CPU_ONLINE
,
285 LGRP_CONFIG_CPU_OFFLINE
,
286 LGRP_CONFIG_CPUPART_ADD
,
287 LGRP_CONFIG_CPUPART_DEL
,
290 LGRP_CONFIG_MEM_RENAME
,
291 LGRP_CONFIG_GEN_UPDATE
,
293 LGRP_CONFIG_LAT_CHANGE_ALL
,
294 LGRP_CONFIG_LAT_CHANGE
295 } lgrp_config_flag_t
;
298 * Memory allocation policies
300 typedef enum lgrp_mem_policy
{
301 LGRP_MEM_POLICY_DEFAULT
,
302 LGRP_MEM_POLICY_NEXT
, /* near LWP to next touch */
303 LGRP_MEM_POLICY_RANDOM_PROC
, /* randomly across process */
304 LGRP_MEM_POLICY_RANDOM_PSET
, /* randomly across processor set */
305 LGRP_MEM_POLICY_RANDOM
, /* randomly across all lgroups */
306 LGRP_MEM_POLICY_ROUNDROBIN
, /* round robin across all lgroups */
307 LGRP_MEM_POLICY_NEXT_CPU
, /* Near next CPU to touch memory */
308 LGRP_MEM_POLICY_NEXT_SEG
, /* lgrp specified directly by seg */
309 LGRP_NUM_MEM_POLICIES
313 * Search scopes for finding resources
315 typedef enum lgrp_res_ss
{
316 LGRP_SRCH_LOCAL
, /* Search local lgroup only */
317 LGRP_SRCH_HIER
/* Search entire hierarchy */
321 * Cookie used for lgrp mnode selection
323 typedef struct lgrp_mnode_cookie
{
324 lgrp_t
*lmc_lgrp
; /* lgrp under consideration */
325 mnodeset_t lmc_nodes
; /* nodes not yet tried in lgrp */
326 int lmc_cnt
; /* how many nodes in untried set */
327 mnodeset_t lmc_tried
; /* nodes already tried */
328 int lmc_ntried
; /* how many nodes in tried set */
329 lgrp_res_ss_t lmc_scope
; /* consider non-local nodes? */
330 ushort_t lmc_rand
; /* a "random" number */
331 } lgrp_mnode_cookie_t
;
334 * Information needed to implement memory allocation policy
336 typedef struct lgrp_mem_policy_info
{
337 int mem_policy
; /* memory allocation policy */
338 lgrp_id_t mem_lgrpid
; /* lgroup id */
339 } lgrp_mem_policy_info_t
;
342 * Shared memory policy segment
344 typedef struct lgrp_shm_policy_seg
{
345 u_offset_t shm_off
; /* offset into shared object */
346 size_t shm_size
; /* size of segment */
347 lgrp_mem_policy_info_t shm_policy
; /* memory allocation policy */
348 avl_node_t shm_tree
; /* AVL tree */
349 } lgrp_shm_policy_seg_t
;
352 * Shared memory locality info
354 typedef struct lgrp_shm_locality
{
355 size_t loc_count
; /* reference count */
356 avl_tree_t
*loc_tree
; /* policy segment tree */
357 krwlock_t loc_lock
; /* protects tree */
358 } lgrp_shm_locality_t
;
361 * Queries that may be made to determine lgroup memory size
364 LGRP_MEM_SIZE_FREE
, /* number of free pages */
365 LGRP_MEM_SIZE_AVAIL
, /* number of pages in phys_avail */
366 LGRP_MEM_SIZE_INSTALL
/* number of pages in phys_install */
370 * Argument for the memory copy-rename operation, contains the source and the
371 * destination platform handles.
373 typedef struct lgrp_config_mem_rename
{
374 lgrp_handle_t lmem_rename_from
;
375 lgrp_handle_t lmem_rename_to
;
376 } lgrp_config_mem_rename_t
;
/*
 * Mutating operations on an lgroup bitmap (klgrpset_t).
 *
 * Each expansion is wrapped in parentheses so that it remains a single
 * expression when used inside a larger one (for example as an operand
 * of ?: or &).  The value of each macro is the updated bitmap.
 *
 * For klgrpset_add() and klgrpset_del(), lgrpid is used as a shift
 * count on a klgrpset_t, so it must be non-negative and smaller than
 * the width of klgrpset_t in bits.
 */

/* Macro to clear an lgroup bitmap */
#define	klgrpset_clear(klgrpset) \
	((klgrpset) = (klgrpset_t)0)

/* Macro to fill an lgroup bitmap */
#define	klgrpset_fill(klgrpset) \
	((klgrpset) = (klgrpset_t)(-1))

/* Macro to add an lgroup to an lgroup bitmap */
#define	klgrpset_add(klgrpset, lgrpid) \
	((klgrpset) |= ((klgrpset_t)1 << (lgrpid)))

/* Macro to delete an lgroup from an lgroup bitmap */
#define	klgrpset_del(klgrpset, lgrpid) \
	((klgrpset) &= ~((klgrpset_t)1 << (lgrpid)))

/* Macro to copy a klgrpset into another klgrpset */
#define	klgrpset_copy(klgrpset_to, klgrpset_from) \
	((klgrpset_to) = (klgrpset_from))

/* Macro to perform an 'and' operation on a pair of lgroup bitmaps */
#define	klgrpset_and(klgrpset_rslt, klgrpset_arg) \
	((klgrpset_rslt) &= (klgrpset_arg))

/* Macro to perform an 'or' operation on a pair of lgroup bitmaps */
#define	klgrpset_or(klgrpset_rslt, klgrpset_arg) \
	((klgrpset_rslt) |= (klgrpset_arg))

/* Macro to perform a 'diff' operation on a pair of lgroup bitmaps */
#define	klgrpset_diff(klgrpset_rslt, klgrpset_arg) \
	((klgrpset_rslt) &= ~(klgrpset_arg))
/*
 * Predicates on lgroup bitmaps (klgrpset_t).  Each evaluates to 1 when
 * the condition holds and 0 otherwise, so the result can be stored in a
 * narrower type such as int without losing high-order bits of the
 * underlying 64-bit mask.
 *
 * For klgrpset_ismember(), lgrpid is used as a shift count on a
 * klgrpset_t, so it must be non-negative and smaller than the width of
 * klgrpset_t in bits.
 */

/* Macro to check if an lgroup is a member of an lgrpset */
#define	klgrpset_ismember(klgrpset, lgrpid) \
	(((klgrpset) & ((klgrpset_t)1 << (lgrpid))) != (klgrpset_t)0)

/* Macro to check if an lgroup bitmap is empty */
#define	klgrpset_isempty(klgrpset) \
	((klgrpset) == (klgrpset_t)0)

/* Macro to check if two lgrpsets intersect */
#define	klgrpset_intersects(klgrpset1, klgrpset2) \
	(((klgrpset1) & (klgrpset2)) != 0)
422 /* Macro to count the number of members in an lgrpset */
423 #define klgrpset_nlgrps(klgrpset, count) \
426 for (lgrpid = 0, count = 0; lgrpid <= lgrp_alloc_max; lgrpid++) {\
427 if (klgrpset_ismember(klgrpset, lgrpid)) \
432 /* Macro to get total memory size (in bytes) of a given set of lgroups */
433 #define klgrpset_totalsize(klgrpset, size) \
435 lgrp_handle_t hand; \
438 for (lgrpid = 0, size = 0; lgrpid <= lgrp_alloc_max; lgrpid++) {\
439 if (klgrpset_ismember(klgrpset, lgrpid) && \
440 lgrp_table[lgrpid]) { \
441 hand = lgrp_table[lgrpid]->lgrp_plathand; \
442 size += lgrp_plat_mem_size(hand, \
443 LGRP_MEM_SIZE_AVAIL) * PAGESIZE; \
/*
 * Does this lgroup exist?
 *
 * True when 'lgrp' is a non-NULL pointer whose lgrp_id is not
 * LGRP_NONE.  The argument is parenthesized so that any pointer
 * expression may be passed; note that it is evaluated twice.
 */
#define	LGRP_EXISTS(lgrp)	\
	((lgrp) != NULL && (lgrp)->lgrp_id != LGRP_NONE)
/*
 * Macro for testing if a CPU is contained in an lgrp.
 *
 * Checks whether the lgroup id of the CPU's lpl (cpu->cpu_lpl) is a
 * member of the lgroup's CPU resource set.  Both arguments are
 * parenthesized so that any pointer expression may be passed.
 */
#define	LGRP_CONTAINS_CPU(lgrp, cpu)	\
	(klgrpset_ismember((lgrp)->lgrp_set[LGRP_RSRC_CPU],	\
	(cpu)->cpu_lpl->lpl_lgrpid))
462 * Initialize an lgrp_mnode_cookie
464 #define LGRP_MNODE_COOKIE_INIT(c, lgrp, scope) \
466 bzero(&(c), sizeof (lgrp_mnode_cookie_t)); \
467 (&(c))->lmc_lgrp = lgrp; \
468 (&(c))->lmc_nodes = lgrp->lgrp_mnodes; \
469 (&(c))->lmc_cnt = lgrp->lgrp_nmnodes; \
470 (&(c))->lmc_scope = scope; \
471 (&(c))->lmc_rand = (ushort_t)gethrtime_unscaled() >> 4; \
475 * Upgrade cookie scope from LGRP_SRCH_LOCAL to LGRP_SRCH_HIER.
477 #define LGRP_MNODE_COOKIE_UPGRADE(c) \
479 ASSERT((&(c))->lmc_scope == LGRP_SRCH_LOCAL); \
480 (&(c))->lmc_scope = LGRP_SRCH_HIER; \
/*
 * Macro to see whether memory allocation policy can be reapplied.
 *
 * True only when 'p' equals LGRP_MEM_POLICY_NEXT.  The argument is
 * parenthesized so that an expression such as (a & b) or (c ? x : y)
 * is compared as a whole instead of being split by operator precedence.
 */
#define	LGRP_MEM_POLICY_REAPPLICABLE(p)	\
	((p) == LGRP_MEM_POLICY_NEXT)
/*
 * Return true if lgrp has CPU resources in the cpupart.
 *
 * Tests whether the lpl stored at index 'lgrpid' of the partition's
 * cp_lgrploads array has a non-zero CPU count.  'cpupart' is
 * parenthesized so that any pointer expression may be passed.
 */
#define	LGRP_CPUS_IN_PART(lgrpid, cpupart)	\
	((cpupart)->cp_lgrploads[lgrpid].lpl_ncpu > 0)
495 extern int lgrp_alloc_max
;
496 extern lgrp_t
*lgrp_table
[NLGRPS_MAX
]; /* indexed by lgrp_id */
497 extern int nlgrps
; /* number of lgroups in machine */
498 extern int nlgrpsmax
; /* max number of lgroups on platform */
499 extern lgrp_gen_t lgrp_gen
; /* generation of lgroup hierarchy */
500 extern int lgrp_initialized
; /* single-CPU initialization done */
501 extern int lgrp_topo_initialized
; /* lgrp topology constructed */
502 extern lgrp_t
*lgrp_root
; /* root lgroup */
503 extern unsigned int lgrp_topo_levels
;
504 extern lpl_t
*lpl_bootstrap
; /* bootstrap lpl for non-active CPUs */
507 /* generic interfaces */
512 int lgrp_optimizations(void);
513 void lgrp_init(void);
514 void lgrp_setup(void);
515 lgrp_t
*lgrp_create(void);
516 void lgrp_destroy(lgrp_t
*);
517 void lgrp_config(lgrp_config_flag_t
, uintptr_t, uintptr_t);
518 lgrp_t
*lgrp_hand_to_lgrp(lgrp_handle_t
);
523 void lgrp_kstat_create(struct cpu
*);
524 void lgrp_kstat_destroy(struct cpu
*);
525 void lgrp_stat_add(lgrp_id_t
, lgrp_stat_t
, int64_t);
526 int64_t lgrp_stat_read(lgrp_id_t
, lgrp_stat_t
);
531 lgrp_mem_policy_t
lgrp_madv_to_policy(uchar_t
, size_t, int);
532 pgcnt_t
lgrp_mem_size(lgrp_id_t
, lgrp_mem_query_t
);
533 lgrp_t
*lgrp_mem_choose(struct seg
*, caddr_t
, size_t);
534 int lgrp_memnode_choose(lgrp_mnode_cookie_t
*);
535 lgrp_mem_policy_t
lgrp_mem_policy_default(size_t, int);
536 int lgrp_mnode_update(klgrpset_t
, klgrpset_t
*);
537 lgrp_t
*lgrp_pfn_to_lgrp(pfn_t
);
538 lgrp_t
*lgrp_phys_to_lgrp(u_longlong_t
); /* used by numat driver */
539 int lgrp_privm_policy_set(lgrp_mem_policy_t
, lgrp_mem_policy_info_t
*,
541 void lgrp_shm_policy_init(struct anon_map
*, vnode_t
*);
542 void lgrp_shm_policy_fini(struct anon_map
*, vnode_t
*);
543 lgrp_mem_policy_info_t
*lgrp_shm_policy_get(struct anon_map
*, ulong_t
,
544 vnode_t
*, u_offset_t
);
545 int lgrp_shm_policy_set(lgrp_mem_policy_t
, struct anon_map
*, ulong_t
,
546 vnode_t
*, u_offset_t
, size_t);
549 * Used by numat driver
551 int lgrp_query_cpu(processorid_t
, lgrp_id_t
*);
552 int lgrp_query_load(processorid_t
, lgrp_load_t
*);
555 * lgroup thread placement
557 lpl_t
*lgrp_affinity_best(kthread_t
*, struct cpupart
*, lgrp_id_t
,
559 void lgrp_affinity_init(lgrp_affinity_t
**);
560 void lgrp_affinity_free(lgrp_affinity_t
**);
561 lpl_t
*lgrp_choose(kthread_t
*t
, struct cpupart
*);
562 lgrp_t
*lgrp_home_lgrp(void);
563 lgrp_id_t
lgrp_home_id(kthread_t
*);
564 void lgrp_loadavg(lpl_t
*, uint_t
, int);
565 void lgrp_move_thread(kthread_t
*, lpl_t
*, int);
566 uint64_t lgrp_get_trthr_migrations(void);
567 void lgrp_update_trthr_migrations(uint64_t);
572 int lgrp_leaf_add(lgrp_t
*, lgrp_t
**, int, klgrpset_t
*);
573 int lgrp_leaf_delete(lgrp_t
*, lgrp_t
**, int, klgrpset_t
*);
574 int lgrp_rsets_empty(klgrpset_t
*);
575 int lgrp_rsets_member(klgrpset_t
*, lgrp_id_t
);
576 int lgrp_topo_flatten(int, lgrp_t
**, int, klgrpset_t
*);
577 int lgrp_topo_ht_limit(void);
578 int lgrp_topo_ht_limit_default(void);
579 int lgrp_topo_ht_limit_set(int);
580 int lgrp_topo_update(lgrp_t
**, int, klgrpset_t
*);
585 void lpl_topo_bootstrap(lpl_t
*, int);
586 int lpl_topo_flatten(int);
587 int lpl_topo_verify(struct cpupart
*);
590 /* platform interfaces */
591 void lgrp_plat_init(void);
592 void lgrp_plat_main_init(void);
593 lgrp_t
*lgrp_plat_alloc(lgrp_id_t lgrpid
);
594 void lgrp_plat_config(lgrp_config_flag_t
, uintptr_t);
595 lgrp_handle_t
lgrp_plat_cpu_to_hand(processorid_t
);
596 lgrp_handle_t
lgrp_plat_pfn_to_hand(pfn_t
);
597 int lgrp_plat_max_lgrps(void);
598 pgcnt_t
lgrp_plat_mem_size(lgrp_handle_t
, lgrp_mem_query_t
);
599 int lgrp_plat_latency(lgrp_handle_t
, lgrp_handle_t
);
600 lgrp_handle_t
lgrp_plat_root_hand(void);
601 void lgrp_plat_probe(void);
603 extern uint32_t lgrp_expand_proc_thresh
;
604 extern uint32_t lgrp_expand_proc_diff
;
605 extern pgcnt_t lgrp_mem_free_thresh
;
606 extern uint32_t lgrp_loadavg_tolerance
;
607 extern uint32_t lgrp_loadavg_max_effect
;
608 extern uint32_t lgrp_load_thresh
;
609 extern lgrp_mem_policy_t lgrp_mem_policy_root
;
611 #endif /* _KERNEL || _KMEMUSER */